Index: ps/trunk/binaries/data/mods/public/gui/credits/texts/programming.json =================================================================== --- ps/trunk/binaries/data/mods/public/gui/credits/texts/programming.json +++ ps/trunk/binaries/data/mods/public/gui/credits/texts/programming.json @@ -242,6 +242,7 @@ {"nick": "usey11"}, {"nick": "vincent_c", "name": "Vincent Cheng"}, {"nick": "vladislavbelov", "name": "Vladislav Belov"}, + {"nick": "voroskoi"}, {"nick": "vts", "name": "Jeroen DR"}, {"nick": "wacko", "name": "Andrew Spiering"}, {"nick": "WhiteTreePaladin", "name": "Brian Ashley"}, Index: ps/trunk/build/premake/extern_libs5.lua =================================================================== --- ps/trunk/build/premake/extern_libs5.lua +++ ps/trunk/build/premake/extern_libs5.lua @@ -463,7 +463,7 @@ add_default_links({ win_names = { "nvtt" }, unix_names = { "nvcore", "nvmath", "nvimage", "nvtt" }, - osx_names = { "nvcore", "nvmath", "nvimage", "nvtt", "squish" }, + osx_names = { "bc6h", "bc7", "nvcore", "nvimage", "nvmath", "nvthread", "nvtt", "squish" }, dbg_suffix = "", -- for performance we always use the release-mode version }) end, Index: ps/trunk/libraries/osx/build-osx-libs.sh =================================================================== --- ps/trunk/libraries/osx/build-osx-libs.sh +++ ps/trunk/libraries/osx/build-osx-libs.sh @@ -1002,7 +1002,7 @@ # Could use CMAKE_OSX_DEPLOYMENT_TARGET and CMAKE_OSX_SYSROOT # but they're not as flexible for cross-compiling - # Disable optional libs that we don't need (avoids some conflicts with MacPorts) + # Disable png support (avoids some conflicts with MacPorts) (cmake .. \ -DCMAKE_LINK_FLAGS="$LDFLAGS" \ -DCMAKE_C_FLAGS="$CFLAGS" \ @@ -1010,19 +1010,13 @@ -DCMAKE_BUILD_TYPE=Release \ -DBINDIR=bin \ -DLIBDIR=lib \ - -DGLUT=0 \ - -DGLEW=0 \ - -DCG=0 \ - -DCUDA=0 \ - -DOPENEXR=0 \ - -DJPEG=0 \ -DPNG=0 \ - -DTIFF=0 \ -G "Unix Makefiles" \ && make clean && make nvtt ${JOBS}) || die "NVTT build failed" popd mkdir -p ../lib + cp build/src/bc*/libbc*.a ../lib/ cp build/src/nv*/libnv*.a ../lib/ cp build/src/nvtt/squish/libsquish.a ../lib/ popd Index: ps/trunk/libraries/source/nvtt/README.txt =================================================================== --- ps/trunk/libraries/source/nvtt/README.txt +++ ps/trunk/libraries/source/nvtt/README.txt @@ -1,21 +1,9 @@ -This is NVTT 2.0.8-1 from http://code.google.com/p/nvidia-texture-tools/ +This is NVTT 2.1.1 from https://github.com/castano/nvidia-texture-tools plus some patches (see patches/): - r1156.patch (from NVTT SVN r1156 - fixes build with libtiff 4.0) - r1157.patch (from NVTT SVN r1157 - fixes build with CUDA 3.0) - r1172.patch (from NVTT SVN r1172 - fixes memory allocator interaction with Valgrind) - r907.patch and r1025.patch (from NVTT SVN - fixes build on FreeBSD) - rpath.patch (fixes .so file search paths for bundled copy) - issue139.patch (fixes http://code.google.com/p/nvidia-texture-tools/issues/detail?id=139) - issue176.patch (partially from http:/code.google.com/p/nvidia-texture-tools/issues/detail?id=176 - fixes build on OpenBSD) - png-api.patch (partially from NVTT SVN r1248 - fixes build with libpng 1.5) + cmake.patch - disables some dependencies cmake-freebsd.patch (fixes build on FreeBSD) - gcc47-unistd.patch (fixes build on GCC 4.7) - cmake-devflags.patch (from https://407191.bugs.gentoo.org/attachment.cgi?id=308589 - allows disabling various dependencies) - cmake-devflags2.patch - allows disabling more dependencies - issue182.patch (fixes 
http://code.google.com/p/nvidia-texture-tools/issues/detail?id=182) - cmake-noqt4.patch (removes unused dependency on Qt4, fixes build on systems without Qt) - arm-fix.patch (from NVTT SVN r1173 - fixes ARM build) issue188.patch (fixes http://code.google.com/p/nvidia-texture-tools/issues/detail?id=188) - clang-cpp11-error.patch (fixes build error on OS X Yosemite with clang, libc++ and c++11) - arm64-fix.patch (backported in http://trac.wildfiregames.com/ticket/3344 from upstream https://github.com/castano/nvidia-texture-tools/commit/58617584d4d2541ff9fcfe23a9a492af86b11efb - fixes ARM64 build) - gcc6-fix.path (fixes a compilation issue where GCC 6 doesn't want to cast a boolean to a pointer anymore) + issue261.patch (fixes https://github.com/castano/nvidia-texture-tools/issues/261) + rpath.patch (fixes .so file search paths for bundled copy) + win-shared-build.patch (adapted from https://github.com/castano/nvidia-texture-tools/pull/285) + musl-build.patch (fixes build on musl linux; contributed by voroskoi, with a part by leper, see https://code.wildfiregames.com/D2491) Index: ps/trunk/libraries/source/nvtt/build.sh =================================================================== --- ps/trunk/libraries/source/nvtt/build.sh +++ ps/trunk/libraries/source/nvtt/build.sh @@ -11,7 +11,7 @@ mkdir -p src/build/ cd src/build/ -cmake .. -DNVTT_SHARED=1 -DCMAKE_BUILD_TYPE=Release -DBINDIR=bin -DLIBDIR=lib -DGLUT=0 -DGLEW=0 -DCG=0 -DCUDA=0 -DOPENEXR=0 -G "Unix Makefiles" +cmake .. -DNVTT_SHARED=1 -DCMAKE_BUILD_TYPE=Release -DBINDIR=bin -DLIBDIR=lib -G "Unix Makefiles" ${MAKE} nvtt ${JOBS} Index: ps/trunk/libraries/source/nvtt/include/nvtt/nvtt.h =================================================================== --- ps/trunk/libraries/source/nvtt/include/nvtt/nvtt.h +++ ps/trunk/libraries/source/nvtt/include/nvtt/nvtt.h @@ -1,308 +1,676 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. 
- -#ifndef NV_TT_H -#define NV_TT_H - -// Function linkage -#if NVTT_SHARED - -#if defined _WIN32 || defined WIN32 || defined __NT__ || defined __WIN32__ || defined __MINGW32__ -# ifdef NVTT_EXPORTS -# define NVTT_API __declspec(dllexport) -# else -# define NVTT_API __declspec(dllimport) -# endif -#endif - -#if defined __GNUC__ >= 4 -# ifdef NVTT_EXPORTS -# define NVTT_API __attribute__((visibility("default"))) -# endif -#endif - -#endif // NVTT_SHARED - -#if !defined NVTT_API -# define NVTT_API -#endif - -#define NVTT_VERSION 200 - -#define NVTT_DECLARE_PIMPL(Class) \ - private: \ - Class(const Class &); \ - void operator=(const Class &); \ - public: \ - struct Private; \ - Private & m - - -// Public interface. -namespace nvtt -{ - /// Supported compression formats. - enum Format - { - // No compression. - Format_RGB, - Format_RGBA = Format_RGB, - - // DX9 formats. - Format_DXT1, - Format_DXT1a, // DXT1 with binary alpha. - Format_DXT3, - Format_DXT5, - Format_DXT5n, // Compressed HILO: R=1, G=y, B=0, A=x - - // DX10 formats. - Format_BC1 = Format_DXT1, - Format_BC1a = Format_DXT1a, - Format_BC2 = Format_DXT3, - Format_BC3 = Format_DXT5, - Format_BC3n = Format_DXT5n, - Format_BC4, // ATI1 - Format_BC5, // 3DC, ATI2 - }; - - /// Quality modes. - enum Quality - { - Quality_Fastest, - Quality_Normal, - Quality_Production, - Quality_Highest, - }; - - /// Compression options. This class describes the desired compression format and other compression settings. - struct CompressionOptions - { - NVTT_DECLARE_PIMPL(CompressionOptions); - - NVTT_API CompressionOptions(); - NVTT_API ~CompressionOptions(); - - NVTT_API void reset(); - - NVTT_API void setFormat(Format format); - NVTT_API void setQuality(Quality quality); - NVTT_API void setColorWeights(float red, float green, float blue, float alpha = 1.0f); - - NVTT_API void setExternalCompressor(const char * name); - - // Set color mask to describe the RGB/RGBA format. - NVTT_API void setPixelFormat(unsigned int bitcount, unsigned int rmask, unsigned int gmask, unsigned int bmask, unsigned int amask); - - NVTT_API void setQuantization(bool colorDithering, bool alphaDithering, bool binaryAlpha, int alphaThreshold = 127); - }; - - - /// Wrap modes. - enum WrapMode - { - WrapMode_Clamp, - WrapMode_Repeat, - WrapMode_Mirror, - }; - - /// Texture types. - enum TextureType - { - TextureType_2D, - TextureType_Cube, - // TextureType_3D, - }; - - /// Input formats. - enum InputFormat - { - InputFormat_BGRA_8UB, - // InputFormat_RGBE_8UB, - // InputFormat_BGRA_32F, - }; - - /// Mipmap downsampling filters. - enum MipmapFilter - { - MipmapFilter_Box, ///< Box filter is quite good and very fast. - MipmapFilter_Triangle, ///< Triangle filter blurs the results too much, but that might be what you want. - MipmapFilter_Kaiser, ///< Kaiser-windowed Sinc filter is the best downsampling filter. - }; - - /// Color transformation. - enum ColorTransform - { - ColorTransform_None, - ColorTransform_Linear, - }; - - /// Extents rounding mode. - enum RoundMode - { - RoundMode_None, - RoundMode_ToNextPowerOfTwo, - RoundMode_ToNearestPowerOfTwo, - RoundMode_ToPreviousPowerOfTwo, - }; - - /// Alpha mode. - enum AlphaMode - { - AlphaMode_None, - AlphaMode_Transparency, - AlphaMode_Premultiplied, - }; - - /// Input options. Specify format and layout of the input texture. - struct InputOptions - { - NVTT_DECLARE_PIMPL(InputOptions); - - NVTT_API InputOptions(); - NVTT_API ~InputOptions(); - - // Set default options. - NVTT_API void reset(); - - // Setup input layout. 
- NVTT_API void setTextureLayout(TextureType type, int w, int h, int d = 1); - NVTT_API void resetTextureLayout(); - - // Set mipmap data. Copies the data. - NVTT_API bool setMipmapData(const void * data, int w, int h, int d = 1, int face = 0, int mipmap = 0); - - // Describe the format of the input. - NVTT_API void setFormat(InputFormat format); - - // Set the way the input alpha channel is interpreted. - NVTT_API void setAlphaMode(AlphaMode alphaMode); - - // Set gamma settings. - NVTT_API void setGamma(float inputGamma, float outputGamma); - - // Set texture wrappign mode. - NVTT_API void setWrapMode(WrapMode mode); - - // Set mipmapping options. - NVTT_API void setMipmapFilter(MipmapFilter filter); - NVTT_API void setMipmapGeneration(bool enabled, int maxLevel = -1); - NVTT_API void setKaiserParameters(float width, float alpha, float stretch); - - // Set normal map options. - NVTT_API void setNormalMap(bool b); - NVTT_API void setConvertToNormalMap(bool convert); - NVTT_API void setHeightEvaluation(float redScale, float greenScale, float blueScale, float alphaScale); - NVTT_API void setNormalFilter(float sm, float medium, float big, float large); - NVTT_API void setNormalizeMipmaps(bool b); - - // Set color transforms. @@ Not implemented! - NVTT_API void setColorTransform(ColorTransform t); - NVTT_API void setLinearTransform(int channel, float w0, float w1, float w2, float w3); - - // Set resizing options. - NVTT_API void setMaxExtents(int d); - NVTT_API void setRoundMode(RoundMode mode); - }; - - - /// Output handler. - struct OutputHandler - { - virtual ~OutputHandler() {} - - /// Indicate the start of a new compressed image that's part of the final texture. - virtual void beginImage(int size, int width, int height, int depth, int face, int miplevel) = 0; - - /// Output data. Compressed data is output as soon as it's generated to minimize memory allocations. - virtual bool writeData(const void * data, int size) = 0; - }; - - /// Error codes. - enum Error - { - Error_Unknown, - Error_InvalidInput, - Error_UnsupportedFeature, - Error_CudaError, - Error_FileOpen, - Error_FileWrite, - }; - - /// Error handler. - struct ErrorHandler - { - virtual ~ErrorHandler() {} - - // Signal error. - virtual void error(Error e) = 0; - }; - - - /// Output Options. This class holds pointers to the interfaces that are used to report the output of - /// the compressor to the user. - struct OutputOptions - { - NVTT_DECLARE_PIMPL(OutputOptions); - - NVTT_API OutputOptions(); - NVTT_API ~OutputOptions(); - - // Set default options. - NVTT_API void reset(); - - NVTT_API void setFileName(const char * fileName); - - NVTT_API void setOutputHandler(OutputHandler * outputHandler); - NVTT_API void setErrorHandler(ErrorHandler * errorHandler); - NVTT_API void setOutputHeader(bool outputHeader); - }; - - - /// Texture compressor. - struct Compressor - { - NVTT_DECLARE_PIMPL(Compressor); - - NVTT_API Compressor(); - NVTT_API ~Compressor(); - - NVTT_API void enableCudaAcceleration(bool enable); - NVTT_API bool isCudaAccelerationEnabled() const; - - // Main entrypoint of the compression library. - NVTT_API bool process(const InputOptions & inputOptions, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const; - - // Estimate the size of compressing the input with the given options. - NVTT_API int estimateSize(const InputOptions & inputOptions, const CompressionOptions & compressionOptions) const; - }; - - - // Return string for the given error code. 
- NVTT_API const char * errorString(Error e); - - // Return NVTT version. - NVTT_API unsigned int version(); - -} // nvtt namespace - -#endif // NV_TT_H +// Copyright (c) 2009-2011 Ignacio Castano +// Copyright (c) 2007-2009 NVIDIA Corporation -- Ignacio Castano +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +#pragma once +#ifndef NVTT_H +#define NVTT_H + +// Function linkage +#if NVTT_SHARED + +#if defined _WIN32 || defined WIN32 || defined __NT__ || defined __WIN32__ || defined __MINGW32__ +# ifdef NVTT_EXPORTS +# define NVTT_API __declspec(dllexport) +# else +# define NVTT_API __declspec(dllimport) +# endif +#endif + +#if defined __GNUC__ >= 4 +# ifdef NVTT_EXPORTS +# define NVTT_API __attribute__((visibility("default"))) +# endif +#endif + +#endif // NVTT_SHARED + +#if !defined NVTT_API +# define NVTT_API +#endif + +#define NVTT_VERSION 20100 + +#define NVTT_FORBID_COPY(Class) \ + private: \ + Class(const Class &); \ + void operator=(const Class &); \ + public: + +#define NVTT_DECLARE_PIMPL(Class) \ + public: \ + struct Private; \ + Private & m + + +// Public interface. +namespace nvtt +{ + // Forward declarations. + struct Surface; + struct CubeSurface; + + + // Supported block-compression formats. + // @@ I wish I had distinguished between "formats" and compressors. + // That is: + // - 'DXT1' is a format 'DXT1a' and 'DXT1n' are DXT1 compressors. + // - 'DXT3' is a format 'DXT3n' is a DXT3 compressor. + // Having multiple enums for the same ids only creates confusion. Clean this up. + enum Format + { + // No block-compression (linear). + Format_RGB, + Format_RGBA = Format_RGB, + + // DX9 formats. + Format_DXT1, + Format_DXT1a, // DXT1 with binary alpha. + Format_DXT3, + Format_DXT5, + Format_DXT5n, // Compressed HILO: R=1, G=y, B=0, A=x + + // DX10 formats. + Format_BC1 = Format_DXT1, + Format_BC1a = Format_DXT1a, + Format_BC2 = Format_DXT3, + Format_BC3 = Format_DXT5, + Format_BC3n = Format_DXT5n, + Format_BC4, // ATI1 + Format_BC5, // 3DC, ATI2 + + Format_DXT1n, // Not supported. + Format_CTX1, // Not supported. + + Format_BC6, + Format_BC7, + + Format_BC3_RGBM, // + + Format_Count + }; + + // Pixel types. These basically indicate how the output should be interpreted, but do not have any influence over the input. They are only relevant in RGBA mode. + enum PixelType + { + PixelType_UnsignedNorm = 0, + PixelType_SignedNorm = 1, // Not supported yet. + PixelType_UnsignedInt = 2, // Not supported yet. 
+ PixelType_SignedInt = 3, // Not supported yet. + PixelType_Float = 4, + PixelType_UnsignedFloat = 5, + PixelType_SharedExp = 6, // Shared exponent. + }; + + // Quality modes. + enum Quality + { + Quality_Fastest, + Quality_Normal, + Quality_Production, + Quality_Highest, + }; + + // DXT decoder. + enum Decoder + { + Decoder_D3D10, + Decoder_D3D9, + Decoder_NV5x, + //Decoder_RSX, // To take advantage of DXT5 bug. + }; + + + // Compression options. This class describes the desired compression format and other compression settings. + struct CompressionOptions + { + NVTT_FORBID_COPY(CompressionOptions); + NVTT_DECLARE_PIMPL(CompressionOptions); + + NVTT_API CompressionOptions(); + NVTT_API ~CompressionOptions(); + + NVTT_API void reset(); + + NVTT_API void setFormat(Format format); + NVTT_API void setQuality(Quality quality); + NVTT_API void setColorWeights(float red, float green, float blue, float alpha = 1.0f); + + NVTT_API void setExternalCompressor(const char * name); + + // Set color mask to describe the RGB/RGBA format. + NVTT_API void setPixelFormat(unsigned int bitcount, unsigned int rmask, unsigned int gmask, unsigned int bmask, unsigned int amask); + NVTT_API void setPixelFormat(unsigned char rsize, unsigned char gsize, unsigned char bsize, unsigned char asize); + + NVTT_API void setPixelType(PixelType pixelType); + + NVTT_API void setPitchAlignment(int pitchAlignment); + + // @@ I wish this wasn't part of the compression options. Quantization is applied before compression. We don't have compressors with error diffusion. + // @@ These options are only taken into account when using the InputOptions API. + NVTT_API void setQuantization(bool colorDithering, bool alphaDithering, bool binaryAlpha, int alphaThreshold = 127); + + NVTT_API void setTargetDecoder(Decoder decoder); + + // Translate to and from D3D formats. + NVTT_API unsigned int d3d9Format() const; + //NVTT_API bool setD3D9Format(unsigned int format); + //NVTT_API unsigned int dxgiFormat() const; + //NVTT_API bool setDxgiFormat(unsigned int format); + }; + + /* + // DXGI_FORMAT_R16G16_FLOAT + compressionOptions.setPixelType(PixelType_Float); + compressionOptions.setPixelFormat2(16, 16, 0, 0); + + // DXGI_FORMAT_R32G32B32A32_FLOAT + compressionOptions.setPixelType(PixelType_Float); + compressionOptions.setPixelFormat2(32, 32, 32, 32); + */ + + + // Wrap modes. + enum WrapMode + { + WrapMode_Clamp, + WrapMode_Repeat, + WrapMode_Mirror, + }; + + // Texture types. + enum TextureType + { + TextureType_2D, + TextureType_Cube, + TextureType_3D, + TextureType_Array, + }; + + // Input formats. + enum InputFormat + { + InputFormat_BGRA_8UB, // Normalized [0, 1] 8 bit fixed point. + InputFormat_RGBA_16F, // 16 bit floating point. + InputFormat_RGBA_32F, // 32 bit floating point. + InputFormat_R_32F, // Single channel 32 bit floating point. + }; + + // Mipmap downsampling filters. + enum MipmapFilter + { + MipmapFilter_Box, // Box filter is quite good and very fast. + MipmapFilter_Triangle, // Triangle filter blurs the results too much, but that might be what you want. + MipmapFilter_Kaiser, // Kaiser-windowed Sinc filter is the best downsampling filter. + }; + + // Texture resize filters. + enum ResizeFilter + { + ResizeFilter_Box, + ResizeFilter_Triangle, + ResizeFilter_Kaiser, + ResizeFilter_Mitchell, + }; + + // Extents rounding mode. 
+ enum RoundMode + { + RoundMode_None, + RoundMode_ToNextPowerOfTwo, + RoundMode_ToNearestPowerOfTwo, + RoundMode_ToPreviousPowerOfTwo, + RoundMode_ToNextMultipleOfFour, // (New in NVTT 2.1) + RoundMode_ToNearestMultipleOfFour, // (New in NVTT 2.1) + RoundMode_ToPreviousMultipleOfFour, // (New in NVTT 2.1) + }; + + // Alpha mode. + enum AlphaMode + { + AlphaMode_None, + AlphaMode_Transparency, + AlphaMode_Premultiplied, + }; + + // Input options. Specify format and layout of the input texture. (Deprecated in NVTT 2.1) + struct InputOptions + { + NVTT_FORBID_COPY(InputOptions); + NVTT_DECLARE_PIMPL(InputOptions); + + NVTT_API InputOptions(); + NVTT_API ~InputOptions(); + + // Set default options. + NVTT_API void reset(); + + // Setup input layout. + NVTT_API void setTextureLayout(TextureType type, int w, int h, int d = 1, int arraySize = 1); + NVTT_API void resetTextureLayout(); + + // Set mipmap data. Copies the data. + NVTT_API bool setMipmapData(const void * data, int w, int h, int d = 1, int face = 0, int mipmap = 0); + + // Describe the format of the input. + NVTT_API void setFormat(InputFormat format); + + // Set the way the input alpha channel is interpreted. @@ Not implemented! + NVTT_API void setAlphaMode(AlphaMode alphaMode); + + // Set gamma settings. + NVTT_API void setGamma(float inputGamma, float outputGamma); + + // Set texture wrapping mode. + NVTT_API void setWrapMode(WrapMode mode); + + // Set mipmapping options. + NVTT_API void setMipmapFilter(MipmapFilter filter); + NVTT_API void setMipmapGeneration(bool enabled, int maxLevel = -1); + NVTT_API void setKaiserParameters(float width, float alpha, float stretch); + + // Set normal map options. + NVTT_API void setNormalMap(bool b); + NVTT_API void setConvertToNormalMap(bool convert); + NVTT_API void setHeightEvaluation(float redScale, float greenScale, float blueScale, float alphaScale); + NVTT_API void setNormalFilter(float sm, float medium, float big, float large); + NVTT_API void setNormalizeMipmaps(bool b); + + // Set resizing options. + NVTT_API void setMaxExtents(int d); + NVTT_API void setRoundMode(RoundMode mode); + }; + + + // Output handler. + struct OutputHandler + { + virtual ~OutputHandler() {} + + // Indicate the start of a new compressed image that's part of the final texture. + virtual void beginImage(int size, int width, int height, int depth, int face, int miplevel) = 0; + + // Output data. Compressed data is output as soon as it's generated to minimize memory allocations. + virtual bool writeData(const void * data, int size) = 0; + + // Indicate the end of the compressed image. (New in NVTT 2.1) + virtual void endImage() = 0; + }; + + // Error codes. + enum Error + { + Error_Unknown, + Error_InvalidInput, + Error_UnsupportedFeature, + Error_CudaError, + Error_FileOpen, + Error_FileWrite, + Error_UnsupportedOutputFormat, + Error_Count + }; + + // Error handler. + struct ErrorHandler + { + virtual ~ErrorHandler() {} + + // Signal error. + virtual void error(Error e) = 0; + }; + + // Container. + enum Container + { + Container_DDS, + Container_DDS10, + // Container_KTX, // Khronos Texture: http://www.khronos.org/opengles/sdk/tools/KTX/ + // Container_VTF, // Valve Texture Format: http://developer.valvesoftware.com/wiki/Valve_Texture_Format + }; + + + // Output Options. This class holds pointers to the interfaces that are used to report the output of + // the compressor to the user. 
+ struct OutputOptions + { + NVTT_FORBID_COPY(OutputOptions); + NVTT_DECLARE_PIMPL(OutputOptions); + + NVTT_API OutputOptions(); + NVTT_API ~OutputOptions(); + + // Set default options. + NVTT_API void reset(); + + NVTT_API void setFileName(const char * fileName); + NVTT_API void setFileHandle(void * fp); + + NVTT_API void setOutputHandler(OutputHandler * outputHandler); + NVTT_API void setErrorHandler(ErrorHandler * errorHandler); + + NVTT_API void setOutputHeader(bool outputHeader); + NVTT_API void setContainer(Container container); + NVTT_API void setUserVersion(int version); + NVTT_API void setSrgbFlag(bool b); + }; + + // (New in NVTT 2.1) + typedef void Task(void * context, int id); + + // (New in NVTT 2.1) + struct TaskDispatcher + { + virtual ~TaskDispatcher() {} + + virtual void dispatch(Task * task, void * context, int count) = 0; + }; + + // Context. + struct Compressor + { + NVTT_FORBID_COPY(Compressor); + NVTT_DECLARE_PIMPL(Compressor); + + NVTT_API Compressor(); + NVTT_API ~Compressor(); + + // Context settings. + NVTT_API void enableCudaAcceleration(bool enable); + NVTT_API bool isCudaAccelerationEnabled() const; + NVTT_API void setTaskDispatcher(TaskDispatcher * disp); // (New in NVTT 2.1) + + // InputOptions API. + NVTT_API bool process(const InputOptions & inputOptions, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const; + NVTT_API int estimateSize(const InputOptions & inputOptions, const CompressionOptions & compressionOptions) const; + + // Surface API. (New in NVTT 2.1) + NVTT_API bool outputHeader(const Surface & img, int mipmapCount, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const; + NVTT_API bool compress(const Surface & img, int face, int mipmap, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const; + NVTT_API int estimateSize(const Surface & img, int mipmapCount, const CompressionOptions & compressionOptions) const; + + // CubeSurface API. (New in NVTT 2.1) + NVTT_API bool outputHeader(const CubeSurface & cube, int mipmapCount, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const; + NVTT_API bool compress(const CubeSurface & cube, int mipmap, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const; + NVTT_API int estimateSize(const CubeSurface & cube, int mipmapCount, const CompressionOptions & compressionOptions) const; + + // Raw API. (New in NVTT 2.1) + NVTT_API bool outputHeader(TextureType type, int w, int h, int d, int arraySize, int mipmapCount, bool isNormalMap, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const; + NVTT_API bool compress(int w, int h, int d, int face, int mipmap, const float * rgba, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const; + NVTT_API int estimateSize(int w, int h, int d, int mipmapCount, const CompressionOptions & compressionOptions) const; + }; + + // "Compressor" is deprecated. This should have been called "Context" + typedef Compressor Context; + + // (New in NVTT 2.1) + enum NormalTransform { + NormalTransform_Orthographic, + NormalTransform_Stereographic, + NormalTransform_Paraboloid, + NormalTransform_Quartic + //NormalTransform_DualParaboloid, + }; + + // (New in NVTT 2.1) + enum ToneMapper { + ToneMapper_Linear, + ToneMapper_Reindhart, + ToneMapper_Halo, + ToneMapper_Lightmap, + }; + + + // A surface is one level of a 2D or 3D texture. 
(New in NVTT 2.1) + // @@ It would be nice to add support for texture borders for correct resizing of tiled textures and constrained DXT compression. + struct Surface + { + NVTT_API Surface(); + NVTT_API Surface(const Surface & img); + NVTT_API ~Surface(); + + NVTT_API void operator=(const Surface & img); + + // Texture parameters. + NVTT_API void setWrapMode(WrapMode mode); + NVTT_API void setAlphaMode(AlphaMode alphaMode); + NVTT_API void setNormalMap(bool isNormalMap); + + // Queries. + NVTT_API bool isNull() const; + NVTT_API int width() const; + NVTT_API int height() const; + NVTT_API int depth() const; + NVTT_API TextureType type() const; + NVTT_API WrapMode wrapMode() const; + NVTT_API AlphaMode alphaMode() const; + NVTT_API bool isNormalMap() const; + NVTT_API int countMipmaps() const; + NVTT_API int countMipmaps(int min_size) const; + NVTT_API float alphaTestCoverage(float alphaRef = 0.5, int alpha_channel = 3) const; + NVTT_API float average(int channel, int alpha_channel = -1, float gamma = 2.2f) const; + NVTT_API const float * data() const; + NVTT_API const float * channel(int i) const; + NVTT_API void histogram(int channel, float rangeMin, float rangeMax, int binCount, int * binPtr) const; + NVTT_API void range(int channel, float * rangeMin, float * rangeMax, int alpha_channel = -1, float alpha_ref = 0.f) const; + + // Texture data. + NVTT_API bool load(const char * fileName, bool * hasAlpha = 0); + NVTT_API bool save(const char * fileName, bool hasAlpha = 0, bool hdr = 0) const; + NVTT_API bool setImage(int w, int h, int d); + NVTT_API bool setImage(InputFormat format, int w, int h, int d, const void * data); + NVTT_API bool setImage(InputFormat format, int w, int h, int d, const void * r, const void * g, const void * b, const void * a); + NVTT_API bool setImage2D(Format format, Decoder decoder, int w, int h, const void * data); + + // Resizing methods. + NVTT_API void resize(int w, int h, int d, ResizeFilter filter); + NVTT_API void resize(int w, int h, int d, ResizeFilter filter, float filterWidth, const float * params = 0); + NVTT_API void resize(int maxExtent, RoundMode mode, ResizeFilter filter); + NVTT_API void resize(int maxExtent, RoundMode mode, ResizeFilter filter, float filterWidth, const float * params = 0); + NVTT_API void resize_make_square(int maxExtent, RoundMode roundMode, ResizeFilter filter); + + NVTT_API bool buildNextMipmap(MipmapFilter filter, int min_size = 1); + NVTT_API bool buildNextMipmap(MipmapFilter filter, float filterWidth, const float * params = 0, int min_size = 1); + NVTT_API bool buildNextMipmapSolidColor(const float * const color_components); + NVTT_API void canvasSize(int w, int h, int d); + // associated to resizing: + NVTT_API bool canMakeNextMipmap(int min_size = 1); + + // Color transforms. 
+ NVTT_API void toLinear(float gamma); + NVTT_API void toGamma(float gamma); + NVTT_API void toLinear(int channel, float gamma); + NVTT_API void toGamma(int channel, float gamma); + NVTT_API void toSrgb(); + NVTT_API void toLinearFromSrgb(); + NVTT_API void toXenonSrgb(); + NVTT_API void transform(const float w0[4], const float w1[4], const float w2[4], const float w3[4], const float offset[4]); + NVTT_API void swizzle(int r, int g, int b, int a); + NVTT_API void scaleBias(int channel, float scale, float bias); + NVTT_API void clamp(int channel, float low = 0.0f, float high = 1.0f); + NVTT_API void blend(float r, float g, float b, float a, float t); + NVTT_API void premultiplyAlpha(); + NVTT_API void toGreyScale(float redScale, float greenScale, float blueScale, float alphaScale); + NVTT_API void setBorder(float r, float g, float b, float a); + NVTT_API void fill(float r, float g, float b, float a); + NVTT_API void scaleAlphaToCoverage(float coverage, float alphaRef = 0.5f, int alpha_channel = 3); + NVTT_API void toRGBM(float range = 1.0f, float threshold = 0.25f); + NVTT_API void fromRGBM(float range = 1.0f, float threshold = 0.25f); + NVTT_API void toLM(float range = 1.0f, float threshold = 0.0f); + NVTT_API void toRGBE(int mantissaBits, int exponentBits); + NVTT_API void fromRGBE(int mantissaBits, int exponentBits); + NVTT_API void toYCoCg(); + NVTT_API void blockScaleCoCg(int bits = 5, float threshold = 0.0f); + NVTT_API void fromYCoCg(); + NVTT_API void toLUVW(float range = 1.0f); + NVTT_API void fromLUVW(float range = 1.0f); + NVTT_API void abs(int channel); + NVTT_API void convolve(int channel, int kernelSize, float * kernelData); + NVTT_API void toLogScale(int channel, float base); + NVTT_API void fromLogScale(int channel, float base); + NVTT_API void setAtlasBorder(int w, int h, float r, float g, float b, float a); + + NVTT_API void toneMap(ToneMapper tm, float * parameters); + + //NVTT_API void blockLuminanceScale(float scale); + + // Color quantization. + NVTT_API void binarize(int channel, float threshold, bool dither); + NVTT_API void quantize(int channel, int bits, bool exactEndPoints, bool dither); + + // Normal map transforms. + NVTT_API void toNormalMap(float sm, float medium, float big, float large); + NVTT_API void normalizeNormalMap(); + NVTT_API void transformNormals(NormalTransform xform); + NVTT_API void reconstructNormals(NormalTransform xform); + NVTT_API void toCleanNormalMap(); + NVTT_API void packNormals(float scale = 0.5f, float bias = 0.5f); // [-1,1] -> [ 0,1] + NVTT_API void expandNormals(float scale = 2.0f, float bias = -1.0f); // [ 0,1] -> [-1,1] + NVTT_API Surface createToksvigMap(float power) const; + NVTT_API Surface createCleanMap() const; + + // Geometric transforms. + NVTT_API void flipX(); + NVTT_API void flipY(); + NVTT_API void flipZ(); + NVTT_API Surface createSubImage(int x0, int x1, int y0, int y1, int z0, int z1) const; + + // Copy image data. + NVTT_API bool copyChannel(const Surface & srcImage, int srcChannel); + NVTT_API bool copyChannel(const Surface & srcImage, int srcChannel, int dstChannel); + + NVTT_API bool addChannel(const Surface & img, int srcChannel, int dstChannel, float scale); + + NVTT_API bool copy(const Surface & src, int xsrc, int ysrc, int zsrc, int xsize, int ysize, int zsize, int xdst, int ydst, int zdst); + + + //private: + void detach(); + + struct Private; + Private * m; + }; + + + // Cube layout formats. 
(New in NVTT 2.1) + enum CubeLayout { + CubeLayout_VerticalCross, + CubeLayout_HorizontalCross, + CubeLayout_Column, + CubeLayout_Row, + CubeLayout_LatitudeLongitude + }; + + // (New in NVTT 2.1) + enum EdgeFixup { + EdgeFixup_None, + EdgeFixup_Stretch, + EdgeFixup_Warp, + EdgeFixup_Average, + }; + + // A CubeSurface is one level of a cube map texture. (New in NVTT 2.1) + struct CubeSurface + { + NVTT_API CubeSurface(); + NVTT_API CubeSurface(const CubeSurface & img); + NVTT_API ~CubeSurface(); + + NVTT_API void operator=(const CubeSurface & img); + + // Queries. + NVTT_API bool isNull() const; + NVTT_API int edgeLength() const; + NVTT_API int countMipmaps() const; + + // Texture data. + NVTT_API bool load(const char * fileName, int mipmap); + NVTT_API bool save(const char * fileName) const; + + NVTT_API Surface & face(int face); + NVTT_API const Surface & face(int face) const; + + // Layout conversion. @@ Not implemented. + NVTT_API void fold(const Surface & img, CubeLayout layout); + NVTT_API Surface unfold(CubeLayout layout) const; + + // @@ Angular extent filtering. + + // @@ Add resizing methods. + + // @@ Add edge fixup methods. + + NVTT_API float average(int channel) const; + NVTT_API void range(int channel, float * minimum_ptr, float * maximum_ptr) const; + NVTT_API void clamp(int channel, float low = 0.0f, float high = 1.0f); + + + // Filtering. + NVTT_API CubeSurface irradianceFilter(int size, EdgeFixup fixupMethod) const; + NVTT_API CubeSurface cosinePowerFilter(int size, float cosinePower, EdgeFixup fixupMethod) const; + + NVTT_API CubeSurface fastResample(int size, EdgeFixup fixupMethod) const; + + + /* + NVTT_API void resize(int w, int h, ResizeFilter filter); + NVTT_API void resize(int w, int h, ResizeFilter filter, float filterWidth, const float * params = 0); + NVTT_API void resize(int maxExtent, RoundMode mode, ResizeFilter filter); + NVTT_API void resize(int maxExtent, RoundMode mode, ResizeFilter filter, float filterWidth, const float * params = 0); + NVTT_API bool buildNextMipmap(MipmapFilter filter); + NVTT_API bool buildNextMipmap(MipmapFilter filter, float filterWidth, const float * params = 0); + */ + + // Color transforms. + NVTT_API void toLinear(float gamma); + NVTT_API void toGamma(float gamma); + + //private: + void detach(); + + struct Private; + Private * m; + }; + + + // Return string for the given error code. + NVTT_API const char * errorString(Error e); + + // Return NVTT version. + NVTT_API unsigned int version(); + + // Image comparison and error measurement functions. 
(New in NVTT 2.1) + NVTT_API float rmsError(const Surface & reference, const Surface & img); + NVTT_API float rmsAlphaError(const Surface & reference, const Surface & img); + NVTT_API float cieLabError(const Surface & reference, const Surface & img); + NVTT_API float angularError(const Surface & reference, const Surface & img); + NVTT_API Surface diff(const Surface & reference, const Surface & img, float scale); + + NVTT_API float rmsToneMappedError(const Surface & reference, const Surface & img, float exposure); + + + NVTT_API Surface histogram(const Surface & img, int width, int height); + NVTT_API Surface histogram(const Surface & img, float minRange, float maxRange, int width, int height); + +} // nvtt namespace + +#endif // NVTT_H Index: ps/trunk/libraries/source/nvtt/patches/arm-fix.patch =================================================================== --- ps/trunk/libraries/source/nvtt/patches/arm-fix.patch +++ ps/trunk/libraries/source/nvtt/patches/arm-fix.patch @@ -1,21 +0,0 @@ -Index: src/src/nvcore/nvcore.h -=================================================================== ---- src/src/nvcore/nvcore.h (revision 13633) -+++ src/src/nvcore/nvcore.h (revision 13634) -@@ -67,6 +67,7 @@ - // NV_CPU_X86 - // NV_CPU_X86_64 - // NV_CPU_PPC -+// NV_CPU_ARM - - #define NV_CPU_STRING POSH_CPU_STRING - -@@ -76,6 +77,8 @@ - # define NV_CPU_X86 1 - #elif defined POSH_CPU_PPC - # define NV_CPU_PPC 1 -+#elif defined POSH_CPU_STRONGARM -+# define NV_CPU_ARM 1 - #else - # error "Unsupported CPU" - #endif Index: ps/trunk/libraries/source/nvtt/patches/arm64-fix.patch =================================================================== --- ps/trunk/libraries/source/nvtt/patches/arm64-fix.patch +++ ps/trunk/libraries/source/nvtt/patches/arm64-fix.patch @@ -1,67 +0,0 @@ -Patch from http://trac.wildfiregames.com/ticket/3344 -Backport from upstream https://github.com/castano/nvidia-texture-tools/commit/58617584d4d2541ff9fcfe23a9a492af86b11efb - -Index: src/src/nvcore/Debug.cpp -=================================================================== ---- src/src/nvcore/Debug.cpp (revision 16870) -+++ src/src/nvcore/Debug.cpp (working copy) -@@ -232,6 +232,9 @@ - # elif NV_CPU_PPC - ucontext_t * ucp = (ucontext_t *)secret; - return (void *) ucp->uc_mcontext.regs->nip; -+# elif NV_CPU_AARCH64 -+ ucontext_t * ucp = (ucontext_t *)secret; -+ return (void *) ucp->uc_mcontext.pc; - # endif - # endif - -Index: src/src/nvcore/nvcore.h -=================================================================== ---- src/src/nvcore/nvcore.h (revision 16870) -+++ src/src/nvcore/nvcore.h (working copy) -@@ -68,6 +68,7 @@ - // NV_CPU_X86_64 - // NV_CPU_PPC - // NV_CPU_ARM -+// NV_CPU_AARCH64 - - #define NV_CPU_STRING POSH_CPU_STRING - -@@ -79,6 +80,8 @@ - # define NV_CPU_PPC 1 - #elif defined POSH_CPU_STRONGARM - # define NV_CPU_ARM 1 -+#elif defined POSH_CPU_AARCH64 -+# define NV_CPU_AARCH64 1 - #else - # error "Unsupported CPU" - #endif -Index: src/src/nvcore/poshlib/posh.h -=================================================================== ---- src/src/nvcore/poshlib/posh.h (revision 16870) -+++ src/src/nvcore/poshlib/posh.h (working copy) -@@ -485,6 +485,11 @@ - # define POSH_CPU_STRING "ARM" - #endif - -+#if defined __aarch64__ -+# define POSH_CPU_AARCH64 1 -+# define POSH_CPU_STRING "ARM64" -+#endif -+ - #if defined mips || defined __mips__ || defined __MIPS__ || defined _MIPS - # define POSH_CPU_MIPS 1 - # if defined _R5900 -@@ -658,7 +663,7 @@ - ** the MIPS series, so we have to be careful about those. 
- ** ---------------------------------------------------------------------------- - */ --#if defined POSH_CPU_X86 || defined POSH_CPU_AXP || defined POSH_CPU_STRONGARM || defined POSH_OS_WIN32 || defined POSH_OS_WINCE || defined __MIPSEL__ -+#if defined POSH_CPU_X86 || defined POSH_CPU_AXP || defined POSH_CPU_STRONGARM || defined POSH_CPU_AARCH64 || defined POSH_OS_WIN32 || defined POSH_OS_WINCE || defined __MIPSEL__ - # define POSH_ENDIAN_STRING "little" - # define POSH_LITTLE_ENDIAN 1 - #else -Index: libraries/source/spidermonkey/mozjs-31.2.0.rc0.tar.bz2 -=================================================================== -Cannot display: file marked as a binary type. -svn:mime-type = application/octet-stream Index: ps/trunk/libraries/source/nvtt/patches/clang-cpp11-error.patch =================================================================== --- ps/trunk/libraries/source/nvtt/patches/clang-cpp11-error.patch +++ ps/trunk/libraries/source/nvtt/patches/clang-cpp11-error.patch @@ -1,67 +0,0 @@ -Index: src/src/nvimage/ImageIO.cpp -=================================================================== ---- src/src/nvimage/ImageIO.cpp (revision 16371) -+++ src/src/nvimage/ImageIO.cpp (working copy) -@@ -132,13 +132,13 @@ - { - nvDebugCheck(fileName != NULL); - -- StdInputStream stream(fileName); -- -- if (stream.isError()) { -- return false; -- } -- -- return loadFloat(fileName, stream); -+ StdInputStream stream(fileName); -+ -+ if (stream.isError()) { -+ return NULL; -+ } -+ -+ return loadFloat(fileName, stream); - } - - FloatImage * nv::ImageIO::loadFloat(const char * fileName, Stream & s) -@@ -230,13 +230,13 @@ - case TGA_TYPE_RLE_INDEXED: - rle = true; - // no break is intended! -- case TGA_TYPE_INDEXED: -- if( tga.colormap_type!=1 || tga.colormap_size!=24 || tga.colormap_length>256 ) { -- nvDebug( "*** ImageIO::loadTGA: Error, only 24bit paletted images are supported.\n" ); -- return false; -- } -- pal = true; -- break; -+ case TGA_TYPE_INDEXED: -+ if( tga.colormap_type!=1 || tga.colormap_size!=24 || tga.colormap_length>256 ) { -+ nvDebug( "*** ImageIO::loadTGA: Error, only 24bit paletted images are supported.\n" ); -+ return NULL; -+ } -+ pal = true; -+ break; - - case TGA_TYPE_RLE_RGB: - rle = true; -@@ -251,13 +251,13 @@ - case TGA_TYPE_GREY: - grey = true; - break; -- -- default: -- nvDebug( "*** ImageIO::loadTGA: Error, unsupported image type.\n" ); -- return false; -- } -- -- const uint pixel_size = (tga.pixel_size/8); -+ -+ default: -+ nvDebug( "*** ImageIO::loadTGA: Error, unsupported image type.\n" ); -+ return NULL; -+ } -+ -+ const uint pixel_size = (tga.pixel_size/8); - nvDebugCheck(pixel_size <= 4); - - const uint size = tga.width * tga.height * pixel_size; Index: ps/trunk/libraries/source/nvtt/patches/cmake-build.patch =================================================================== --- ps/trunk/libraries/source/nvtt/patches/cmake-build.patch +++ ps/trunk/libraries/source/nvtt/patches/cmake-build.patch @@ -0,0 +1,243 @@ +--- + CMakeLists.txt | 16 ++++---- + src/CMakeLists.txt | 82 ++++++++++++++++++------------------- + src/nvcore/CMakeLists.txt | 6 +-- + src/nvimage/CMakeLists.txt | 6 +-- + src/nvmath/CMakeLists.txt | 6 +-- + src/nvthread/CMakeLists.txt | 6 +-- + src/nvtt/CMakeLists.txt | 6 +-- + 7 files changed, 64 insertions(+), 64 deletions(-) + +diff --git a/CMakeLists.txt b/CMakeLists.txt +index ab4dcb6..9c80369 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -11,19 +11,19 @@ SET(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${NV_CMAKE_DIR}") + 
#ENDIF(CMAKE_COMPILER_IS_GNUCC) + set (CMAKE_CXX_STANDARD 11) + +-IF(WIN32) ++#IF(WIN32) + # gnuwin32 paths: +- SET(GNUWIN32_PATH "${NV_SOURCE_DIR}/extern/gnuwin32") +- SET(CMAKE_INCLUDE_PATH ${CMAKE_INCLUDE_PATH} "${GNUWIN32_PATH}/include") +- SET(CMAKE_LIBRARY_PATH ${CMAKE_LIBRARY_PATH} "${GNUWIN32_PATH}/lib") ++ #SET(GNUWIN32_PATH "${NV_SOURCE_DIR}/extern/gnuwin32") ++ #SET(CMAKE_INCLUDE_PATH ${CMAKE_INCLUDE_PATH} "${GNUWIN32_PATH}/include") ++ #SET(CMAKE_LIBRARY_PATH ${CMAKE_LIBRARY_PATH} "${GNUWIN32_PATH}/lib") + + # Set GLUT path: +- SET(GLUT_ROOT_DIR "${NV_SOURCE_DIR}/extern/glut") ++ #SET(GLUT_ROOT_DIR "${NV_SOURCE_DIR}/extern/glut") + + # Set FreeImage path: +- SET(FREEIMAGE_ROOT_DIR "${NV_SOURCE_DIR}/extern/FreeImage") +- +-ENDIF(WIN32) ++ #SET(FREEIMAGE_ROOT_DIR "${NV_SOURCE_DIR}/extern/FreeImage") ++ ++#ENDIF(WIN32) + + INCLUDE(${NV_CMAKE_DIR}/OptimalOptions.cmake) + MESSAGE(STATUS "Setting optimal options") +diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt +index f64b263..ec97402 100644 +--- a/src/CMakeLists.txt ++++ b/src/CMakeLists.txt +@@ -11,13 +11,16 @@ + SUBDIRS(bc6h) + SUBDIRS(bc7) + ++# Make PNG optional (we disable it on macOS) ++SET(PNG TRUE CACHE BOOL "") ++ + # OpenGL +-#INCLUDE(FindOpenGL) +-#IF(OPENGL_FOUND) +-# MESSAGE(STATUS "Looking for OpenGL - found") +-#ELSE(OPENGL_FOUND) +-# MESSAGE(STATUS "Looking for OpenGL - not found") +-#ENDIF(OPENGL_FOUND) ++INCLUDE(FindOpenGL) ++IF(OPENGL_FOUND) ++ MESSAGE(STATUS "Looking for OpenGL - found") ++ELSE(OPENGL_FOUND) ++ MESSAGE(STATUS "Looking for OpenGL - not found") ++ENDIF(OPENGL_FOUND) + + # GLUT + #INCLUDE(FindGLUT) +@@ -28,12 +31,12 @@ + #ENDIF(GLUT_FOUND) + + # DirectX +-#INCLUDE(${NV_CMAKE_DIR}/FindDirectX.cmake) +-#IF(DX10_FOUND) +-# MESSAGE(STATUS "Looking for DirectX - found") +-#ELSE(DX10_FOUND) +-# MESSAGE(STATUS "Looking for DirectX - not found") +-#ENDIF(DX10_FOUND) ++INCLUDE(${NV_CMAKE_DIR}/FindDirectX.cmake) ++IF(DX10_FOUND) ++ MESSAGE(STATUS "Looking for DirectX - found") ++ELSE(DX10_FOUND) ++ MESSAGE(STATUS "Looking for DirectX - not found") ++ENDIF(DX10_FOUND) + + # GLEW + #INCLUDE(${NV_CMAKE_DIR}/FindGLEW.cmake) +@@ -53,18 +56,18 @@ + + # CUDA + #FIND_PACKAGE(CUDA) +-IF(CUDA_FOUND) +- IF(MINGW) +- MESSAGE(STATUS "Looking for CUDA - not supported on MinGW") +- UNSET(CUDA_FOUND) +- ENDIF(MINGW) +- IF(CUDA_FOUND) +- SET(HAVE_CUDA ${CUDA_FOUND} CACHE BOOL "Set to TRUE if CUDA is found, FALSE otherwise") +- MESSAGE(STATUS "Looking for CUDA - found") +- ENDIF(CUDA_FOUND) +-ELSE(CUDA_FOUND) +- MESSAGE(STATUS "Looking for CUDA - not found") +-ENDIF(CUDA_FOUND) ++#IF(CUDA_FOUND) ++# IF(MINGW) ++# MESSAGE(STATUS "Looking for CUDA - not supported on MinGW") ++# UNSET(CUDA_FOUND) ++# ENDIF(MINGW) ++# IF(CUDA_FOUND) ++# SET(HAVE_CUDA ${CUDA_FOUND} CACHE BOOL "Set to TRUE if CUDA is found, FALSE otherwise") ++# MESSAGE(STATUS "Looking for CUDA - found") ++# ENDIF(CUDA_FOUND) ++#ELSE(CUDA_FOUND) ++# MESSAGE(STATUS "Looking for CUDA - not found") ++#ENDIF(CUDA_FOUND) + + # Maya + #INCLUDE(${NV_CMAKE_DIR}/FindMaya.cmake) +@@ -94,13 +97,15 @@ + #ENDIF(JPEG_FOUND) + + # PNG +-#INCLUDE(FindPNG) +-#IF(PNG_FOUND) +-# SET(HAVE_PNG ${PNG_FOUND} CACHE BOOL "Set to TRUE if PNG is found, FALSE otherwise") +-# MESSAGE(STATUS "Looking for PNG - found") +-#ELSE(PNG_FOUND) +-# MESSAGE(STATUS "Looking for PNG - not found") +-#ENDIF(PNG_FOUND) ++IF(PNG) ++ INCLUDE(FindPNG) ++ IF(PNG_FOUND) ++ SET(HAVE_PNG ${PNG_FOUND} CACHE BOOL "Set to TRUE if PNG is found, FALSE otherwise") ++ MESSAGE(STATUS "Looking for PNG - found") ++ 
ELSE(PNG_FOUND) ++ MESSAGE(STATUS "Looking for PNG - not found") ++ ENDIF(PNG_FOUND) ++ENDIF(PNG) + + # TIFF + #SET(TIFF_NAMES libtiff) +@@ -122,15 +127,15 @@ + #ENDIF(OPENEXR_FOUND) + + # OpenMP +-INCLUDE(FindOpenMP) +-IF(OPENMP_FOUND) +- SET(HAVE_OPENMP ${OPENMP_FOUND} CACHE BOOL "Set to TRUE if OpenMP is found, FALSE otherwise") +- MESSAGE(STATUS "Looking for OpenMP - found") +- SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") +- SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") +-ELSE(OPENMP_FOUND) +- MESSAGE(STATUS "Looking for OpenMP - not found") +-ENDIF(OPENMP_FOUND) ++#INCLUDE(FindOpenMP) ++#IF(OPENMP_FOUND) ++# SET(HAVE_OPENMP ${OPENMP_FOUND} CACHE BOOL "Set to TRUE if OpenMP is found, FALSE otherwise") ++# MESSAGE(STATUS "Looking for OpenMP - found") ++# SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") ++# SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") ++#ELSE(OPENMP_FOUND) ++# MESSAGE(STATUS "Looking for OpenMP - not found") ++#ENDIF(OPENMP_FOUND) + + # Threads + FIND_PACKAGE(Threads REQUIRED) + MESSAGE(STATUS "Use thread library: ${CMAKE_THREAD_LIBS_INIT}") +diff --git a/src/nvcore/CMakeLists.txt b/src/nvcore/CMakeLists.txt +index 3dfcb5d..a0bec38 100644 +--- a/src/nvcore/CMakeLists.txt ++++ b/src/nvcore/CMakeLists.txt +@@ -44,6 +44,6 @@ if (CMAKE_SYSTEM_NAME MATCHES "NetBSD" OR CMAKE_SYSTEM_NAME MATCHES "FreeBSD") + endif() + + INSTALL(TARGETS nvcore +- RUNTIME DESTINATION bin +- LIBRARY DESTINATION lib +- ARCHIVE DESTINATION lib/static) ++ RUNTIME DESTINATION ${BINDIR} ++ LIBRARY DESTINATION ${LIBDIR} ++ ARCHIVE DESTINATION ${LIBDIR}) +diff --git a/src/nvimage/CMakeLists.txt b/src/nvimage/CMakeLists.txt +index dce627d..420d9a6 100644 +--- a/src/nvimage/CMakeLists.txt ++++ b/src/nvimage/CMakeLists.txt +@@ -56,7 +56,7 @@ ENDIF(NVIMAGE_SHARED) + TARGET_LINK_LIBRARIES(nvimage ${LIBS} nvcore posh bc6h bc7 nvmath) + + INSTALL(TARGETS nvimage +- RUNTIME DESTINATION bin +- LIBRARY DESTINATION lib +- ARCHIVE DESTINATION lib/static) ++ RUNTIME DESTINATION ${BINDIR} ++ LIBRARY DESTINATION ${LIBDIR} ++ ARCHIVE DESTINATION ${LIBDIR}) + +diff --git a/src/nvmath/CMakeLists.txt b/src/nvmath/CMakeLists.txt +index abeb05f..e63df63 100644 +--- a/src/nvmath/CMakeLists.txt ++++ b/src/nvmath/CMakeLists.txt +@@ -28,6 +28,6 @@ ENDIF(NVMATH_SHARED) + TARGET_LINK_LIBRARIES(nvmath ${LIBS} nvcore) + + INSTALL(TARGETS nvmath +- RUNTIME DESTINATION bin +- LIBRARY DESTINATION lib +- ARCHIVE DESTINATION lib/static) ++ RUNTIME DESTINATION ${BINDIR} ++ LIBRARY DESTINATION ${LIBDIR} ++ ARCHIVE DESTINATION ${LIBDIR}) +diff --git a/src/nvthread/CMakeLists.txt b/src/nvthread/CMakeLists.txt +index 15dbc4e..a2b3654 100644 +--- a/src/nvthread/CMakeLists.txt ++++ b/src/nvthread/CMakeLists.txt +@@ -23,6 +23,6 @@ ENDIF(NVTHREAD_SHARED) + TARGET_LINK_LIBRARIES(nvthread ${LIBS} nvcore) + + INSTALL(TARGETS nvthread +- RUNTIME DESTINATION bin +- LIBRARY DESTINATION lib +- ARCHIVE DESTINATION lib/static) ++ RUNTIME DESTINATION ${BINDIR} ++ LIBRARY DESTINATION ${LIBDIR} ++ ARCHIVE DESTINATION ${LIBDIR}) +diff --git a/src/nvtt/CMakeLists.txt b/src/nvtt/CMakeLists.txt +index 7923159..df77c86 100644 +--- a/src/nvtt/CMakeLists.txt ++++ b/src/nvtt/CMakeLists.txt +@@ -50,9 +50,9 @@ ENDIF(NVTT_SHARED) + TARGET_LINK_LIBRARIES(nvtt ${LIBS} nvcore nvimage nvthread squish bc6h bc7 nvmath) + + INSTALL(TARGETS nvtt +- RUNTIME DESTINATION bin +- LIBRARY DESTINATION lib +- ARCHIVE DESTINATION lib/static) ++ RUNTIME DESTINATION ${BINDIR} ++ LIBRARY DESTINATION ${LIBDIR} ++ ARCHIVE DESTINATION 
${LIBDIR}) + + INSTALL(FILES nvtt.h DESTINATION include/nvtt) + +-- Index: ps/trunk/libraries/source/nvtt/patches/cmake-devflags.patch =================================================================== --- ps/trunk/libraries/source/nvtt/patches/cmake-devflags.patch +++ ps/trunk/libraries/source/nvtt/patches/cmake-devflags.patch @@ -1,176 +0,0 @@ -From: hasufell -Date: Wed Apr 11 21:49:58 UTC 2012 -Subject: various cmake fixes - -fix hardcoded install paths for BINDIR and LIBDIR -make cg, cuda, glew, glut and openexr controllable and not automagic - ---- src/nvcore/CMakeLists.txt -+++ src/nvcore/CMakeLists.txt -@@ -42,6 +42,6 @@ - TARGET_LINK_LIBRARIES(nvcore ${LIBS}) - - INSTALL(TARGETS nvcore -- RUNTIME DESTINATION bin -- LIBRARY DESTINATION lib -- ARCHIVE DESTINATION lib/static) -+ RUNTIME DESTINATION ${BINDIR} -+ LIBRARY DESTINATION ${LIBDIR} -+ ARCHIVE DESTINATION ${LIBDIR}) ---- src/nvimage/CMakeLists.txt -+++ src/nvimage/CMakeLists.txt -@@ -62,7 +62,7 @@ - TARGET_LINK_LIBRARIES(nvimage ${LIBS} nvcore nvmath posh) - - INSTALL(TARGETS nvimage -- RUNTIME DESTINATION bin -- LIBRARY DESTINATION lib -- ARCHIVE DESTINATION lib/static) -+ RUNTIME DESTINATION ${BINDIR} -+ LIBRARY DESTINATION ${LIBDIR} -+ ARCHIVE DESTINATION ${LIBDIR}) - ---- src/nvmath/CMakeLists.txt -+++ src/nvmath/CMakeLists.txt -@@ -28,6 +28,6 @@ - TARGET_LINK_LIBRARIES(nvmath ${LIBS} nvcore) - - INSTALL(TARGETS nvmath -- RUNTIME DESTINATION bin -- LIBRARY DESTINATION lib -- ARCHIVE DESTINATION lib/static) -+ RUNTIME DESTINATION ${BINDIR} -+ LIBRARY DESTINATION ${LIBDIR} -+ ARCHIVE DESTINATION ${LIBDIR}) ---- src/nvtt/CMakeLists.txt -+++ src/nvtt/CMakeLists.txt -@@ -53,9 +53,9 @@ - TARGET_LINK_LIBRARIES(nvtt ${LIBS} nvcore nvmath nvimage squish) - - INSTALL(TARGETS nvtt -- RUNTIME DESTINATION bin -- LIBRARY DESTINATION lib -- ARCHIVE DESTINATION lib/static) -+ RUNTIME DESTINATION ${BINDIR} -+ LIBRARY DESTINATION ${LIBDIR} -+ ARCHIVE DESTINATION ${LIBDIR}) - - INSTALL(FILES nvtt.h DESTINATION include/nvtt) - ---- src/CMakeLists.txt -+++ src/CMakeLists.txt -@@ -5,6 +5,13 @@ - SUBDIRS(nvtt) - - INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}) -+ -+# initial variables -+SET(GLUT TRUE CACHE BOOL "") -+SET(GLEW TRUE CACHE BOOL "") -+SET(CG TRUE CACHE BOOL "") -+SET(CUDA TRUE CACHE BOOL "") -+SET(OPENEXR TRUE CACHE BOOL "") - - # OpenGL - INCLUDE(FindOpenGL) -@@ -15,13 +22,15 @@ - ENDIF(OPENGL_FOUND) - - # GLUT --INCLUDE(${NV_CMAKE_DIR}/FindGLUT.cmake) --#INCLUDE(FindGLUT) --IF(GLUT_FOUND) -- MESSAGE(STATUS "Looking for GLUT - found") --ELSE(GLUT_FOUND) -- MESSAGE(STATUS "Looking for GLUT - not found") --ENDIF(GLUT_FOUND) -+IF(GLUT) -+ INCLUDE(${NV_CMAKE_DIR}/FindGLUT.cmake) -+ #INCLUDE(FindGLUT) -+ IF(GLUT_FOUND) -+ MESSAGE(STATUS "Looking for GLUT - found") -+ ELSE(GLUT_FOUND) -+ MESSAGE(STATUS "Looking for GLUT - not found") -+ ENDIF(GLUT_FOUND) -+ENDIF(GLUT) - - # DirectX - INCLUDE(${NV_CMAKE_DIR}/FindDirectX.cmake) -@@ -32,29 +41,35 @@ - ENDIF(DX10_FOUND) - - # GLEW --INCLUDE(${NV_CMAKE_DIR}/FindGLEW.cmake) --IF(GLEW_FOUND) -- MESSAGE(STATUS "Looking for GLEW - found") --ELSE(GLEW_FOUND) -- MESSAGE(STATUS "Looking for GLEW - not found") --ENDIF(GLEW_FOUND) -+IF(GLEW) -+ INCLUDE(${NV_CMAKE_DIR}/FindGLEW.cmake) -+ IF(GLEW_FOUND) -+ MESSAGE(STATUS "Looking for GLEW - found") -+ ELSE(GLEW_FOUND) -+ MESSAGE(STATUS "Looking for GLEW - not found") -+ ENDIF(GLEW_FOUND) -+ENDIF(GLEW) - - # Cg --INCLUDE(${NV_CMAKE_DIR}/FindCg.cmake) --IF(CG_FOUND) -- MESSAGE(STATUS "Looking for Cg - found") 
--ELSE(CG_FOUND) -- MESSAGE(STATUS "Looking for Cg - not found") --ENDIF(CG_FOUND) -+IF(CG) -+ INCLUDE(${NV_CMAKE_DIR}/FindCg.cmake) -+ IF(CG_FOUND) -+ MESSAGE(STATUS "Looking for Cg - found") -+ ELSE(CG_FOUND) -+ MESSAGE(STATUS "Looking for Cg - not found") -+ ENDIF(CG_FOUND) -+ENDIF(CG) - - # CUDA --INCLUDE(${NV_CMAKE_DIR}/FindCUDA.cmake) --IF(CUDA_FOUND) -- SET(HAVE_CUDA ${CUDA_FOUND} CACHE BOOL "Set to TRUE if CUDA is found, FALSE otherwise") -- MESSAGE(STATUS "Looking for CUDA - found") --ELSE(CUDA_FOUND) -- MESSAGE(STATUS "Looking for CUDA - not found") --ENDIF(CUDA_FOUND) -+IF(CUDA) -+ INCLUDE(${NV_CMAKE_DIR}/FindCUDA.cmake) -+ IF(CUDA_FOUND) -+ SET(HAVE_CUDA ${CUDA_FOUND} CACHE BOOL "Set to TRUE if CUDA is found, FALSE otherwise") -+ MESSAGE(STATUS "Looking for CUDA - found") -+ ELSE(CUDA_FOUND) -+ MESSAGE(STATUS "Looking for CUDA - not found") -+ ENDIF(CUDA_FOUND) -+ENDIF(CUDA) - - # Maya - INCLUDE(${NV_CMAKE_DIR}/FindMaya.cmake) -@@ -93,13 +108,15 @@ - ENDIF(TIFF_FOUND) - - # OpenEXR --INCLUDE(${NV_CMAKE_DIR}/FindOpenEXR.cmake) --IF(OPENEXR_FOUND) -- SET(HAVE_OPENEXR ${OPENEXR_FOUND} CACHE BOOL "Set to TRUE if OpenEXR is found, FALSE otherwise") -- MESSAGE(STATUS "Looking for OpenEXR - found") --ELSE(OPENEXR_FOUND) -- MESSAGE(STATUS "Looking for OpenEXR - not found") --ENDIF(OPENEXR_FOUND) -+IF(OPENEXR) -+ INCLUDE(${NV_CMAKE_DIR}/FindOpenEXR.cmake) -+ IF(OPENEXR_FOUND) -+ SET(HAVE_OPENEXR ${OPENEXR_FOUND} CACHE BOOL "Set to TRUE if OpenEXR is found, FALSE otherwise") -+ MESSAGE(STATUS "Looking for OpenEXR - found") -+ ELSE(OPENEXR_FOUND) -+ MESSAGE(STATUS "Looking for OpenEXR - not found") -+ ENDIF(OPENEXR_FOUND) -+ENDIF(OPENEXR) - - # Qt - FIND_PACKAGE(Qt4) Index: ps/trunk/libraries/source/nvtt/patches/cmake-devflags2.patch =================================================================== --- ps/trunk/libraries/source/nvtt/patches/cmake-devflags2.patch +++ ps/trunk/libraries/source/nvtt/patches/cmake-devflags2.patch @@ -1,101 +0,0 @@ -Index: src/CMakeLists.txt -=================================================================== ---- src/CMakeLists.txt (revision 13168) -+++ src/CMakeLists.txt (working copy) -@@ -9,12 +9,15 @@ - # initial variables - SET(GLUT TRUE CACHE BOOL "") - SET(GLEW TRUE CACHE BOOL "") --SET(CG TRUE CACHE BOOL "") --SET(CUDA TRUE CACHE BOOL "") --SET(OPENEXR TRUE CACHE BOOL "") -- --# OpenGL --INCLUDE(FindOpenGL) -+SET(CG TRUE CACHE BOOL "") -+SET(CUDA TRUE CACHE BOOL "") -+SET(OPENEXR TRUE CACHE BOOL "") -+SET(JPEG TRUE CACHE BOOL "") -+SET(PNG TRUE CACHE BOOL "") -+SET(TIFF TRUE CACHE BOOL "") -+ -+# OpenGL -+INCLUDE(FindOpenGL) - IF(OPENGL_FOUND) - MESSAGE(STATUS "Looking for OpenGL - found") - ELSE(OPENGL_FOUND) -@@ -78,37 +81,43 @@ - MESSAGE(STATUS "Looking for Maya - found") - ELSE(MAYA_FOUND) - MESSAGE(STATUS "Looking for Maya - not found") --ENDIF(MAYA_FOUND) -- --# JPEG --INCLUDE(FindJPEG) --IF(JPEG_FOUND) -- SET(HAVE_JPEG ${JPEG_FOUND} CACHE BOOL "Set to TRUE if JPEG is found, FALSE otherwise") -- MESSAGE(STATUS "Looking for JPEG - found") --ELSE(JPEG_FOUND) -- MESSAGE(STATUS "Looking for JPEG - not found") --ENDIF(JPEG_FOUND) -- --# PNG --INCLUDE(FindPNG) --IF(PNG_FOUND) -- SET(HAVE_PNG ${PNG_FOUND} CACHE BOOL "Set to TRUE if PNG is found, FALSE otherwise") -- MESSAGE(STATUS "Looking for PNG - found") --ELSE(PNG_FOUND) -- MESSAGE(STATUS "Looking for PNG - not found") --ENDIF(PNG_FOUND) -- --# TIFF --INCLUDE(FindTIFF) --IF(TIFF_FOUND) -- SET(HAVE_TIFF ${TIFF_FOUND} CACHE BOOL "Set to TRUE if TIFF is found, FALSE otherwise") -- MESSAGE(STATUS 
"Looking for TIFF - found") --ELSE(TIFF_FOUND) -- MESSAGE(STATUS "Looking for TIFF - not found") --ENDIF(TIFF_FOUND) -- --# OpenEXR --IF(OPENEXR) -+ENDIF(MAYA_FOUND) -+ -+# JPEG -+IF(JPEG) -+ INCLUDE(FindJPEG) -+ IF(JPEG_FOUND) -+ SET(HAVE_JPEG ${JPEG_FOUND} CACHE BOOL "Set to TRUE if JPEG is found, FALSE otherwise") -+ MESSAGE(STATUS "Looking for JPEG - found") -+ ELSE(JPEG_FOUND) -+ MESSAGE(STATUS "Looking for JPEG - not found") -+ ENDIF(JPEG_FOUND) -+ENDIF(JPEG) -+ -+# PNG -+IF(PNG) -+ INCLUDE(FindPNG) -+ IF(PNG_FOUND) -+ SET(HAVE_PNG ${PNG_FOUND} CACHE BOOL "Set to TRUE if PNG is found, FALSE otherwise") -+ MESSAGE(STATUS "Looking for PNG - found") -+ ELSE(PNG_FOUND) -+ MESSAGE(STATUS "Looking for PNG - not found") -+ ENDIF(PNG_FOUND) -+ENDIF(PNG) -+ -+# TIFF -+IF(TIFF) -+ INCLUDE(FindTIFF) -+ IF(TIFF_FOUND) -+ SET(HAVE_TIFF ${TIFF_FOUND} CACHE BOOL "Set to TRUE if TIFF is found, FALSE otherwise") -+ MESSAGE(STATUS "Looking for TIFF - found") -+ ELSE(TIFF_FOUND) -+ MESSAGE(STATUS "Looking for TIFF - not found") -+ ENDIF(TIFF_FOUND) -+ENDIF(TIFF) -+ -+# OpenEXR -+IF(OPENEXR) - INCLUDE(${NV_CMAKE_DIR}/FindOpenEXR.cmake) - IF(OPENEXR_FOUND) - SET(HAVE_OPENEXR ${OPENEXR_FOUND} CACHE BOOL "Set to TRUE if OpenEXR is found, FALSE otherwise") Index: ps/trunk/libraries/source/nvtt/patches/cmake-freebsd.patch =================================================================== --- ps/trunk/libraries/source/nvtt/patches/cmake-freebsd.patch +++ ps/trunk/libraries/source/nvtt/patches/cmake-freebsd.patch @@ -1,15 +1,22 @@ -Index: nvtt/CMakeLists.txt -=================================================================== ---- nvtt/CMakeLists.txt (revision 10975) -+++ nvtt/CMakeLists.txt (working copy) -@@ -44,6 +44,10 @@ +--- + src/nvtt/CMakeLists.txt | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/src/nvtt/CMakeLists.txt b/src/nvtt/CMakeLists.txt +index df77c86..e543807 100644 +--- a/src/nvtt/CMakeLists.txt ++++ b/src/nvtt/CMakeLists.txt +@@ -41,7 +41,11 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + ADD_DEFINITIONS(-DNVTT_EXPORTS) - IF(NVTT_SHARED) -+ IF(CMAKE_SYSTEM_NAME MATCHES "FreeBSD") -+ SET(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-z,origin") -+ ENDIF(CMAKE_SYSTEM_NAME MATCHES "FreeBSD") +-IF(NVTT_SHARED) ++IF(NVTT_SHARED) ++ IF(CMAKE_SYSTEM_NAME MATCHES "FreeBSD") ++ SET(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-z,origin") ++ ENDIF(CMAKE_SYSTEM_NAME MATCHES "FreeBSD") + - ADD_DEFINITIONS(-DNVTT_SHARED=1) - ADD_LIBRARY(nvtt SHARED ${NVTT_SRCS}) + ADD_LIBRARY(nvtt SHARED ${NVTT_SRCS}) ELSE(NVTT_SHARED) + ADD_LIBRARY(nvtt ${NVTT_SRCS}) +-- Index: ps/trunk/libraries/source/nvtt/patches/cmake-noqt4.patch =================================================================== --- ps/trunk/libraries/source/nvtt/patches/cmake-noqt4.patch +++ ps/trunk/libraries/source/nvtt/patches/cmake-noqt4.patch @@ -1,14 +0,0 @@ -Index: src/src/CMakeLists.txt -=================================================================== ---- src/src/CMakeLists.txt (revision 13170) -+++ src/src/CMakeLists.txt (revision 13635) -@@ -128,7 +128,8 @@ - ENDIF(OPENEXR) - - # Qt --FIND_PACKAGE(Qt4) -+# We don't actually use this and it requires having Qt4 installed, so why is this in here? 
-+#FIND_PACKAGE(Qt4) - - # Threads - FIND_PACKAGE(Threads REQUIRED) Index: ps/trunk/libraries/source/nvtt/patches/gcc47-unistd.patch =================================================================== --- ps/trunk/libraries/source/nvtt/patches/gcc47-unistd.patch +++ ps/trunk/libraries/source/nvtt/patches/gcc47-unistd.patch @@ -1,23 +0,0 @@ -Index: src/src/nvcore/Debug.cpp -=================================================================== ---- src/src/nvcore/Debug.cpp (revision 11373) -+++ src/src/nvcore/Debug.cpp (working copy) -@@ -27,6 +27,10 @@ - # include - #endif - -+#if NV_OS_LINUX || NV_OS_DARWIN || NV_OS_FREEBSD -+# include // getpid -+#endif -+ - #if NV_OS_LINUX && defined(HAVE_EXECINFO_H) - # include // backtrace - # if NV_CC_GNUC // defined(HAVE_CXXABI_H) -@@ -35,7 +39,6 @@ - #endif - - #if NV_OS_DARWIN || NV_OS_FREEBSD --# include // getpid - # include - # include // sysctl - # include Index: ps/trunk/libraries/source/nvtt/patches/gcc6-fix.patch =================================================================== --- ps/trunk/libraries/source/nvtt/patches/gcc6-fix.patch +++ ps/trunk/libraries/source/nvtt/patches/gcc6-fix.patch @@ -1,31 +0,0 @@ -Index: libraries/source/nvtt/src/src/nvimage/ImageIO.cpp -=================================================================== ---- libraries/source/nvtt/src/src/nvimage/ImageIO.cpp (revision 18164) -+++ libraries/source/nvtt/src/src/nvimage/ImageIO.cpp (working copy) -@@ -621,7 +621,7 @@ - png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL); - if (png_ptr == NULL) { - // nvDebug( "*** LoadPNG: Error allocating read buffer in file '%s'.\n", name ); -- return false; -+ return NULL; - } - - // Allocate/initialize a memory block for the image information -@@ -629,7 +629,7 @@ - if (info_ptr == NULL) { - png_destroy_read_struct(&png_ptr, NULL, NULL); - // nvDebug( "*** LoadPNG: Error allocating image information for '%s'.\n", name ); -- return false; -+ return NULL; - } - - // Set up the error handling -@@ -636,7 +636,7 @@ - if (setjmp(png_jmpbuf(png_ptr))) { - png_destroy_read_struct(&png_ptr, &info_ptr, NULL); - // nvDebug( "*** LoadPNG: Error reading png file '%s'.\n", name ); -- return false; -+ return NULL; - } - - // Set up the I/O functions. 
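(Context for the gcc6-fix.patch deleted above: it replaced `return false;` with `return NULL;` in pointer-returning PNG helpers, because GCC 6 defaults to a newer C++ dialect in which a boolean no longer converts implicitly to a pointer. The patch is dropped with the move to NVTT 2.1.1. A minimal, hypothetical C++ sketch of the pattern and its fix follows; the names are illustrative and not taken from NVTT.)

    // Stand-in for libpng's png_struct; illustrative only.
    struct png_stub { int dummy; };

    png_stub* load_png_stub(bool header_ok)
    {
        if (!header_ok)
            return nullptr; // the old code did `return false;`, which GCC >= 6 rejects;
                            // the removed patch returned NULL instead
        static png_stub s;
        return &s;
    }

    int main() { return load_png_stub(true) != nullptr ? 0 : 1; }
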
Index: ps/trunk/libraries/source/nvtt/patches/issue139.patch =================================================================== --- ps/trunk/libraries/source/nvtt/patches/issue139.patch +++ ps/trunk/libraries/source/nvtt/patches/issue139.patch @@ -1,55 +0,0 @@ -Index: src/src/nvmath/Vector.h -=================================================================== ---- src/src/nvmath/Vector.h (revision 8311) -+++ src/src/nvmath/Vector.h (working copy) -@@ -68,7 +68,7 @@ - scalar y() const; - scalar z() const; - -- const Vector2 & xy() const; -+ Vector2 xy() const; - - scalar component(uint idx) const; - -@@ -111,8 +111,8 @@ - scalar z() const; - scalar w() const; - -- const Vector2 & xy() const; -- const Vector3 & xyz() const; -+ Vector2 xy() const; -+ Vector3 xyz() const; - - scalar component(uint idx) const; - -@@ -231,9 +231,9 @@ - inline scalar Vector3::y() const { return m_y; } - inline scalar Vector3::z() const { return m_z; } - --inline const Vector2 & Vector3::xy() const -+inline Vector2 Vector3::xy() const - { -- return *(Vector2 *)this; -+ return Vector2(m_x, m_y); - } - - inline scalar Vector3::component(uint idx) const -@@ -332,14 +332,14 @@ - inline scalar Vector4::z() const { return m_z; } - inline scalar Vector4::w() const { return m_w; } - --inline const Vector2 & Vector4::xy() const -+inline Vector2 Vector4::xy() const - { -- return *(Vector2 *)this; -+ return Vector2(m_x, m_y); - } - --inline const Vector3 & Vector4::xyz() const -+inline Vector3 Vector4::xyz() const - { -- return *(Vector3 *)this; -+ return Vector3(m_x, m_y, m_z); - } - - inline scalar Vector4::component(uint idx) const Index: ps/trunk/libraries/source/nvtt/patches/issue176.patch =================================================================== --- ps/trunk/libraries/source/nvtt/patches/issue176.patch +++ ps/trunk/libraries/source/nvtt/patches/issue176.patch @@ -1,105 +0,0 @@ -Index: src/src/nvcore/nvcore.h -=================================================================== ---- src/src/nvcore/nvcore.h (revision 11943) -+++ src/src/nvcore/nvcore.h (working copy) -@@ -41,6 +41,9 @@ - #elif defined POSH_OS_FREEBSD - # define NV_OS_FREEBSD 1 - # define NV_OS_UNIX 1 -+#elif defined POSH_OS_OPENBSD -+# define NV_OS_OPENBSD 1 -+# define NV_OS_UNIX 1 - #elif defined POSH_OS_CYGWIN32 - # define NV_OS_CYGWIN 1 - #elif defined POSH_OS_MINGW -@@ -178,7 +181,7 @@ - #elif NV_CC_GNUC - # if NV_OS_LINUX - # include "DefsGnucLinux.h" --# elif NV_OS_DARWIN || NV_OS_FREEBSD -+# elif NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_OPENBSD - # include "DefsGnucDarwin.h" - # elif NV_OS_MINGW - # include "DefsGnucWin32.h" -Index: src/src/nvcore/Debug.cpp -=================================================================== ---- src/src/nvcore/Debug.cpp (revision 11943) -+++ src/src/nvcore/Debug.cpp (working copy) -@@ -27,7 +27,7 @@ - # include - #endif - --#if NV_OS_LINUX || NV_OS_DARWIN || NV_OS_FREEBSD -+#if NV_OS_LINUX || NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_OPENBSD - # include // getpid - #endif - -@@ -38,10 +38,13 @@ - # endif - #endif - --#if NV_OS_DARWIN || NV_OS_FREEBSD -+#if NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_OPENBSD - # include -+# include - # include // sysctl --# include -+# if !NV_OS_OPENBSD -+# include -+# endif - # undef HAVE_EXECINFO_H - # if defined(HAVE_EXECINFO_H) // only after OSX 10.5 - # include // backtrace -@@ -210,6 +213,14 @@ - ucontext_t * ucp = (ucontext_t *)secret; - return (void *)ucp->uc_mcontext.mc_eip; - # endif -+# elif NV_OS_OPENBSD -+# if NV_CPU_X86_64 -+ ucontext_t * ucp = (ucontext_t *)secret; 
-+ return (void *)ucp->sc_rip; -+# elif NV_CPU_X86 -+ ucontext_t * ucp = (ucontext_t *)secret; -+ return (void *)ucp->sc_eip; -+# endif - # else - # if NV_CPU_X86_64 - // #define REG_RIP REG_INDEX(rip) // seems to be 16 -Index: src/src/nvcore/poshlib/posh.h -=================================================================== ---- src/src/nvcore/poshlib/posh.h (revision 11943) -+++ src/src/nvcore/poshlib/posh.h (working copy) -@@ -298,6 +298,11 @@ - # define POSH_OS_STRING "FreeBSD" - #endif - -+#if defined __OpenBSD__ -+# define POSH_OS_OPENBSD 1 -+# define POSH_OS_STRING "OpenBSD" -+#endif -+ - #if defined __CYGWIN32__ - # define POSH_OS_CYGWIN32 1 - # define POSH_OS_STRING "Cygwin" -Index: src/src/nvmath/nvmath.h -=================================================================== ---- src/src/nvmath/nvmath.h (revision 11943) -+++ src/src/nvmath/nvmath.h (working copy) -@@ -115,7 +115,7 @@ - { - #if NV_OS_WIN32 - return _finite(f) != 0; --#elif NV_OS_DARWIN || NV_OS_FREEBSD -+#elif NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_OPENBSD - return isfinite(f); - #elif NV_OS_LINUX - return finitef(f); -@@ -130,7 +130,7 @@ - { - #if NV_OS_WIN32 - return _isnan(f) != 0; --#elif NV_OS_DARWIN || NV_OS_FREEBSD -+#elif NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_OPENBSD - return isnan(f); - #elif NV_OS_LINUX - return isnanf(f); Index: ps/trunk/libraries/source/nvtt/patches/issue182.patch =================================================================== --- ps/trunk/libraries/source/nvtt/patches/issue182.patch +++ ps/trunk/libraries/source/nvtt/patches/issue182.patch @@ -1,18 +0,0 @@ -Index: src/src/nvtt/squish/CMakeLists.txt -=================================================================== ---- src/src/nvtt/squish/CMakeLists.txt (revision 13060) -+++ src/src/nvtt/squish/CMakeLists.txt (working copy) -@@ -22,7 +22,11 @@ - - ADD_LIBRARY(squish STATIC ${SQUISH_SRCS}) - --IF(CMAKE_COMPILER_IS_GNUCXX) -+IF("${CMAKE_CXX_COMPILER}" MATCHES "clang(\\+\\+)?$" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") -+ SET(CMAKE_COMPILER_IS_CLANGXX 1) -+ENDIF() -+ -+IF(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANGXX) - SET_TARGET_PROPERTIES(squish PROPERTIES COMPILE_FLAGS -fPIC) --ENDIF(CMAKE_COMPILER_IS_GNUCXX) -+ENDIF(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANGXX) - Index: ps/trunk/libraries/source/nvtt/patches/issue188.patch =================================================================== --- ps/trunk/libraries/source/nvtt/patches/issue188.patch +++ ps/trunk/libraries/source/nvtt/patches/issue188.patch @@ -1,8 +1,12 @@ -Index: src/cmake/OptimalOptions.cmake -=================================================================== ---- src/cmake/OptimalOptions.cmake (revision 13805) -+++ src/cmake/OptimalOptions.cmake (working copy) -@@ -15,7 +15,7 @@ +--- + cmake/OptimalOptions.cmake | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/cmake/OptimalOptions.cmake b/cmake/OptimalOptions.cmake +index ac450c9..4993dd5 100644 +--- a/cmake/OptimalOptions.cmake ++++ b/cmake/OptimalOptions.cmake +@@ -16,7 +16,7 @@ IF(CMAKE_COMPILER_IS_GNUCXX) ENDIF(NV_SYSTEM_PROCESSOR STREQUAL "i686") IF(NV_SYSTEM_PROCESSOR STREQUAL "x86_64") @@ -11,3 +15,4 @@ #SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=athlon64 -msse3") ENDIF(NV_SYSTEM_PROCESSOR STREQUAL "x86_64") +-- Index: ps/trunk/libraries/source/nvtt/patches/issue261.patch =================================================================== --- ps/trunk/libraries/source/nvtt/patches/issue261.patch +++ ps/trunk/libraries/source/nvtt/patches/issue261.patch @@ 
-0,0 +1,23 @@ + src/nvthread/Atomic.h | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/src/nvthread/Atomic.h b/src/nvthread/Atomic.h +index 657b16763a..3010a5f5f4 100644 +--- a/libraries/source/nvtt/src/src/nvthread/Atomic.h ++++ b/libraries/source/nvtt/src/src/nvthread/Atomic.h +@@ -183,7 +183,6 @@ namespace nv { + + + #elif NV_CC_CLANG && (NV_OS_IOS || NV_OS_DARWIN) +- NV_COMPILER_CHECK(sizeof(uint32) == sizeof(long)); + + //ACS: Use Apple's atomics instead? I don't know if these are better in any way; there are non-barrier versions too. There's no OSAtomicSwap32 tho' + /* +@@ -254,7 +253,6 @@ namespace nv { + + + #elif NV_CC_CLANG && POSH_CPU_STRONGARM +- NV_COMPILER_CHECK(sizeof(uint32) == sizeof(long)); + + inline uint32 atomicIncrement(uint32 * value) + { Index: ps/trunk/libraries/source/nvtt/patches/musl-build.patch =================================================================== --- ps/trunk/libraries/source/nvtt/patches/musl-build.patch +++ ps/trunk/libraries/source/nvtt/patches/musl-build.patch @@ -0,0 +1,36 @@ +--- + src/nvmath/nvmath.h | 4 +--- + src/nvthread/nvthread.cpp | 2 ++ + 2 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/src/nvmath/nvmath.h b/src/nvmath/nvmath.h +index 439e599575..1f1ff1fcbc 100644 +--- a/src/nvmath/nvmath.h ++++ b/src/nvmath/nvmath.h +@@ -187,10 +187,8 @@ namespace nv + { + #if NV_OS_WIN32 || NV_OS_XBOX + return _isnan(f) != 0; +-#elif NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_NETBSD || NV_OS_OPENBSD || NV_OS_ORBIS ++#elif NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_NETBSD || NV_OS_OPENBSD || NV_OS_ORBIS || NV_OS_LINUX + return isnan(f); +-#elif NV_OS_LINUX +- return isnanf(f); + #else + # error "isNan not supported" + #endif +diff --git a/libraries/source/nvtt/src/src/nvthread/nvthread.cpp b/libraries/source/nvtt/src/src/nvthread/nvthread.cpp +index d8564d391b..967b886d0f 100644 +--- a/src/nvthread/nvthread.cpp ++++ b/src/nvthread/nvthread.cpp +@@ -8,7 +8,9 @@ + #include "Win32.h" + #elif NV_OS_UNIX + #include ++#if !NV_OS_LINUX + #include ++#endif + #include + #elif NV_OS_DARWIN + #import +-- Index: ps/trunk/libraries/source/nvtt/patches/png-api.patch =================================================================== --- ps/trunk/libraries/source/nvtt/patches/png-api.patch +++ ps/trunk/libraries/source/nvtt/patches/png-api.patch @@ -1,13 +0,0 @@ -Index: src/src/nvimage/ImageIO.cpp -=================================================================== ---- src/src/nvimage/ImageIO.cpp (revision 9895) -+++ src/src/nvimage/ImageIO.cpp (working copy) -@@ -603,7 +603,7 @@ - { - nvDebugCheck(png_ptr != NULL); - -- Stream * s = (Stream *)png_ptr->io_ptr; -+ Stream * s = (Stream *)png_get_io_ptr(png_ptr); - s->serialize(data, (int)length); - - if (s->isError()) { Index: ps/trunk/libraries/source/nvtt/patches/r1025.patch =================================================================== --- ps/trunk/libraries/source/nvtt/patches/r1025.patch +++ ps/trunk/libraries/source/nvtt/patches/r1025.patch @@ -1,16 +0,0 @@ -Index: extern/poshlib/posh.h -=================================================================== ---- extern/poshlib/posh.h (revision 1024) -+++ extern/poshlib/posh.h (revision 1025) -@@ -293,6 +293,11 @@ - # define POSH_OS_STRING "Linux" - #endif - -+#if defined __FreeBSD__ -+# define POSH_OS_FREEBSD 1 -+# define POSH_OS_STRING "FreeBSD" -+#endif -+ - #if defined __CYGWIN32__ - # define POSH_OS_CYGWIN32 1 - # define POSH_OS_STRING "Cygwin" Index: ps/trunk/libraries/source/nvtt/patches/r1156.patch 
=================================================================== --- ps/trunk/libraries/source/nvtt/patches/r1156.patch +++ ps/trunk/libraries/source/nvtt/patches/r1156.patch @@ -1,114 +0,0 @@ -Index: branches/2.0/src/nvcore/nvcore.h -=================================================================== ---- branches/2.0/src/nvcore/nvcore.h (revision 1155) -+++ branches/2.0/src/nvcore/nvcore.h (revision 1156) -@@ -99,6 +99,23 @@ - #define NV_ENDIAN_STRING POSH_ENDIAN_STRING - - -+// Type definitions: -+typedef posh_u8_t uint8; -+typedef posh_i8_t int8; -+ -+typedef posh_u16_t uint16; -+typedef posh_i16_t int16; -+ -+typedef posh_u32_t uint32; -+typedef posh_i32_t int32; -+ -+typedef posh_u64_t uint64; -+typedef posh_i64_t int64; -+ -+// Aliases -+typedef uint32 uint; -+ -+ - // Version string: - #define NV_VERSION_STRING \ - NV_OS_STRING "/" NV_CC_STRING "/" NV_CPU_STRING"/" \ -Index: branches/2.0/src/nvcore/DefsVcWin32.h -=================================================================== ---- branches/2.0/src/nvcore/DefsVcWin32.h (revision 1155) -+++ branches/2.0/src/nvcore/DefsVcWin32.h (revision 1156) -@@ -39,7 +39,7 @@ - #define __FUNC__ __FUNCTION__ - #endif - -- -+/* - // Type definitions - typedef unsigned char uint8; - typedef signed char int8; -@@ -55,8 +55,8 @@ - - // Aliases - typedef uint32 uint; -+*/ - -- - // Unwanted VC++ warnings to disable. - /* - #pragma warning(disable : 4244) // conversion to float, possible loss of data -Index: branches/2.0/src/nvcore/DefsGnucDarwin.h -=================================================================== ---- branches/2.0/src/nvcore/DefsGnucDarwin.h (revision 1155) -+++ branches/2.0/src/nvcore/DefsGnucDarwin.h (revision 1156) -@@ -2,7 +2,7 @@ - #error "Do not include this file directly." - #endif - --#include // uint8_t, int8_t, ... -+//#include // uint8_t, int8_t, ... 
- - // Function linkage - #define DLL_IMPORT -@@ -48,7 +48,7 @@ - - #define restrict __restrict__ - -- -+/* - // Type definitions - typedef uint8_t uint8; - typedef int8_t int8; -@@ -64,3 +64,4 @@ - - // Aliases - typedef uint32 uint; -+*/ -Index: branches/2.0/src/nvcore/DefsGnucLinux.h -=================================================================== ---- branches/2.0/src/nvcore/DefsGnucLinux.h (revision 1155) -+++ branches/2.0/src/nvcore/DefsGnucLinux.h (revision 1156) -@@ -47,7 +47,7 @@ - - #define restrict __restrict__ - -- -+/* - // Type definitions - typedef unsigned char uint8; - typedef signed char int8; -@@ -63,3 +63,4 @@ - - // Aliases - typedef uint32 uint; -+*/ -Index: branches/2.0/src/nvcore/DefsGnucWin32.h -=================================================================== ---- branches/2.0/src/nvcore/DefsGnucWin32.h (revision 1155) -+++ branches/2.0/src/nvcore/DefsGnucWin32.h (revision 1156) -@@ -41,7 +41,7 @@ - - #define restrict __restrict__ - -- -+/* - // Type definitions - typedef unsigned char uint8; - typedef signed char int8; -@@ -57,3 +57,4 @@ - - // Aliases - typedef uint32 uint; -+*/ Index: ps/trunk/libraries/source/nvtt/patches/r1157.patch =================================================================== --- ps/trunk/libraries/source/nvtt/patches/r1157.patch +++ ps/trunk/libraries/source/nvtt/patches/r1157.patch @@ -1,13 +0,0 @@ -Index: branches/2.0/cmake/FindCUDA.cmake -=================================================================== ---- branches/2.0/cmake/FindCUDA.cmake (revision 1156) -+++ branches/2.0/cmake/FindCUDA.cmake (revision 1157) -@@ -120,7 +120,7 @@ - FOREACH (CUFILE ${ARGN}) - GET_FILENAME_COMPONENT (CUFILE ${CUFILE} ABSOLUTE) - GET_FILENAME_COMPONENT (CFILE ${CUFILE} NAME_WE) -- SET (CFILE ${CMAKE_CURRENT_BINARY_DIR}/${CFILE}.gen.c) -+ SET (CFILE ${CMAKE_CURRENT_BINARY_DIR}/${CFILE}.gen.cpp) - - GET_CUFILE_DEPENDENCIES(CUDEPS ${CUFILE}) - #MESSAGE("${CUDEPS}") Index: ps/trunk/libraries/source/nvtt/patches/r1172.patch =================================================================== --- ps/trunk/libraries/source/nvtt/patches/r1172.patch +++ ps/trunk/libraries/source/nvtt/patches/r1172.patch @@ -1,589 +0,0 @@ -Index: branches/2.0/src/nvimage/FloatImage.cpp -=================================================================== ---- branches/2.0/src/nvimage/FloatImage.cpp (revision 1171) -+++ branches/2.0/src/nvimage/FloatImage.cpp (revision 1172) -@@ -151,13 +151,13 @@ - m_height = h; - m_componentNum = c; - m_count = w * h * c; -- m_mem = reinterpret_cast(nv::mem::malloc(m_count * sizeof(float))); -+ m_mem = reinterpret_cast(::malloc(m_count * sizeof(float))); - } - - /// Free the image, but don't clear the members. 
- void FloatImage::free() - { -- nv::mem::free( reinterpret_cast(m_mem) ); -+ ::free( reinterpret_cast(m_mem) ); - m_mem = NULL; - } - -Index: branches/2.0/src/nvimage/ImageIO.cpp -=================================================================== ---- branches/2.0/src/nvimage/ImageIO.cpp (revision 1171) -+++ branches/2.0/src/nvimage/ImageIO.cpp (revision 1172) -@@ -954,7 +954,7 @@ - fimage->allocate(spp, width, height); - - int linesize = TIFFScanlineSize(tif); -- tdata_t buf = (::uint8 *)nv::mem::malloc(linesize); -+ tdata_t buf = (::uint8 *)::malloc(linesize); - - for (uint y = 0; y < height; y++) - { -@@ -991,7 +991,7 @@ - } - } - -- nv::mem::free(buf); -+ ::free(buf); - - TIFFClose(tif); - -Index: branches/2.0/src/nvimage/Image.cpp -=================================================================== ---- branches/2.0/src/nvimage/Image.cpp (revision 1171) -+++ branches/2.0/src/nvimage/Image.cpp (revision 1172) -@@ -78,7 +78,7 @@ - - void Image::free() - { -- nv::mem::free(m_data); -+ ::free(m_data); - m_data = NULL; - } - -Index: branches/2.0/src/nvtt/CompressRGB.cpp -=================================================================== ---- branches/2.0/src/nvtt/CompressRGB.cpp (revision 1171) -+++ branches/2.0/src/nvtt/CompressRGB.cpp (revision 1172) -@@ -82,7 +82,7 @@ - // Determine pitch. - uint pitch = computePitch(w, compressionOptions.bitcount, 8); - -- uint8 * dst = (uint8 *)mem::malloc(pitch + 4); -+ uint8 * dst = (uint8 *)::malloc(pitch + 4); - - for (uint y = 0; y < h; y++) - { -@@ -127,6 +127,6 @@ - } - } - -- mem::free(dst); -+ ::free(dst); - } - -Index: branches/2.0/src/nvtt/cuda/CudaCompressDXT.cpp -=================================================================== ---- branches/2.0/src/nvtt/cuda/CudaCompressDXT.cpp (revision 1171) -+++ branches/2.0/src/nvtt/cuda/CudaCompressDXT.cpp (revision 1172) -@@ -137,7 +137,7 @@ - const uint h = (m_image->height() + 3) / 4; - - uint imageSize = w * h * 16 * sizeof(Color32); -- uint * blockLinearImage = (uint *) malloc(imageSize); -+ uint * blockLinearImage = (uint *) ::malloc(imageSize); - convertToBlockLinear(m_image, blockLinearImage); // @@ Do this in parallel with the GPU, or in the GPU! 
- - const uint blockNum = w * h; -@@ -207,14 +207,14 @@ - const uint h = (m_image->height() + 3) / 4; - - uint imageSize = w * h * 16 * sizeof(Color32); -- uint * blockLinearImage = (uint *) malloc(imageSize); -+ uint * blockLinearImage = (uint *) ::malloc(imageSize); - convertToBlockLinear(m_image, blockLinearImage); - - const uint blockNum = w * h; - const uint compressedSize = blockNum * 8; - - AlphaBlockDXT3 * alphaBlocks = NULL; -- alphaBlocks = (AlphaBlockDXT3 *)malloc(min(compressedSize, MAX_BLOCKS * 8U)); -+ alphaBlocks = (AlphaBlockDXT3 *)::malloc(min(compressedSize, MAX_BLOCKS * 8U)); - - setupCompressKernel(compressionOptions.colorWeight.ptr()); - -@@ -298,14 +298,14 @@ - const uint h = (m_image->height() + 3) / 4; - - uint imageSize = w * h * 16 * sizeof(Color32); -- uint * blockLinearImage = (uint *) malloc(imageSize); -+ uint * blockLinearImage = (uint *) ::malloc(imageSize); - convertToBlockLinear(m_image, blockLinearImage); - - const uint blockNum = w * h; - const uint compressedSize = blockNum * 8; - - AlphaBlockDXT5 * alphaBlocks = NULL; -- alphaBlocks = (AlphaBlockDXT5 *)malloc(min(compressedSize, MAX_BLOCKS * 8U)); -+ alphaBlocks = (AlphaBlockDXT5 *)::malloc(min(compressedSize, MAX_BLOCKS * 8U)); - - setupCompressKernel(compressionOptions.colorWeight.ptr()); - -Index: branches/2.0/src/nvcore/StrLib.cpp -=================================================================== ---- branches/2.0/src/nvcore/StrLib.cpp (revision 1171) -+++ branches/2.0/src/nvcore/StrLib.cpp (revision 1172) -@@ -21,17 +21,17 @@ - { - static char * strAlloc(uint size) - { -- return static_cast(mem::malloc(size)); -+ return static_cast(::malloc(size)); - } - - static char * strReAlloc(char * str, uint size) - { -- return static_cast(mem::realloc(str, size)); -+ return static_cast(::realloc(str, size)); - } - - static void strFree(const char * str) - { -- return mem::free(const_cast(str)); -+ return ::free(const_cast(str)); - } - - /*static char * strDup( const char * str ) -Index: branches/2.0/src/nvcore/StrLib.h -=================================================================== ---- branches/2.0/src/nvcore/StrLib.h (revision 1171) -+++ branches/2.0/src/nvcore/StrLib.h (revision 1172) -@@ -294,7 +294,7 @@ - const uint16 count = getRefCount(); - setRefCount(count - 1); - if (count - 1 == 0) { -- mem::free(data - 2); -+ free(data - 2); - data = NULL; - } - } -@@ -323,7 +323,7 @@ - - void allocString(const char * str, int len) - { -- const char * ptr = static_cast(mem::malloc(2 + len + 1)); -+ const char * ptr = static_cast(::malloc(2 + len + 1)); - - setData( ptr ); - setRefCount( 0 ); -Index: branches/2.0/src/nvcore/Memory.cpp -=================================================================== ---- branches/2.0/src/nvcore/Memory.cpp (revision 1171) -+++ branches/2.0/src/nvcore/Memory.cpp (revision 1172) -@@ -1,3 +1,4 @@ -+// This code is in the public domain -- Ignacio Castaño - - #include "Memory.h" - #include "Debug.h" -@@ -2,33 +3,114 @@ - --//#if HAVE_MALLOC_H --//#include --//#endif -- - #include - -+#define USE_EFENCE 0 - -+#if USE_EFENCE -+extern "C" void *EF_malloc(size_t size); -+extern "C" void *EF_realloc(void * oldBuffer, size_t newSize); -+extern "C" void EF_free(void * address); -+#endif -+ - using namespace nv; - --void * nv::mem::malloc(size_t size) -+#if NV_OVERRIDE_ALLOC -+ -+void * malloc(size_t size) - { -- return ::malloc(size); -+#if USE_EFENCE -+ return EF_malloc(size); -+#else -+ return ::malloc(size); -+#endif - } - --void * nv::mem::malloc(size_t size, const char * file, 
int line) -+void * debug_malloc(size_t size, const char * file, int line) - { -- NV_UNUSED(file); -- NV_UNUSED(line); -- return ::malloc(size); -+ NV_UNUSED(file); -+ NV_UNUSED(line); -+#if USE_EFENCE -+ return EF_malloc(size); -+#else -+ return ::malloc(size); -+#endif - } - --void nv::mem::free(const void * ptr) -+void free(void * ptr) - { -- ::free(const_cast(ptr)); -+#if USE_EFENCE -+ return EF_free(const_cast(ptr)); -+#else -+ ::free(const_cast(ptr)); -+#endif - } - --void * nv::mem::realloc(void * ptr, size_t size) -+void * realloc(void * ptr, size_t size) - { -- nvDebugCheck(ptr != NULL || size != 0); // undefined realloc behavior. -- return ::realloc(ptr, size); -+ nvDebugCheck(ptr != NULL || size != 0); // undefined realloc behavior. -+#if USE_EFENCE -+ return EF_realloc(ptr, size); -+#else -+ return ::realloc(ptr, size); -+#endif - } - -+/* No need to override this unless we want line info. -+void * operator new (size_t size) throw() -+{ -+ return malloc(size); -+} -+ -+void operator delete (void *p) throw() -+{ -+ free(p); -+} -+ -+void * operator new [] (size_t size) throw() -+{ -+ return malloc(size); -+} -+ -+void operator delete [] (void * p) throw() -+{ -+ free(p); -+} -+*/ -+ -+#if 0 // Code from Apple: -+void* operator new(std::size_t sz) throw (std::bad_alloc) -+{ -+ void *result = std::malloc (sz == 0 ? 1 : sz); -+ if (result == NULL) -+ throw std::bad_alloc(); -+ gNewCounter++; -+ return result; -+} -+void operator delete(void* p) throw() -+{ -+ if (p == NULL) -+ return; -+ std::free (p); -+ gDeleteCounter++; -+} -+ -+/* These are the 'nothrow' versions of the above operators. -+ The system version will try to call a std::new_handler if they -+ fail, but your overriding versions are not required to do this. */ -+void* operator new(std::size_t sz, const std::nothrow_t&) throw() -+{ -+ try { -+ void * result = ::operator new (sz); // calls our overridden operator new -+ return result; -+ } catch (std::bad_alloc &) { -+ return NULL; -+ } -+} -+void operator delete(void* p, const std::nothrow_t&) throw() -+{ -+ ::operator delete (p); -+} -+ -+#endif // 0 -+ -+ -+#endif // NV_OVERRIDE_ALLOC -Index: branches/2.0/src/nvcore/Containers.h -=================================================================== ---- branches/2.0/src/nvcore/Containers.h (revision 1171) -+++ branches/2.0/src/nvcore/Containers.h (revision 1172) -@@ -16,9 +16,9 @@ - - - // nvcore --#include --#include --#include -+#include "nvcore.h" -+#include "Memory.h" -+#include "Debug.h" - - #include // memmove - #include // for placement new -@@ -589,15 +589,15 @@ - // free the buffer. 
- if( m_buffer_size == 0 ) { - if( m_buffer ) { -- mem::free( m_buffer ); -+ free( m_buffer ); - m_buffer = NULL; - } - } - - // realloc the buffer - else { -- if( m_buffer ) m_buffer = (T *) mem::realloc( m_buffer, sizeof(T) * m_buffer_size ); -- else m_buffer = (T *) mem::malloc( sizeof(T) * m_buffer_size ); -+ if( m_buffer ) m_buffer = (T *) realloc(m_buffer, sizeof(T) * m_buffer_size); -+ else m_buffer = (T *) ::malloc(sizeof(T) * m_buffer_size); - } - } - -@@ -778,7 +778,7 @@ - e->clear(); - } - } -- mem::free(table); -+ free(table); - table = NULL; - entry_count = 0; - size_mask = -1; -@@ -1001,7 +1001,7 @@ - new_size = nextPowerOfTwo(new_size); - - HashMap new_hash; -- new_hash.table = (Entry *) mem::malloc(sizeof(Entry) * new_size); -+ new_hash.table = (Entry *) ::malloc(sizeof(Entry) * new_size); - nvDebugCheck(new_hash.table != NULL); - - new_hash.entry_count = 0; -@@ -1026,7 +1026,7 @@ - } - - // Delete our old data buffer. -- mem::free(table); -+ free(table); - } - - // Steal new_hash's data. -Index: branches/2.0/src/nvcore/Memory.h -=================================================================== ---- branches/2.0/src/nvcore/Memory.h (revision 1171) -+++ branches/2.0/src/nvcore/Memory.h (revision 1172) -@@ -1,186 +1,52 @@ --// This code is in the public domain -- castanyo@yahoo.es -+// This code is in the public domain -- Ignacio Castaño - -+#pragma once - #ifndef NV_CORE_MEMORY_H - #define NV_CORE_MEMORY_H - --#include -+#include "nvcore.h" - - #include // malloc(), realloc() and free() --#include // size_t -+#include // size_t - - #include // new and delete - --// Custom memory allocator --namespace nv --{ -- namespace mem -- { -- NVCORE_API void * malloc(size_t size); -- NVCORE_API void * malloc(size_t size, const char * file, int line); -- -- NVCORE_API void free(const void * ptr); -- NVCORE_API void * realloc(void * ptr, size_t size); -- -- } // mem namespace -- --} // nv namespace -+#define NV_OVERRIDE_ALLOC 0 - -+#if NV_OVERRIDE_ALLOC - --// Override new/delete -- --inline void * operator new (size_t size) throw() --{ -- return nv::mem::malloc(size); -+// Custom memory allocator -+extern "C" { -+ NVCORE_API void * malloc(size_t size); -+ NVCORE_API void * debug_malloc(size_t size, const char * file, int line); -+ NVCORE_API void free(void * ptr); -+ NVCORE_API void * realloc(void * ptr, size_t size); - } - --inline void operator delete (void *p) throw() --{ -- nv::mem::free(p); --} -- --inline void * operator new [] (size_t size) throw() --{ -- return nv::mem::malloc(size); --} -- --inline void operator delete [] (void * p) throw() --{ -- nv::mem::free(p); --} -- - /* - #ifdef _DEBUG - #define new new(__FILE__, __LINE__) --#define malloc(i) malloc(i, __FILE__, __LINE__) -+#define malloc(i) debug_malloc(i, __FILE__, __LINE__) - #endif - */ - --#if 0 --/* -- File: main.cpp -- -- Version: 1.0 -+#endif - -- Abstract: Overrides the C++ 'operator new' and 'operator delete'. -+namespace nv { - -- Disclaimer: IMPORTANT: This Apple software is supplied to you by Apple Computer, Inc. -- ("Apple") in consideration of your agreement to the following terms, and your -- use, installation, modification or redistribution of this Apple software -- constitutes acceptance of these terms. If you do not agree with these terms, -- please do not use, install, modify or redistribute this Apple software. -+ // C++ helpers. 
-+ template T * malloc(size_t count) { -+ return (T *)::malloc(sizeof(T) * count); -+ } - -- In consideration of your agreement to abide by the following terms, and subject -- to these terms, Apple grants you a personal, non-exclusive license, under Apple’s -- copyrights in this original Apple software (the "Apple Software"), to use, -- reproduce, modify and redistribute the Apple Software, with or without -- modifications, in source and/or binary forms; provided that if you redistribute -- the Apple Software in its entirety and without modifications, you must retain -- this notice and the following text and disclaimers in all such redistributions of -- the Apple Software. Neither the name, trademarks, service marks or logos of -- Apple Computer, Inc. may be used to endorse or promote products derived from the -- Apple Software without specific prior written permission from Apple. Except as -- expressly stated in this notice, no other rights or licenses, express or implied, -- are granted by Apple herein, including but not limited to any patent rights that -- may be infringed by your derivative works or by other works in which the Apple -- Software may be incorporated. -+ template T * realloc(T * ptr, size_t count) { -+ return (T *)::realloc(ptr, sizeof(T) * count); -+ } - -- The Apple Software is provided by Apple on an "AS IS" basis. APPLE MAKES NO -- WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED -- WARRANTIES OF NON-INFRINGEMENT, MERCHANTABILITY AND FITNESS FOR A PARTICULAR -- PURPOSE, REGARDING THE APPLE SOFTWARE OR ITS USE AND OPERATION ALONE OR IN -- COMBINATION WITH YOUR PRODUCTS. -+ template void free(const T * ptr) { -+ ::free((void *)ptr); -+ } - -- IN NO EVENT SHALL APPLE BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL OR -- CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE -- GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -- ARISING IN ANY WAY OUT OF THE USE, REPRODUCTION, MODIFICATION AND/OR DISTRIBUTION -- OF THE APPLE SOFTWARE, HOWEVER CAUSED AND WHETHER UNDER THEORY OF CONTRACT, TORT -- (INCLUDING NEGLIGENCE), STRICT LIABILITY OR OTHERWISE, EVEN IF APPLE HAS BEEN -- ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+} // nv namespace - -- Copyright © 2006 Apple Computer, Inc., All Rights Reserved --*/ -- --/* This sample shows how to override the C++ global 'new' and 'delete' operators. */ --#include --#include --#include --#include --#include -- --/* Some variables and code to make the example do something. */ --namespace { -- unsigned long long gNewCounter; // number of times 'new' was called -- unsigned long long gDeleteCounter; // number of times 'delete' was called -- -- void printCounters() // print the counters above -- { -- std::cout << "new was called " << gNewCounter << " times and delete was called " << gDeleteCounter << " times\n"; -- } --} -- --/* These are the overridden new and delete routines. -- Most applications will want to override at least these four versions of new/delete if they override any of them. -- -- In Mac OS, it's not necessary to override the array versions of operator new and delete if all -- they would do is call the non-array versions; the C++ standard library, as an extension -- to the C++ standard, does this for you. -- -- Developers should consult the section [lib.support.dynamic] in the C++ standard to see the requirements -- on the generic operators new and delete; the system may expect that your overridden operators meet all these -- requirements. 
-- -- Your operators may be called by the system, even early in start-up before constructors have been executed. */ --void* operator new(std::size_t sz) throw (std::bad_alloc) --{ -- void *result = std::malloc (sz == 0 ? 1 : sz); -- if (result == NULL) -- throw std::bad_alloc(); -- gNewCounter++; -- return result; --} --void operator delete(void* p) throw() --{ -- if (p == NULL) -- return; -- std::free (p); -- gDeleteCounter++; --} -- --/* These are the 'nothrow' versions of the above operators. -- The system version will try to call a std::new_handler if they -- fail, but your overriding versions are not required to do this. */ --void* operator new(std::size_t sz, const std::nothrow_t&) throw() --{ -- try { -- void * result = ::operator new (sz); // calls our overridden operator new -- return result; -- } catch (std::bad_alloc &) { -- return NULL; -- } --} --void operator delete(void* p, const std::nothrow_t&) throw() --{ -- ::operator delete (p); --} -- --/* Bug 4067110 is that if your program has no weak symbols at all, the linker will not set the -- WEAK_DEFINES bit in the Mach-O header and as a result the new and delete operators above won't -- be seen by system libraries. This is mostly a problem for test programs and small examples, -- since almost all real C++ programs complicated enough to override new and delete will have at -- least one weak symbol. However, this is a small example, so: */ --void __attribute__((weak, visibility("default"))) workaroundFor4067110 () { } -- --/* This is a simple test program that causes the runtime library to call new and delete. */ --int main() --{ -- atexit (printCounters); -- try { -- std::locale example("does_not_exist"); -- } catch (std::runtime_error &x) { -- } -- return 0; --} --#endif // 0 -- - #endif // NV_CORE_MEMORY_H Index: ps/trunk/libraries/source/nvtt/patches/r907.patch =================================================================== --- ps/trunk/libraries/source/nvtt/patches/r907.patch +++ ps/trunk/libraries/source/nvtt/patches/r907.patch @@ -1,73 +0,0 @@ -Index: src/nvcore/nvcore.h -=================================================================== ---- src/nvcore/nvcore.h (revision 906) -+++ src/nvcore/nvcore.h (revision 907) -@@ -38,6 +38,9 @@ - #if defined POSH_OS_LINUX - # define NV_OS_LINUX 1 - # define NV_OS_UNIX 1 -+#elif defined POSH_OS_FREEBSD -+# define NV_OS_FREEBSD 1 -+# define NV_OS_UNIX 1 - #elif defined POSH_OS_CYGWIN32 - # define NV_OS_CYGWIN 1 - #elif defined POSH_OS_MINGW -@@ -161,7 +164,7 @@ - #elif NV_CC_GNUC - # if NV_OS_LINUX - # include "DefsGnucLinux.h" --# elif NV_OS_DARWIN -+# elif NV_OS_DARWIN || NV_OS_FREEBSD - # include "DefsGnucDarwin.h" - # elif NV_OS_MINGW - # include "DefsGnucWin32.h" -Index: src/nvcore/Debug.cpp -=================================================================== ---- src/nvcore/Debug.cpp (revision 906) -+++ src/nvcore/Debug.cpp (revision 907) -@@ -34,7 +34,7 @@ - # endif - #endif - --#if NV_OS_DARWIN -+#if NV_OS_DARWIN || NV_OS_FREEBSD - # include // getpid - # include - # include // sysctl -@@ -199,6 +199,14 @@ - return (void *) ucp->uc_mcontext->ss.eip; - # endif - # endif -+# elif NV_OS_FREEBSD -+# if NV_CPU_X86_64 -+ ucontext_t * ucp = (ucontext_t *)secret; -+ return (void *)ucp->uc_mcontext.mc_rip; -+# elif NV_CPU_X86 -+ ucontext_t * ucp = (ucontext_t *)secret; -+ return (void *)ucp->uc_mcontext.mc_eip; -+# endif - # else - # if NV_CPU_X86_64 - // #define REG_RIP REG_INDEX(rip) // seems to be 16 -Index: src/nvmath/nvmath.h 
-=================================================================== ---- src/nvmath/nvmath.h (revision 906) -+++ src/nvmath/nvmath.h (revision 907) -@@ -115,7 +115,7 @@ - { - #if NV_OS_WIN32 - return _finite(f) != 0; --#elif NV_OS_DARWIN -+#elif NV_OS_DARWIN || NV_OS_FREEBSD - return isfinite(f); - #elif NV_OS_LINUX - return finitef(f); -@@ -130,7 +130,7 @@ - { - #if NV_OS_WIN32 - return _isnan(f) != 0; --#elif NV_OS_DARWIN -+#elif NV_OS_DARWIN || NV_OS_FREEBSD - return isnan(f); - #elif NV_OS_LINUX - return isnanf(f); Index: ps/trunk/libraries/source/nvtt/patches/rpath.patch =================================================================== --- ps/trunk/libraries/source/nvtt/patches/rpath.patch +++ ps/trunk/libraries/source/nvtt/patches/rpath.patch @@ -1,8 +1,12 @@ -Index: libraries/nvtt/src/CMakeLists.txt -=================================================================== ---- libraries/nvtt/src/CMakeLists.txt (revision 8295) -+++ libraries/nvtt/src/CMakeLists.txt (working copy) -@@ -22,6 +22,10 @@ +--- + CMakeLists.txt | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/CMakeLists.txt b/CMakeLists.txt +index 9c80369..9e77386 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -42,6 +42,12 @@ IF(NVTT_SHARED) SET(NVIMAGE_SHARED TRUE) ENDIF(NVTT_SHARED) @@ -11,6 +15,8 @@ +SET(CMAKE_INSTALL_RPATH "$ORIGIN") +SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) +SET(CMAKE_INSTALL_NAME_DIR "@executable_path") ++ + ADD_SUBDIRECTORY(extern) ADD_SUBDIRECTORY(src) - +-- Index: ps/trunk/libraries/source/nvtt/patches/win-shared-build.patch =================================================================== --- ps/trunk/libraries/source/nvtt/patches/win-shared-build.patch +++ ps/trunk/libraries/source/nvtt/patches/win-shared-build.patch @@ -0,0 +1,362 @@ +--- + src/nvcore/StrLib.h | 8 ++++---- + src/nvimage/BlockDXT.h | 16 ++++++++-------- + src/nvimage/ColorBlock.h | 6 ++++-- + src/nvimage/DirectDrawSurface.cpp | 4 ++-- + src/nvimage/DirectDrawSurface.h | 4 ++-- + src/nvimage/ErrorMetric.h | 8 ++++---- + src/nvimage/FloatImage.h | 4 ++-- + src/nvimage/NormalMap.h | 8 ++++---- + src/nvmath/Fitting.h | 12 ++++++------ + src/nvmath/Gamma.h | 4 ++-- + src/nvmath/Half.cpp | 10 ++++++++++ + src/nvmath/Half.h | 23 ++++------------------- + src/nvtt/CMakeLists.txt | 1 + + 13 files changed, 53 insertions(+), 55 deletions(-) + +diff --git a/src/nvcore/StrLib.h b/src/nvcore/StrLib.h +index 1d6d13a..1ae8e91 100644 +--- a/src/nvcore/StrLib.h ++++ b/src/nvcore/StrLib.h +@@ -197,11 +197,11 @@ namespace nv + void stripExtension(); + + // statics +- NVCORE_API static char separator(); +- NVCORE_API static const char * fileName(const char *); +- NVCORE_API static const char * extension(const char *); ++ static char separator(); ++ static const char * fileName(const char *); ++ static const char * extension(const char *); + +- NVCORE_API static void translatePath(char * path, char pathSeparator = NV_PATH_SEPARATOR); ++ static void translatePath(char * path, char pathSeparator = NV_PATH_SEPARATOR); + }; + + +diff --git a/src/nvimage/BlockDXT.h b/src/nvimage/BlockDXT.h +index 18a3b65..8ad5bed 100644 +--- a/src/nvimage/BlockDXT.h ++++ b/src/nvimage/BlockDXT.h +@@ -39,7 +39,7 @@ namespace nv + + + /// DXT1 block. +- struct BlockDXT1 ++ struct NVIMAGE_CLASS BlockDXT1 + { + Color16 col0; + Color16 col1; +@@ -105,7 +105,7 @@ namespace nv + + + /// DXT3 block. 
+- struct BlockDXT3 ++ struct NVIMAGE_CLASS BlockDXT3 + { + AlphaBlockDXT3 alpha; + BlockDXT1 color; +@@ -119,7 +119,7 @@ namespace nv + + + /// DXT5 alpha block. +- struct AlphaBlockDXT5 ++ struct NVIMAGE_CLASS AlphaBlockDXT5 + { + union { + struct { +@@ -162,7 +162,7 @@ namespace nv + + + /// DXT5 block. +- struct BlockDXT5 ++ struct NVIMAGE_CLASS BlockDXT5 + { + AlphaBlockDXT5 alpha; + BlockDXT1 color; +@@ -175,7 +175,7 @@ namespace nv + }; + + /// ATI1 block. +- struct BlockATI1 ++ struct NVIMAGE_CLASS BlockATI1 + { + AlphaBlockDXT5 alpha; + +@@ -186,7 +186,7 @@ namespace nv + }; + + /// ATI2 block. +- struct BlockATI2 ++ struct NVIMAGE_CLASS BlockATI2 + { + AlphaBlockDXT5 x; + AlphaBlockDXT5 y; +@@ -217,14 +217,14 @@ namespace nv + }; + + /// BC6 block. +- struct BlockBC6 ++ struct NVIMAGE_CLASS BlockBC6 + { + uint8 data[16]; // Not even going to try to write a union for this thing. + void decodeBlock(Vector3 colors[16]) const; + }; + + /// BC7 block. +- struct BlockBC7 ++ struct NVIMAGE_CLASS BlockBC7 + { + uint8 data[16]; // Not even going to try to write a union for this thing. + void decodeBlock(ColorBlock * block) const; +diff --git a/src/nvimage/ColorBlock.h b/src/nvimage/ColorBlock.h +index 6638f56..d63d5a5 100644 +--- a/src/nvimage/ColorBlock.h ++++ b/src/nvimage/ColorBlock.h +@@ -4,6 +4,8 @@ + #ifndef NV_IMAGE_COLORBLOCK_H + #define NV_IMAGE_COLORBLOCK_H + ++#include "nvimage.h" ++ + #include "nvmath/Color.h" + #include "nvmath/Vector.h" + +@@ -14,7 +16,7 @@ namespace nv + + + /// Uncompressed 4x4 color block. +- struct ColorBlock ++ struct NVIMAGE_CLASS ColorBlock + { + ColorBlock(); + ColorBlock(const uint * linearImage); +@@ -128,7 +130,7 @@ namespace nv + + + /// Uncompressed 4x4 alpha block. +- struct AlphaBlock4x4 ++ struct NVIMAGE_CLASS AlphaBlock4x4 + { + void init(uint8 value); + void init(const ColorBlock & src, uint channel); +diff --git a/src/nvimage/DirectDrawSurface.cpp b/src/nvimage/DirectDrawSurface.cpp +index 2daaea5..ffa132e 100644 +--- a/src/nvimage/DirectDrawSurface.cpp ++++ b/src/nvimage/DirectDrawSurface.cpp +@@ -461,7 +461,7 @@ namespace + + } // namespace + +-uint nv::findD3D9Format(uint bitcount, uint rmask, uint gmask, uint bmask, uint amask) ++NVIMAGE_API uint nv::findD3D9Format(uint bitcount, uint rmask, uint gmask, uint bmask, uint amask) + { + for (int i = 0; i < s_formatCount; i++) + { +@@ -478,7 +478,7 @@ uint nv::findD3D9Format(uint bitcount, uint rmask, uint gmask, uint bmask, uint + return 0; + } + +-uint nv::findDXGIFormat(uint bitcount, uint rmask, uint gmask, uint bmask, uint amask) ++NVIMAGE_API uint nv::findDXGIFormat(uint bitcount, uint rmask, uint gmask, uint bmask, uint amask) + { + for (int i = 0; i < s_formatCount; i++) + { +diff --git a/src/nvimage/DirectDrawSurface.h b/src/nvimage/DirectDrawSurface.h +index d63fdde..6513b14 100644 +--- a/src/nvimage/DirectDrawSurface.h ++++ b/src/nvimage/DirectDrawSurface.h +@@ -263,9 +263,9 @@ namespace nv + DXGI_FORMAT_BC7_UNORM_SRGB = 99, + }; + +- extern uint findD3D9Format(uint bitcount, uint rmask, uint gmask, uint bmask, uint amask); ++ NVIMAGE_API extern uint findD3D9Format(uint bitcount, uint rmask, uint gmask, uint bmask, uint amask); + +- extern uint findDXGIFormat(uint bitcount, uint rmask, uint gmask, uint bmask, uint amask); ++ NVIMAGE_API extern uint findDXGIFormat(uint bitcount, uint rmask, uint gmask, uint bmask, uint amask); + + struct RGBAPixelFormat + { +diff --git a/src/nvimage/ErrorMetric.h b/src/nvimage/ErrorMetric.h +index b875802..df025b5 100644 +--- 
a/src/nvimage/ErrorMetric.h ++++ b/src/nvimage/ErrorMetric.h +@@ -6,10 +6,10 @@ namespace nv + { + class FloatImage; + +- float rmsColorError(const FloatImage * ref, const FloatImage * img, bool alphaWeight); +- float rmsAlphaError(const FloatImage * ref, const FloatImage * img); ++ NVIMAGE_API float rmsColorError(const FloatImage * ref, const FloatImage * img, bool alphaWeight); ++ NVIMAGE_API float rmsAlphaError(const FloatImage * ref, const FloatImage * img); + +- float cieLabError(const FloatImage * ref, const FloatImage * img); ++ NVIMAGE_API float cieLabError(const FloatImage * ref, const FloatImage * img); + float cieLab94Error(const FloatImage * ref, const FloatImage * img); + float spatialCieLabError(const FloatImage * ref, const FloatImage * img); + +@@ -17,6 +17,6 @@ namespace nv + float averageAlphaError(const FloatImage * ref, const FloatImage * img); + + float averageAngularError(const FloatImage * img0, const FloatImage * img1); +- float rmsAngularError(const FloatImage * img0, const FloatImage * img1); ++ NVIMAGE_API float rmsAngularError(const FloatImage * img0, const FloatImage * img1); + + } // nv namespace +diff --git a/src/nvimage/FloatImage.h b/src/nvimage/FloatImage.h +index 1015aec..104baf0 100644 +--- a/src/nvimage/FloatImage.h ++++ b/src/nvimage/FloatImage.h +@@ -152,7 +152,7 @@ namespace nv + float sampleNearestRepeat(uint c, float x, float y, float z) const; + float sampleNearestMirror(uint c, float x, float y, float z) const; + +- float sampleLinearClamp(uint c, float x, float y) const; ++ NVIMAGE_API float sampleLinearClamp(uint c, float x, float y) const; + float sampleLinearRepeat(uint c, float x, float y) const; + float sampleLinearMirror(uint c, float x, float y) const; + +@@ -162,7 +162,7 @@ namespace nv + //@} + + +- FloatImage* clone() const; ++ NVIMAGE_API FloatImage* clone() const; + + public: + +diff --git a/src/nvimage/NormalMap.h b/src/nvimage/NormalMap.h +index 3f13d42..39a27ad 100644 +--- a/src/nvimage/NormalMap.h ++++ b/src/nvimage/NormalMap.h +@@ -44,12 +44,12 @@ namespace nv + }; + + // @@ These two functions should be deprecated: +- FloatImage * createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, NormalMapFilter filter = NormalMapFilter_Sobel3x3); +- FloatImage * createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, Vector4::Arg filterWeights); ++ NVIMAGE_API FloatImage * createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, NormalMapFilter filter = NormalMapFilter_Sobel3x3); ++ NVIMAGE_API FloatImage * createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, Vector4::Arg filterWeights); + +- FloatImage * createNormalMap(const FloatImage * img, FloatImage::WrapMode wm, Vector4::Arg filterWeights); ++ NVIMAGE_API FloatImage * createNormalMap(const FloatImage * img, FloatImage::WrapMode wm, Vector4::Arg filterWeights); + +- void normalizeNormalMap(FloatImage * img); ++ NVIMAGE_API void normalizeNormalMap(FloatImage * img); + + // @@ Add generation of DU/DV maps. 
+ +diff --git a/src/nvmath/Fitting.h b/src/nvmath/Fitting.h +index 7a88cd2..5ffb50a 100644 +--- a/src/nvmath/Fitting.h ++++ b/src/nvmath/Fitting.h +@@ -23,14 +23,14 @@ namespace nv + Vector4 computeCovariance(int n, const Vector4 * points, float * covariance); + Vector4 computeCovariance(int n, const Vector4 * points, const float * weights, const Vector4 & metric, float * covariance); + +- Vector3 computePrincipalComponent_PowerMethod(int n, const Vector3 * points); +- Vector3 computePrincipalComponent_PowerMethod(int n, const Vector3 * points, const float * weights, const Vector3 & metric); ++ NVMATH_API Vector3 computePrincipalComponent_PowerMethod(int n, const Vector3 * points); ++ NVMATH_API Vector3 computePrincipalComponent_PowerMethod(int n, const Vector3 * points, const float * weights, const Vector3 & metric); + +- Vector3 computePrincipalComponent_EigenSolver(int n, const Vector3 * points); +- Vector3 computePrincipalComponent_EigenSolver(int n, const Vector3 * points, const float * weights, const Vector3 & metric); ++ NVMATH_API Vector3 computePrincipalComponent_EigenSolver(int n, const Vector3 * points); ++ NVMATH_API Vector3 computePrincipalComponent_EigenSolver(int n, const Vector3 * points, const float * weights, const Vector3 & metric); + +- Vector4 computePrincipalComponent_EigenSolver(int n, const Vector4 * points); +- Vector4 computePrincipalComponent_EigenSolver(int n, const Vector4 * points, const float * weights, const Vector4 & metric); ++ NVMATH_API Vector4 computePrincipalComponent_EigenSolver(int n, const Vector4 * points); ++ NVMATH_API Vector4 computePrincipalComponent_EigenSolver(int n, const Vector4 * points, const float * weights, const Vector4 & metric); + + Vector3 computePrincipalComponent_SVD(int n, const Vector3 * points); + Vector4 computePrincipalComponent_SVD(int n, const Vector4 * points); +diff --git a/src/nvmath/Gamma.h b/src/nvmath/Gamma.h +index e990a79..f59dd05 100644 +--- a/src/nvmath/Gamma.h ++++ b/src/nvmath/Gamma.h +@@ -30,8 +30,8 @@ + namespace nv { + + // gamma conversion of float array (in-place is allowed) +- void powf_5_11(const float* src, float* dst, int count); +- void powf_11_5(const float* src, float* dst, int count); ++ NVMATH_API void powf_5_11(const float* src, float* dst, int count); ++ NVMATH_API void powf_11_5(const float* src, float* dst, int count); + + } // nv namespace + +diff --git a/src/nvmath/Half.cpp b/src/nvmath/Half.cpp +index 953cc7c..efb4ab8 100644 +--- a/src/nvmath/Half.cpp ++++ b/src/nvmath/Half.cpp +@@ -633,6 +633,16 @@ void nv::half_init_tables() + } + } + ++// Fast half to float conversion based on: ++// http://www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf ++uint32 nv::fast_half_to_float(uint16 h) ++{ ++ // Initialize table if necessary. ++ if (mantissa_table[0] != 0) ++ half_init_tables(); ++ uint exp = h >> 10; ++ return mantissa_table[offset_table[exp] + (h & 0x3ff)] + exponent_table[exp]; ++} + + #if 0 + +diff --git a/src/nvmath/Half.h b/src/nvmath/Half.h +index 6f5b8ad..77dff5a 100644 +--- a/src/nvmath/Half.h ++++ b/src/nvmath/Half.h +@@ -6,30 +6,15 @@ + + namespace nv { + +- uint32 half_to_float( uint16 h ); +- uint16 half_from_float( uint32 f ); ++ NVMATH_API uint32 half_to_float( uint16 h ); ++ NVMATH_API uint16 half_from_float( uint32 f ); + + // vin,vout must be 16 byte aligned. count must be a multiple of 8. + // implement a non-SSE version if we need it. 
For now, this naming makes it clear this is only available when SSE2 is + void half_to_float_array_SSE2(const uint16 * vin, float * vout, int count); + +- void half_init_tables(); +- +- extern uint32 mantissa_table[2048]; +- extern uint32 exponent_table[64]; +- extern uint32 offset_table[64]; +- +- // Fast half to float conversion based on: +- // http://www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf +- inline uint32 fast_half_to_float(uint16 h) +- { +- // Initialize table if necessary. +- if (mantissa_table[0] != 0) +- half_init_tables(); +- uint exp = h >> 10; +- return mantissa_table[offset_table[exp] + (h & 0x3ff)] + exponent_table[exp]; +- } +- ++ NVMATH_API void half_init_tables(); ++ NVMATH_API uint32 fast_half_to_float(uint16 h); + + inline uint16 to_half(float c) { + union { float f; uint32 u; } f; +diff --git a/src/nvtt/CMakeLists.txt b/src/nvtt/CMakeLists.txt +index e543807..a0d8aa9 100644 +--- a/src/nvtt/CMakeLists.txt ++++ b/src/nvtt/CMakeLists.txt +@@ -46,6 +46,7 @@ IF(NVTT_SHARED) + SET(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-z,origin") + ENDIF(CMAKE_SYSTEM_NAME MATCHES "FreeBSD") + ++ ADD_DEFINITIONS(-DNVTT_SHARED=1) + ADD_LIBRARY(nvtt SHARED ${NVTT_SRCS}) + ELSE(NVTT_SHARED) + ADD_LIBRARY(nvtt ${NVTT_SRCS}) +-- Index: ps/trunk/libraries/source/nvtt/src/CMakeLists.txt =================================================================== --- ps/trunk/libraries/source/nvtt/src/CMakeLists.txt +++ ps/trunk/libraries/source/nvtt/src/CMakeLists.txt @@ -1,21 +1,41 @@ -CMAKE_MINIMUM_REQUIRED(VERSION 2.6.0) +CMAKE_MINIMUM_REQUIRED(VERSION 2.8.0) PROJECT(NV) ENABLE_TESTING() SET(NV_CMAKE_DIR "${NV_SOURCE_DIR}/cmake") SET(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${NV_CMAKE_DIR}") -IF(WIN32) - SET(GNUWIN32 "${NV_SOURCE_DIR}/gnuwin32") - SET(CMAKE_INCLUDE_PATH "${GNUWIN32}/include") - SET(CMAKE_LIBRARY_PATH "${GNUWIN32}/lib") -ENDIF(WIN32) +# GCC check (needs -std:c++11 flag) +#if(CMAKE_COMPILER_IS_GNUCC) +# ADD_DEFINITIONS("-std=c++11") +#ENDIF(CMAKE_COMPILER_IS_GNUCC) +set (CMAKE_CXX_STANDARD 11) + +#IF(WIN32) + # gnuwin32 paths: + #SET(GNUWIN32_PATH "${NV_SOURCE_DIR}/extern/gnuwin32") + #SET(CMAKE_INCLUDE_PATH ${CMAKE_INCLUDE_PATH} "${GNUWIN32_PATH}/include") + #SET(CMAKE_LIBRARY_PATH ${CMAKE_LIBRARY_PATH} "${GNUWIN32_PATH}/lib") + + # Set GLUT path: + #SET(GLUT_ROOT_DIR "${NV_SOURCE_DIR}/extern/glut") + + # Set FreeImage path: + #SET(FREEIMAGE_ROOT_DIR "${NV_SOURCE_DIR}/extern/FreeImage") + +#ENDIF(WIN32) INCLUDE(${NV_CMAKE_DIR}/OptimalOptions.cmake) MESSAGE(STATUS "Setting optimal options") MESSAGE(STATUS " Processor: ${NV_SYSTEM_PROCESSOR}") MESSAGE(STATUS " Compiler Flags: ${CMAKE_CXX_FLAGS}") +IF(CMAKE_BUILD_TYPE MATCHES "debug") + SET(CMAKE_DEBUG_POSTFIX "_d" CACHE STRING "Postfix for debug build libraries.") + ADD_DEFINITIONS(-D_DEBUG=1) +ENDIF() + + IF(NVTT_SHARED) SET(NVCORE_SHARED TRUE) SET(NVMATH_SHARED TRUE) @@ -28,8 +48,41 @@ SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) SET(CMAKE_INSTALL_NAME_DIR "@executable_path") +ADD_SUBDIRECTORY(extern) + ADD_SUBDIRECTORY(src) +# These files should only be installed when creating packages. 
+INSTALL(FILES + LICENSE + README.md + DESTINATION share/doc/nvtt) + +# Add packaging support +INCLUDE(InstallRequiredSystemLibraries) + +IF(CMAKE_SYSTEM_NAME STREQUAL "Linux") + SET(CPACK_GENERATOR "TGZ;DEB") +ENDIF(CMAKE_SYSTEM_NAME STREQUAL "Linux") + +SET(CPACK_PACKAGE_NAME "nvidia-texture-tools") +SET(CPACK_PACKAGE_VERSION_MAJOR "2") +SET(CPACK_PACKAGE_VERSION_MINOR "1") +SET(CPACK_PACKAGE_VERSION_PATCH "0") +SET(CPACK_PACKAGE_VERSION "2.1.0") +SET(CPACK_PACKAGE_CONTACT "Ignacio Castaño ") +#SET(CPACK_PACKAGE_VENDOR "NVIDIA Corporation") +SET(CPACK_PACKAGE_DESCRIPTION_SUMMARY "Texture processing tools with support for Direct3D 10 and 11 formats.") + +SET(CPACK_PACKAGE_DESCRIPTION_FILE "${NV_SOURCE_DIR}/README.md") +SET(CPACK_RESOURCE_FILE_LICENSE "${NV_SOURCE_DIR}/LICENSE") + +# NSIS options: IF(WIN32) - ADD_SUBDIRECTORY(gnuwin32) + SET(CPACK_NSIS_DISPLAY_NAME "${CPACK_PACKAGE_VENDOR}\\\\NVIDIA Texture Tools 2.1") + SET(CPACK_PACKAGE_INSTALL_DIRECTORY "${CPACK_PACKAGE_VENDOR}\\\\NVIDIA Texture Tools 2.1") + SET(CPACK_PACKAGE_ICON "${NV_SOURCE_DIR}\\\\project\\\\vc8\\\\nvcompress\\\\nvidia.ico") ENDIF(WIN32) + +INCLUDE(CPack) + Index: ps/trunk/libraries/source/nvtt/src/ChangeLog =================================================================== --- ps/trunk/libraries/source/nvtt/src/ChangeLog +++ ps/trunk/libraries/source/nvtt/src/ChangeLog @@ -1,15 +1,25 @@ -NVIDIA Texture Tools version 2.0.8 - * Fix float to fixed image conversion. Patch provided by Alex Pfaffe. Fixes issue 121. - * ColorBlock::isSingleColor compares only RGB channels. Fixes issue 115. - * Fix cmake build in msvc. Fixes issue 111. - * Better estimate principal component. Fixes issue 120. +NVIDIA Texture Tools version 2.1.1 + * Various fixes. + +NVIDIA Texture Tools version 2.1.0 + * Too many changes to list here. + * CTX1 CUDA compressor. + * DXT1n CUDA compressor. + * Support alpha premultiplication by Charles Nicholson. See issue 30. + * Improved decompressor tool submitted by Amorilia. See issue 41. + * Add support for YCoCg color transform. Fixes issue 18. + * Add support for linear and swizzle transforms. Fixes issue 4. + * Fix loading of EXR files using OpenEXR. + * Use FreeImage as primary image loading library. Fixes issue 31. Reverted. + * Output swizzle codes like AMD's tools. + * Added support for saving PNGs by Frank Richter. Fixes issue 79 and 80. + * Added gnome thumbnailer by Frank Richter. Fixes issue 82. + * Cleanup sources removing files that are not strictly required. NVIDIA Texture Tools version 2.0.7 * Output correct exit codes. Fixes issue 92. * Fix thread-safety errors. Fixes issue 90. * Add SIMD power method. Fixes issue 94. - * Interact better with applications that already use CUDA. - * Faster CPU compression. NVIDIA Texture Tools version 2.0.6 * Fix dll version checking. Index: ps/trunk/libraries/source/nvtt/src/LICENSE =================================================================== --- ps/trunk/libraries/source/nvtt/src/LICENSE +++ ps/trunk/libraries/source/nvtt/src/LICENSE @@ -0,0 +1,25 @@ +NVIDIA Texture Tools is licensed under the MIT license. 
+ +Copyright (c) 2009-2016 Ignacio Castano +Copyright (c) 2007-2009 NVIDIA Corporation + +Permission is hereby granted, free of charge, to any person +obtaining a copy of this software and associated documentation +files (the "Software"), to deal in the Software without +restriction, including without limitation the rights to use, +copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. Index: ps/trunk/libraries/source/nvtt/src/NVIDIA_Texture_Tools_LICENSE.txt =================================================================== --- ps/trunk/libraries/source/nvtt/src/NVIDIA_Texture_Tools_LICENSE.txt +++ ps/trunk/libraries/source/nvtt/src/NVIDIA_Texture_Tools_LICENSE.txt @@ -1,24 +0,0 @@ -NVIDIA Texture Tools 2.0 is licensed under the MIT license. - -Copyright (c) 2007 NVIDIA Corporation - -Permission is hereby granted, free of charge, to any person -obtaining a copy of this software and associated documentation -files (the "Software"), to deal in the Software without -restriction, including without limitation the rights to use, -copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the -Software is furnished to do so, subject to the following -conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -OTHER DEALINGS IN THE SOFTWARE. Index: ps/trunk/libraries/source/nvtt/src/NVIDIA_Texture_Tools_README.txt =================================================================== --- ps/trunk/libraries/source/nvtt/src/NVIDIA_Texture_Tools_README.txt +++ ps/trunk/libraries/source/nvtt/src/NVIDIA_Texture_Tools_README.txt @@ -1,167 +0,0 @@ --------------------------------------------------------------------------------- --------------------------------------------------------------------------------- -NVIDIA Texture Tools -README.txt -Version 2.0 --------------------------------------------------------------------------------- --------------------------------------------------------------------------------- - --------------------------------------------------------------------------------- -TABLE OF CONTENTS --------------------------------------------------------------------------------- -I. Instructions -II. Contents -III. Compilation Instructions -IV. 
Using NVIDIA Texture Tools in your own applications -V. Known Issues -VI. Frequently Asked Questions --------------------------------------------------------------------------------- - -I. Introduction --------------------------------------------------------------------------------- - -This is our first alpha release of our new Texture Tools. The main highlights of -this release are support for all DX10 texture formats, higher speed and improved -compression quality. - -In addition to that it also comes with a hardware accelerated compressor that -uses CUDA to compress blocks in parallel on the GPU and runs around 10 times -faster than the CPU counterpart. - -You can obtain CUDA from our developer site at: - -http://developer.nvidia.com/object/cuda.html - -The source code of the Texture Tools is being released under the terms of -the MIT license. - - -II. Contents --------------------------------------------------------------------------------- - -This release contains only the source code of the texture compression library -and an example commandline application that shows its use. - - -III. Compilation Instructions --------------------------------------------------------------------------------- - -The compression library and the example can be compiled with Visual Studio 8 on -Windows using the following solution file: - -project\vc8\nvtt.sln - -On most other platforms you can also use cmake. For more information about -cmake, visit: - -http://www.cmake.org/ - -On unix systems you can use the standard build procedure (assuming cmake is -installed on your system): - -$ ./configure -$ make -$ sudo make install - - -IV. Using NVIDIA Texture Tools --------------------------------------------------------------------------------- - -To use the NVIDIA Texture Tools in your own applications you just have to -include the following header file: - -src/nvimage/nvtt/nvtt.h - -And include the nvtt library in your projects. - -The following file contains a simple example that shows how to use the library: - -src/nvimage/nvtt/compress.cpp - -The usage of the commandline tool is the following: - -$ nvcompress [options] infile [outfile] - -where 'infile' is and TGA, PNG, PSD, DDS or JPG file, 'outfile' is a DDS file -and 'options' is one or more of the following: - -Input options: - -color The input image is a color map (default). - -normal The input image is a normal map. - -tonormal Convert input to normal map. - -clamp Clamp wrapping mode (default). - -repeat Repeat wrapping mode. - -nomips Disable mipmap generation. - -Compression options: - -fast Fast compression. - -nocuda Do not use cuda compressor. - -rgb RGBA format - -bc1 BC1 format (DXT1) - -bc2 BC2 format (DXT3) - -bc3 BC3 format (DXT5) - -bc3n BC3 normal map format (DXT5n/RXGB) - -bc4 BC4 format (ATI1) - -bc5 BC5 format (3Dc/ATI2) - -In order to run the compiled example on a PC that doesn't have Microsoft Visual -Studio 2003 installed, you will have to install the Microsoft Visual Studio 2003 -redistributable package that you can download at: - -http://go.microsoft.com/fwlink/?linkid=65127&clcid=0x409 - - -V. Known Issues --------------------------------------------------------------------------------- - -None so far. Please send suggestions and bug reports to: - -TextureTools@nvidia.com - -or report them at: - -http://code.google.com/p/nvidia-texture-tools/issues/list - - -VI. Frequently Asked Questions --------------------------------------------------------------------------------- - -- Do the NVIDIA Texture Tools work on OSX? 
-It currently compiles and runs properly, but it has not been tested extensively. -In particular there may be endiannes errors in the code. - - -- Do the NVIDIA Texture Tools work on Linux? -Yes. - - -- Do the NVIDIA Texture Tools work on Vista? -Yes, but note that CUDA is not supported on Vista yet, so the tool is not hardware -accelerated. - - -- Is CUDA required? -No. The Visual Studio solution file contains a configuration that allows you -to compile the texture tools without CUDA support. The cmake scripts automatically -detect the CUDA installation and use it only when available. - - -- Where can I get CUDA? -http://developer.nvidia.com/object/cuda.html - - -- Why is feature XYZ not supported? -In order to keep the code small and reduce maintenance costs we have limited the -features available in our new texture tools. We also have open sourced the code, so -that people can modify it and add their own favourite features. - - -- Can I use the NVIDIA Texture Tools in my commercial application? -Yes, the NVIDIA Texture Tools are licensed under the MIT license. - - -- Can I use the NVIDIA Texture Tools in my GPL application? -Yes, the MIT license is compatible with the GPL and LGPL licenses. - - - Index: ps/trunk/libraries/source/nvtt/src/README.md =================================================================== --- ps/trunk/libraries/source/nvtt/src/README.md +++ ps/trunk/libraries/source/nvtt/src/README.md @@ -0,0 +1,46 @@ +NVIDIA Texture Tools +==================== + +The NVIDIA Texture Tools is a collection of image processing and texture +manipulation tools, designed to be integrated in game tools and asset +processing pipelines. + +The primary features of the library are mipmap and normal map generation, format +conversion and DXT compression. + + +### How to build (Windows) + +Open `project/vc12/thekla.sln` using Visual Studio. + +Solutions for previous versions are also available, but they may not be up to date. + + +### How to build (Linux/OSX) + +Use [cmake](http://www.cmake.org/) and the provided configure script: + +```bash +$ ./configure +$ make +$ sudo make install +``` + + +### Using NVIDIA Texture Tools + +To use the NVIDIA Texture Tools in your own applications you just have to +include the following header file: + +src/nvimage/nvtt/nvtt.h + +And include the nvtt library in your projects. 
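+A minimal compression call with this API might look roughly like the sketch below. The type, enum and method names follow the upstream nvtt 2.x documentation and are an illustration only; check them against the bundled include/nvtt/nvtt.h.
+
+```cpp
+// Minimal sketch, assuming the documented nvtt 2.x C++ API; verify the names
+// against the nvtt.h shipped with this library before relying on them.
+#include <nvtt/nvtt.h>
+
+bool compressToDds(const void* bgraPixels, int width, int height)
+{
+    nvtt::InputOptions input;
+    input.setTextureLayout(nvtt::TextureType_2D, width, height);
+    input.setMipmapData(bgraPixels, width, height); // 8-bit BGRA by default
+
+    nvtt::CompressionOptions compression;
+    compression.setFormat(nvtt::Format_DXT1);       // i.e. BC1
+
+    nvtt::OutputOptions output;
+    output.setFileName("output.dds");
+
+    nvtt::Compressor compressor;
+    return compressor.process(input, compression, output);
+}
+```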
+ +The following file contains a simple example that shows how to use the library: + +src/nvimage/nvtt/compress.cpp + +Detailed documentation of the API can be found at: + +http://code.google.com/p/nvidia-texture-tools/wiki/ApiDocumentation + Index: ps/trunk/libraries/source/nvtt/src/VERSION =================================================================== --- ps/trunk/libraries/source/nvtt/src/VERSION +++ ps/trunk/libraries/source/nvtt/src/VERSION @@ -1 +1 @@ -2.0.8 +2.1.1 Index: ps/trunk/libraries/source/nvtt/src/cmake/DetermineProcessor.cmake =================================================================== --- ps/trunk/libraries/source/nvtt/src/cmake/DetermineProcessor.cmake +++ ps/trunk/libraries/source/nvtt/src/cmake/DetermineProcessor.cmake @@ -5,24 +5,68 @@ IF(UNIX) FIND_PROGRAM(CMAKE_UNAME uname /bin /usr/bin /usr/local/bin ) IF(CMAKE_UNAME) - EXEC_PROGRAM(uname ARGS -p OUTPUT_VARIABLE NV_SYSTEM_PROCESSOR RETURN_VALUE val) + #EXEC_PROGRAM(uname ARGS -p OUTPUT_VARIABLE NV_SYSTEM_PROCESSOR RETURN_VALUE val) - IF("${val}" GREATER 0 OR NV_SYSTEM_PROCESSOR STREQUAL "unknown") + #IF("${val}" GREATER 0 OR NV_SYSTEM_PROCESSOR STREQUAL "unknown") EXEC_PROGRAM(uname ARGS -m OUTPUT_VARIABLE NV_SYSTEM_PROCESSOR RETURN_VALUE val) - ENDIF("${val}" GREATER 0 OR NV_SYSTEM_PROCESSOR STREQUAL "unknown") + #ENDIF("${val}" GREATER 0 OR NV_SYSTEM_PROCESSOR STREQUAL "unknown") + + IF(NV_SYSTEM_PROCESSOR STREQUAL "Power Macintosh") + SET(NV_SYSTEM_PROCESSOR "powerpc") + ENDIF(NV_SYSTEM_PROCESSOR STREQUAL "Power Macintosh") # processor may have double quote in the name, and that needs to be removed STRING(REGEX REPLACE "\"" "" NV_SYSTEM_PROCESSOR "${NV_SYSTEM_PROCESSOR}") STRING(REGEX REPLACE "/" "_" NV_SYSTEM_PROCESSOR "${NV_SYSTEM_PROCESSOR}") ENDIF(CMAKE_UNAME) - # Get extended processor information with: - # `cat /proc/cpuinfo` +#~ # Get extended processor information from /proc/cpuinfo +#~ IF(EXISTS "/proc/cpuinfo") + +#~ FILE(READ /proc/cpuinfo PROC_CPUINFO) + +#~ SET(VENDOR_ID_RX "vendor_id[ \t]*:[ \t]*([a-zA-Z]+)\n") +#~ STRING(REGEX MATCH "${VENDOR_ID_RX}" VENDOR_ID "${PROC_CPUINFO}") +#~ STRING(REGEX REPLACE "${VENDOR_ID_RX}" "\\1" VENDOR_ID "${VENDOR_ID}") + +#~ SET(CPU_FAMILY_RX "cpu family[ \t]*:[ \t]*([0-9]+)") +#~ STRING(REGEX MATCH "${CPU_FAMILY_RX}" CPU_FAMILY "${PROC_CPUINFO}") +#~ STRING(REGEX REPLACE "${CPU_FAMILY_RX}" "\\1" CPU_FAMILY "${CPU_FAMILY}") + +#~ SET(MODEL_RX "model[ \t]*:[ \t]*([0-9]+)") +#~ STRING(REGEX MATCH "${MODEL_RX}" MODEL "${PROC_CPUINFO}") +#~ STRING(REGEX REPLACE "${MODEL_RX}" "\\1" MODEL "${MODEL}") + +#~ SET(FLAGS_RX "flags[ \t]*:[ \t]*([a-zA-Z0-9 _]+)\n") +#~ STRING(REGEX MATCH "${FLAGS_RX}" FLAGS "${PROC_CPUINFO}") +#~ STRING(REGEX REPLACE "${FLAGS_RX}" "\\1" FLAGS "${FLAGS}") + +#~ # Debug output. +#~ IF(LINUX_CPUINFO) +#~ MESSAGE(STATUS "LinuxCPUInfo.cmake:") +#~ MESSAGE(STATUS "VENDOR_ID : ${VENDOR_ID}") +#~ MESSAGE(STATUS "CPU_FAMILY : ${CPU_FAMILY}") +#~ MESSAGE(STATUS "MODEL : ${MODEL}") +#~ MESSAGE(STATUS "FLAGS : ${FLAGS}") +#~ ENDIF(LINUX_CPUINFO) + +#~ ENDIF(EXISTS "/proc/cpuinfo") + +#~ # Information on how to decode CPU_FAMILY and MODEL: +#~ # http://balusc.xs4all.nl/srv/har-cpu-int-pm.php ELSE(UNIX) + IF(WIN32) - SET (NV_SYSTEM_PROCESSOR "$ENV{PROCESSOR_ARCHITECTURE}") + # It's not OK to trust $ENV{PROCESSOR_ARCHITECTURE}: its value depends on the type of executable being run, + # so a 32-bit cmake (the default binary distribution) will always say "x86" regardless of the actual target. 
+ IF (CMAKE_SIZEOF_VOID_P EQUAL 8) + SET (NV_SYSTEM_PROCESSOR "x86_64") + ELSE(CMAKE_SIZEOF_VOID_P EQUAL 8) + SET (NV_SYSTEM_PROCESSOR "x86") + ENDIF(CMAKE_SIZEOF_VOID_P EQUAL 8) ENDIF(WIN32) + ENDIF(UNIX) Index: ps/trunk/libraries/source/nvtt/src/cmake/FindCUDA.cmake =================================================================== --- ps/trunk/libraries/source/nvtt/src/cmake/FindCUDA.cmake +++ ps/trunk/libraries/source/nvtt/src/cmake/FindCUDA.cmake @@ -1,142 +0,0 @@ -# -# Try to find CUDA compiler, runtime libraries, and include path. -# Once done this will define -# -# CUDA_FOUND -# CUDA_INCLUDE_PATH -# CUDA_RUNTIME_LIBRARY -# CUDA_COMPILER -# -# It will also define the following macro: -# -# WRAP_CUDA -# - -IF (WIN32) - FIND_PROGRAM (CUDA_COMPILER nvcc.exe - $ENV{CUDA_BIN_PATH} - DOC "The CUDA Compiler") -ELSE(WIN32) - FIND_PROGRAM (CUDA_COMPILER nvcc - $ENV{CUDA_BIN_PATH} - /usr/local/cuda/bin - DOC "The CUDA Compiler") -ENDIF(WIN32) - -IF (CUDA_COMPILER) - GET_FILENAME_COMPONENT (CUDA_COMPILER_DIR ${CUDA_COMPILER} PATH) - GET_FILENAME_COMPONENT (CUDA_COMPILER_SUPER_DIR ${CUDA_COMPILER_DIR} PATH) -ELSE (CUDA_COMPILER) - SET (CUDA_COMPILER_DIR .) - SET (CUDA_COMPILER_SUPER_DIR ..) -ENDIF (CUDA_COMPILER) - -FIND_PATH (CUDA_INCLUDE_PATH cuda_runtime.h - $ENV{CUDA_INC_PATH} - ${CUDA_COMPILER_SUPER_DIR}/include - ${CUDA_COMPILER_DIR} - DOC "The directory where CUDA headers reside") - -FIND_LIBRARY (CUDA_RUNTIME_LIBRARY - NAMES cudart - PATHS - $ENV{CUDA_LIB_PATH} - ${CUDA_COMPILER_SUPER_DIR}/lib - ${CUDA_COMPILER_DIR} - DOC "The CUDA runtime library") - -IF (CUDA_INCLUDE_PATH AND CUDA_RUNTIME_LIBRARY) - SET (CUDA_FOUND TRUE) -ELSE (CUDA_INCLUDE_PATH AND CUDA_RUNTIME_LIBRARY) - SET (CUDA_FOUND FALSE) -ENDIF (CUDA_INCLUDE_PATH AND CUDA_RUNTIME_LIBRARY) - -SET (CUDA_LIBRARIES ${CUDA_RUNTIME_LIBRARY}) - -MARK_AS_ADVANCED (CUDA_FOUND CUDA_COMPILER CUDA_RUNTIME_LIBRARY) - - -#SET(CUDA_OPTIONS "-ncfe") -SET(CUDA_OPTIONS "--host-compilation=C") - -IF (CUDA_EMULATION) - SET (CUDA_OPTIONS "${CUDA_OPTIONS} -deviceemu") -ENDIF (CUDA_EMULATION) - - -# Get include directories. -MACRO(GET_CUDA_INC_DIRS _cuda_INC_DIRS) - SET(${_cuda_INC_DIRS}) - GET_DIRECTORY_PROPERTY(_inc_DIRS INCLUDE_DIRECTORIES) - - FOREACH(_current ${_inc_DIRS}) - SET(${_cuda_INC_DIRS} ${${_cuda_INC_DIRS}} "-I" ${_current}) - ENDFOREACH(_current ${_inc_DIRS}) - - SET(${_cuda_INC_DIRS} ${${_cuda_INC_DIRS}} "-I" ${CUDA_INCLUDE_PATH}) - -# IF (CMAKE_SYTEM_INCLUDE_PATH) -# SET(${_cuda_INC_DIRS} ${${_cuda_INC_DIRS}} "-I" ${CMAKE_SYSTEM_INCLUDE_PATH}) -# ENDIF (CMAKE_SYTEM_INCLUDE_PATH) -# IF (CMAKE_INCLUDE_PATH) -# SET(${_cuda_INC_DIRS} ${${_cuda_INC_DIRS}} "-I" ${CMAKE_INCLUDE_PATH}) -# ENDIF (CMAKE_INCLUDE_PATH) - -ENDMACRO(GET_CUDA_INC_DIRS) - - -# Get file dependencies. 
-MACRO (GET_CUFILE_DEPENDENCIES dependencies file) - GET_FILENAME_COMPONENT(filepath ${file} PATH) - - # parse file for dependencies - FILE(READ "${file}" CONTENTS) - #STRING(REGEX MATCHALL "#[ \t]*include[ \t]+[<\"][^>\"]*" DEPS "${CONTENTS}") - STRING(REGEX MATCHALL "#[ \t]*include[ \t]+\"[^\"]*" DEPS "${CONTENTS}") - - SET(${dependencies}) - - FOREACH(DEP ${DEPS}) - STRING(REGEX REPLACE "#[ \t]*include[ \t]+\"" "" DEP "${DEP}") - - FIND_PATH(PATH_OF_${DEP} ${DEP} - ${filepath}) - - IF(NOT ${PATH_OF_${DEP}} STREQUAL PATH_OF_${DEP}-NOTFOUND) - #MESSAGE("${file} : ${PATH_OF_${DEP}}/${DEP}") - SET(${dependencies} ${${dependencies}} ${PATH_OF_${DEP}}/${DEP}) - ENDIF(NOT ${PATH_OF_${DEP}} STREQUAL PATH_OF_${DEP}-NOTFOUND) - - ENDFOREACH(DEP) - -ENDMACRO (GET_CUFILE_DEPENDENCIES) - - -# WRAP_CUDA(outfile ...) -MACRO (WRAP_CUDA outfiles) - GET_CUDA_INC_DIRS(cuda_includes) - #MESSAGE(${cuda_includes}) - - FOREACH (CUFILE ${ARGN}) - GET_FILENAME_COMPONENT (CUFILE ${CUFILE} ABSOLUTE) - GET_FILENAME_COMPONENT (CFILE ${CUFILE} NAME_WE) - SET (CFILE ${CMAKE_CURRENT_BINARY_DIR}/${CFILE}.gen.cpp) - - GET_CUFILE_DEPENDENCIES(CUDEPS ${CUFILE}) - #MESSAGE("${CUDEPS}") - - ADD_CUSTOM_COMMAND ( - OUTPUT ${CFILE} - COMMAND ${CUDA_COMPILER} - ARGS -cuda ${cuda_includes} ${CUDA_OPTIONS} -o ${CFILE} ${CUFILE} - MAIN_DEPENDENCY ${CUFILE} - DEPENDS ${CUDEPS}) - - #MACRO_ADD_FILE_DEPENDENCIES(${CUFILE} ${CFILE}) - - SET (${outfiles} ${${outfiles}} ${CFILE}) - ENDFOREACH (CUFILE) - - SET_SOURCE_FILES_PROPERTIES(${outfiles} PROPERTIES GENERATED 1) - -ENDMACRO (WRAP_CUDA) Index: ps/trunk/libraries/source/nvtt/src/cmake/FindCg.cmake =================================================================== --- ps/trunk/libraries/source/nvtt/src/cmake/FindCg.cmake +++ ps/trunk/libraries/source/nvtt/src/cmake/FindCg.cmake @@ -1,129 +1,172 @@ -# -# Try to find NVIDIA's Cg compiler, runtime libraries, and include path. -# Once done this will define -# -# CG_FOUND =system has NVIDIA Cg and it can be used. -# CG_INCLUDE_PATH = directory where cg.h resides -# CG_LIBRARY = full path to libCg.so (Cg.DLL on win32) -# CG_GL_LIBRARY = full path to libCgGL.so (CgGL.dll on win32) -# CG_COMPILER = full path to cgc (cgc.exe on win32) -# - -# On OSX default to using the framework version of Cg. - -IF (APPLE) - INCLUDE(${CMAKE_ROOT}/Modules/CMakeFindFrameworks.cmake) - SET(CG_FRAMEWORK_INCLUDES) - CMAKE_FIND_FRAMEWORKS(Cg) - IF (Cg_FRAMEWORKS) - FOREACH(dir ${Cg_FRAMEWORKS}) - SET(CG_FRAMEWORK_INCLUDES ${CG_FRAMEWORK_INCLUDES} - ${dir}/Headers ${dir}/PrivateHeaders) - ENDFOREACH(dir) - - # Find the include dir - FIND_PATH(CG_INCLUDE_PATH cg.h - ${CG_FRAMEWORK_INCLUDES} - ) - - # Since we are using Cg framework, we must link to it. - # Note, we use weak linking, so that it works even when Cg is not available. - SET(CG_LIBRARY "-weak_framework Cg" CACHE STRING "Cg library") - SET(CG_GL_LIBRARY "-weak_framework Cg" CACHE STRING "Cg GL library") - ENDIF (Cg_FRAMEWORKS) - FIND_PROGRAM(CG_COMPILER cgc - /usr/bin - /usr/local/bin - DOC "The Cg compiler" - ) -ELSE (APPLE) - IF (WIN32) - FIND_PROGRAM( CG_COMPILER cgc - $ENV{CG_BIN_PATH} - $ENV{PROGRAMFILES}/NVIDIA\ Corporation/Cg/bin - $ENV{PROGRAMFILES}/Cg - ${PROJECT_SOURCE_DIR}/../Cg - DOC "The Cg Compiler" - ) - IF (CG_COMPILER) - GET_FILENAME_COMPONENT(CG_COMPILER_DIR ${CG_COMPILER} PATH) - GET_FILENAME_COMPONENT(CG_COMPILER_SUPER_DIR ${CG_COMPILER_DIR} PATH) - ELSE (CG_COMPILER) - SET (CG_COMPILER_DIR .) - SET (CG_COMPILER_SUPER_DIR ..) 
- ENDIF (CG_COMPILER) - FIND_PATH( CG_INCLUDE_PATH Cg/cg.h - $ENV{CG_INC_PATH} - $ENV{PROGRAMFILES}/NVIDIA\ Corporation/Cg/include - $ENV{PROGRAMFILES}/Cg - ${PROJECT_SOURCE_DIR}/../Cg - ${CG_COMPILER_SUPER_DIR}/include - ${CG_COMPILER_DIR} - DOC "The directory where Cg/cg.h resides" - ) - FIND_LIBRARY( CG_LIBRARY - NAMES Cg - PATHS - $ENV{CG_LIB_PATH} - $ENV{PROGRAMFILES}/NVIDIA\ Corporation/Cg/lib - $ENV{PROGRAMFILES}/Cg - ${PROJECT_SOURCE_DIR}/../Cg - ${CG_COMPILER_SUPER_DIR}/lib - ${CG_COMPILER_DIR} - DOC "The Cg runtime library" - ) - FIND_LIBRARY( CG_GL_LIBRARY - NAMES CgGL - PATHS - $ENV{PROGRAMFILES}/NVIDIA\ Corporation/Cg/lib - $ENV{PROGRAMFILES}/Cg - ${PROJECT_SOURCE_DIR}/../Cg - ${CG_COMPILER_SUPER_DIR}/lib - ${CG_COMPILER_DIR} - DOC "The Cg runtime library" - ) - ELSE (WIN32) - FIND_PROGRAM( CG_COMPILER cgc - /usr/bin - /usr/local/bin - DOC "The Cg Compiler" - ) - GET_FILENAME_COMPONENT(CG_COMPILER_DIR "${CG_COMPILER}" PATH) - GET_FILENAME_COMPONENT(CG_COMPILER_SUPER_DIR "${CG_COMPILER_DIR}" PATH) - FIND_PATH( CG_INCLUDE_PATH Cg/cg.h - /usr/include - /usr/local/include - ${CG_COMPILER_SUPER_DIR}/include - DOC "The directory where Cg/cg.h resides" - ) - FIND_LIBRARY( CG_LIBRARY Cg - PATHS - /usr/lib64 - /usr/lib - /usr/local/lib64 - /usr/local/lib - ${CG_COMPILER_SUPER_DIR}/lib64 - ${CG_COMPILER_SUPER_DIR}/lib - DOC "The Cg runtime library" - ) - SET(CG_LIBRARY ${CG_LIBRARY} -lpthread) - FIND_LIBRARY( CG_GL_LIBRARY CgGL - PATHS - /usr/lib64 - /usr/lib - /usr/local/lib64 - /usr/local/lib - ${CG_COMPILER_SUPER_DIR}/lib64 - ${CG_COMPILER_SUPER_DIR}/lib - DOC "The Cg runtime library" - ) - ENDIF (WIN32) -ENDIF (APPLE) - -IF (CG_INCLUDE_PATH) - SET( CG_FOUND 1 CACHE STRING "Set to 1 if CG is found, 0 otherwise") -ELSE (CG_INCLUDE_PATH) - SET( CG_FOUND 0 CACHE STRING "Set to 1 if CG is found, 0 otherwise") -ENDIF (CG_INCLUDE_PATH) - -MARK_AS_ADVANCED( CG_FOUND ) +# +# Try to find NVIDIA's Cg compiler, runtime libraries, and include path. +# Once done this will define +# +# CG_FOUND =system has NVIDIA Cg and it can be used. +# CG_INCLUDE_DIR = directory where cg.h resides +# CG_LIBRARY = full path to libCg.so (Cg.DLL on win32) +# CG_GL_LIBRARY = full path to libCgGL.so (CgGL.dll on win32) +# CG_COMPILER = full path to cgc (cgc.exe on win32) +# + +# On OSX default to using the framework version of Cg. +IF (APPLE) + INCLUDE(${CMAKE_ROOT}/Modules/CMakeFindFrameworks.cmake) + SET(CG_FRAMEWORK_INCLUDES) + CMAKE_FIND_FRAMEWORKS(Cg) + IF (Cg_FRAMEWORKS) + FOREACH(dir ${Cg_FRAMEWORKS}) + SET(CG_FRAMEWORK_INCLUDES ${CG_FRAMEWORK_INCLUDES} + ${dir}/Headers ${dir}/PrivateHeaders) + ENDFOREACH(dir) + + # Find the include dir + FIND_PATH(CG_INCLUDE_DIR cg.h + ${CG_FRAMEWORK_INCLUDES} + ) + + # Since we are using Cg framework, we must link to it. + # Note, we use weak linking, so that it works even when Cg is not available. 
+ SET(CG_LIBRARY "-weak_framework Cg" CACHE STRING "Cg library") + SET(CG_GL_LIBRARY "-weak_framework Cg" CACHE STRING "Cg GL library") + ENDIF (Cg_FRAMEWORKS) + FIND_PROGRAM(CG_COMPILER cgc + /usr/bin + /usr/local/bin + DOC "The Cg compiler" + ) +ELSE (APPLE) + IF (WIN32) + + # When compiling 64-bit programs, the binaries and libs are in bin.x64 and lib.x64 directories, + + # This will have only effect for 64bit versions of cmake, when running the default 32bit version + # both ProgramFiles and ProgramFiles(x86) point to the same place in Win64 + SET(PFx86_VARNAME "ProgramFiles(x86)") + SET(PFx86 $ENV{${PFx86_VARNAME}}) + + # Let's play safe in case we are cross compiling to 64 bit: for cgc it doesn't really matter + FIND_PROGRAM( CG_COMPILER cgc + $ENV{CG_BIN64_PATH} + $ENV{CG_BIN_PATH} + $ENV{PROGRAMFILES}/NVIDIA\ Corporation/Cg/bin + $ENV{PFx86}/NVIDIA\ Corporation/Cg/bin + $ENV{PROGRAMFILES}/Cg + ${PROJECT_SOURCE_DIR}/../Cg + DOC "The Cg Compiler" + ) + + IF (CG_COMPILER) + GET_FILENAME_COMPONENT(CG_COMPILER_DIR ${CG_COMPILER} PATH) + GET_FILENAME_COMPONENT(CG_COMPILER_SUPER_DIR ${CG_COMPILER_DIR} PATH) + ELSE (CG_COMPILER) + SET (CG_COMPILER_DIR .) + SET (CG_COMPILER_SUPER_DIR ..) + ENDIF (CG_COMPILER) + FIND_PATH( CG_INCLUDE_DIR Cg/cg.h + $ENV{CG_INC_PATH} + $ENV{PROGRAMFILES}/NVIDIA\ Corporation/Cg/include + $ENV{PROGRAMFILES}/Cg + ${PROJECT_SOURCE_DIR}/../Cg + ${CG_COMPILER_SUPER_DIR}/include + ${CG_COMPILER_DIR} + DOC "The directory where Cg/cg.h resides" + ) + + IF (NV_SYSTEM_PROCESSOR STREQUAL "x86_64") + FIND_LIBRARY( CG_LIBRARY + NAMES Cg + PATHS + $ENV{CG_LIB64_PATH} + $ENV{PROGRAMFILES}/NVIDIA\ Corporation/Cg/lib.x64 + $ENV{PFx86}/NVIDIA\ Corporation/Cg/lib.x64 + $ENV{PROGRAMFILES}/Cg + $ENV{PFx86}/Cg + ${PROJECT_SOURCE_DIR}/../Cg + ${CG_COMPILER_SUPER_DIR}/lib.x64 + ${CG_COMPILER_DIR} + DOC "The Cg runtime library (64-bit)" + ) + FIND_LIBRARY( CG_GL_LIBRARY + NAMES CgGL + PATHS + $ENV{CG_LIB64_PATH} + $ENV{PROGRAMFILES}/NVIDIA\ Corporation/Cg/lib.x64 + $ENV{PFx86}/NVIDIA\ Corporation/Cg/lib.x64 + $ENV{PROGRAMFILES}/Cg + $ENV{PFx86}/Cg + ${PROJECT_SOURCE_DIR}/../Cg + ${CG_COMPILER_SUPER_DIR}/lib.x64 + ${CG_COMPILER_DIR} + DOC "The Cg GL runtime library (64-bit)" + ) + ELSE(NV_SYSTEM_PROCESSOR STREQUAL "x86_64") + FIND_LIBRARY( CG_LIBRARY + NAMES Cg + PATHS + $ENV{CG_LIB_PATH} + $ENV{PROGRAMFILES}/NVIDIA\ Corporation/Cg/lib + $ENV{PROGRAMFILES}/Cg + ${PROJECT_SOURCE_DIR}/../Cg + ${CG_COMPILER_SUPER_DIR}/lib + ${CG_COMPILER_DIR} + DOC "The Cg runtime library" + ) + FIND_LIBRARY( CG_GL_LIBRARY + NAMES CgGL + PATHS + $ENV{CG_LIB_PATH} + $ENV{PROGRAMFILES}/NVIDIA\ Corporation/Cg/lib + $ENV{PROGRAMFILES}/Cg + ${PROJECT_SOURCE_DIR}/../Cg + ${CG_COMPILER_SUPER_DIR}/lib + ${CG_COMPILER_DIR} + DOC "The Cg GL runtime library" + ) + ENDIF(NV_SYSTEM_PROCESSOR STREQUAL "x86_64") + + ELSE (WIN32) + FIND_PROGRAM( CG_COMPILER cgc + /usr/bin + /usr/local/bin + DOC "The Cg Compiler" + ) + GET_FILENAME_COMPONENT(CG_COMPILER_DIR "${CG_COMPILER}" PATH) + GET_FILENAME_COMPONENT(CG_COMPILER_SUPER_DIR "${CG_COMPILER_DIR}" PATH) + FIND_PATH( CG_INCLUDE_DIR Cg/cg.h + /usr/include + /usr/local/include + ${CG_COMPILER_SUPER_DIR}/include + DOC "The directory where Cg/cg.h resides" + ) + FIND_LIBRARY( CG_LIBRARY Cg + PATHS + /usr/lib64 + /usr/lib + /usr/local/lib64 + /usr/local/lib + ${CG_COMPILER_SUPER_DIR}/lib64 + ${CG_COMPILER_SUPER_DIR}/lib + DOC "The Cg runtime library" + ) + SET(CG_LIBRARY ${CG_LIBRARY} -lpthread) + FIND_LIBRARY( CG_GL_LIBRARY CgGL + PATHS + /usr/lib64 + /usr/lib + /usr/local/lib64 
+ /usr/local/lib + ${CG_COMPILER_SUPER_DIR}/lib64 + ${CG_COMPILER_SUPER_DIR}/lib + DOC "The Cg runtime library" + ) + ENDIF (WIN32) +ENDIF (APPLE) + +IF (CG_INCLUDE_DIR) + SET( CG_FOUND 1 CACHE STRING "Set to 1 if CG is found, 0 otherwise") +ELSE (CG_INCLUDE_DIR) + SET( CG_FOUND 0 CACHE STRING "Set to 1 if CG is found, 0 otherwise") +ENDIF (CG_INCLUDE_DIR) + +MARK_AS_ADVANCED( CG_FOUND ) Index: ps/trunk/libraries/source/nvtt/src/cmake/FindFreeImage.cmake =================================================================== --- ps/trunk/libraries/source/nvtt/src/cmake/FindFreeImage.cmake +++ ps/trunk/libraries/source/nvtt/src/cmake/FindFreeImage.cmake @@ -0,0 +1,53 @@ +# +# Try to find the FreeImage library and include path. +# Once done this will define +# +# FREEIMAGE_FOUND +# FREEIMAGE_INCLUDE_PATH +# FREEIMAGE_LIBRARY +# + +IF (WIN32) + FIND_PATH( FREEIMAGE_INCLUDE_PATH FreeImage.h + ${FREEIMAGE_ROOT_DIR}/include + ${FREEIMAGE_ROOT_DIR} + DOC "The directory where FreeImage.h resides") + FIND_LIBRARY( FREEIMAGE_LIBRARY + NAMES FreeImage freeimage + PATHS + ${FREEIMAGE_ROOT_DIR}/lib + ${FREEIMAGE_ROOT_DIR} + DOC "The FreeImage library") +ELSE (WIN32) + FIND_PATH( FREEIMAGE_INCLUDE_PATH FreeImage.h + /usr/include + /usr/local/include + /sw/include + /opt/local/include + DOC "The directory where FreeImage.h resides") + FIND_LIBRARY( FREEIMAGE_LIBRARY + NAMES FreeImage freeimage + PATHS + /usr/lib64 + /usr/lib + /usr/local/lib64 + /usr/local/lib + /sw/lib + /opt/local/lib + DOC "The FreeImage library") +ENDIF (WIN32) + +SET(FREEIMAGE_LIBRARIES ${FREEIMAGE_LIBRARY}) + +IF (FREEIMAGE_INCLUDE_PATH AND FREEIMAGE_LIBRARY) + SET( FREEIMAGE_FOUND TRUE CACHE BOOL "Set to TRUE if FreeImage is found, FALSE otherwise") +ELSE (FREEIMAGE_INCLUDE_PATH AND FREEIMAGE_LIBRARY) + SET( FREEIMAGE_FOUND FALSE CACHE BOOL "Set to TRUE if FreeImage is found, FALSE otherwise") +ENDIF (FREEIMAGE_INCLUDE_PATH AND FREEIMAGE_LIBRARY) + +MARK_AS_ADVANCED( + FREEIMAGE_FOUND + FREEIMAGE_LIBRARY + FREEIMAGE_LIBRARIES + FREEIMAGE_INCLUDE_PATH) + Index: ps/trunk/libraries/source/nvtt/src/cmake/FindGLEW.cmake =================================================================== --- ps/trunk/libraries/source/nvtt/src/cmake/FindGLEW.cmake +++ ps/trunk/libraries/source/nvtt/src/cmake/FindGLEW.cmake @@ -10,24 +10,28 @@ IF (WIN32) FIND_PATH( GLEW_INCLUDE_PATH GL/glew.h $ENV{PROGRAMFILES}/GLEW/include - ${PROJECT_SOURCE_DIR}/src/nvgl/glew/include + ${GLEW_ROOT_DIR}/include DOC "The directory where GL/glew.h resides") - FIND_LIBRARY( GLEW_LIBRARY - NAMES glew GLEW glew32 glew32s - PATHS - $ENV{PROGRAMFILES}/GLEW/lib - ${PROJECT_SOURCE_DIR}/src/nvgl/glew/bin - ${PROJECT_SOURCE_DIR}/src/nvgl/glew/lib - DOC "The GLEW library") + + FIND_LIBRARY( GLEW_LIBRARY + NAMES glew GLEW glew32 glew32s + PATHS + $ENV{PROGRAMFILES}/GLEW/lib + ${PROJECT_SOURCE_DIR}/src/nvgl/glew/bin + ${PROJECT_SOURCE_DIR}/src/nvgl/glew/lib + DOC "The GLEW library") ELSE (WIN32) FIND_PATH( GLEW_INCLUDE_PATH GL/glew.h /usr/include /usr/local/include /sw/include /opt/local/include + ${GLEW_ROOT_DIR}/include DOC "The directory where GL/glew.h resides") + + # Prefer the static library. 
FIND_LIBRARY( GLEW_LIBRARY - NAMES GLEW glew + NAMES libGLEW.a GLEW PATHS /usr/lib64 /usr/lib @@ -35,13 +39,12 @@ /usr/local/lib /sw/lib /opt/local/lib + ${GLEW_ROOT_DIR}/lib DOC "The GLEW library") ENDIF (WIN32) -IF (GLEW_INCLUDE_PATH) - SET( GLEW_FOUND 1 CACHE STRING "Set to 1 if GLEW is found, 0 otherwise") -ELSE (GLEW_INCLUDE_PATH) - SET( GLEW_FOUND 0 CACHE STRING "Set to 1 if GLEW is found, 0 otherwise") -ENDIF (GLEW_INCLUDE_PATH) - -MARK_AS_ADVANCED( GLEW_FOUND ) +SET(GLEW_FOUND "NO") +IF (GLEW_INCLUDE_PATH AND GLEW_LIBRARY) + SET(GLEW_LIBRARIES ${GLEW_LIBRARY}) + SET(GLEW_FOUND "YES") +ENDIF (GLEW_INCLUDE_PATH AND GLEW_LIBRARY) Index: ps/trunk/libraries/source/nvtt/src/cmake/FindGLUT.cmake =================================================================== --- ps/trunk/libraries/source/nvtt/src/cmake/FindGLUT.cmake +++ ps/trunk/libraries/source/nvtt/src/cmake/FindGLUT.cmake @@ -1,127 +0,0 @@ -# - try to find glut library and include files -# GLUT_INCLUDE_DIR, where to find GL/glut.h, etc. -# GLUT_LIBRARIES, the libraries to link against -# GLUT_FOUND, If false, do not try to use GLUT. -# Also defined, but not for general use are: -# GLUT_glut_LIBRARY = the full path to the glut library. -# GLUT_Xmu_LIBRARY = the full path to the Xmu library. -# GLUT_Xi_LIBRARY = the full path to the Xi Library. - -IF (WIN32) - - IF(CYGWIN) - - FIND_PATH( GLUT_INCLUDE_DIR GL/glut.h - /usr/include - ) - - FIND_LIBRARY( GLUT_glut_LIBRARY glut32 - ${OPENGL_LIBRARY_DIR} - /usr/lib - /usr/lib/w32api - /usr/local/lib - /usr/X11R6/lib - ) - - - ELSE(CYGWIN) - -# FIND_PATH( GLUT_INCLUDE_DIR GL/glut.h -# ${GLUT_ROOT_PATH}/include -# ) - -# FIND_LIBRARY( GLUT_glut_LIBRARY glut32 -# ${GLUT_ROOT_PATH}/lib -# ${OPENGL_LIBRARY_DIR} -# ) - - FIND_PATH( GLUT_INCLUDE_DIR GL/glut.h - ${GLUT_ROOT_PATH}/include - ${PROJECT_SOURCE_DIR}/src/nvgl/glut/include - DOC "The directory where GL/glut.h resides") - FIND_LIBRARY( GLUT_glut_LIBRARY - NAMES glut GLUT glut32 glut32s - PATHS - ${GLUT_ROOT_PATH}/lib - ${PROJECT_SOURCE_DIR}/src/nvgl/glut/bin - ${PROJECT_SOURCE_DIR}/src/nvgl/glut/lib - ${OPENGL_LIBRARY_DIR} - DOC "The GLUT library") - - ENDIF(CYGWIN) - -ELSE (WIN32) - - IF (APPLE) -# These values for Apple could probably do with improvement. - FIND_PATH( GLUT_INCLUDE_DIR glut.h - /System/Library/Frameworks/GLUT.framework/Versions/A/Headers - ${OPENGL_LIBRARY_DIR} - ) - SET(GLUT_glut_LIBRARY "-framework Glut" CACHE STRING "GLUT library for OSX") - SET(GLUT_cocoa_LIBRARY "-framework Cocoa" CACHE STRING "Cocoa framework for OSX") - ELSE (APPLE) - - FIND_PATH( GLUT_INCLUDE_DIR GL/glut.h - /usr/include - /usr/include/GL - /usr/local/include - /usr/openwin/share/include - /usr/openwin/include - /usr/X11R6/include - /usr/include/X11 - /opt/graphics/OpenGL/include - /opt/graphics/OpenGL/contrib/libglut - ) - - FIND_LIBRARY( GLUT_glut_LIBRARY glut - /usr/lib - /usr/local/lib - /usr/openwin/lib - /usr/X11R6/lib - ) - - FIND_LIBRARY( GLUT_Xi_LIBRARY Xi - /usr/lib - /usr/local/lib - /usr/openwin/lib - /usr/X11R6/lib - ) - - FIND_LIBRARY( GLUT_Xmu_LIBRARY Xmu - /usr/lib - /usr/local/lib - /usr/openwin/lib - /usr/X11R6/lib - ) - - ENDIF (APPLE) - -ENDIF (WIN32) - -SET( GLUT_FOUND "NO" ) -IF(GLUT_INCLUDE_DIR) - IF(GLUT_glut_LIBRARY) - # Is -lXi and -lXmu required on all platforms that have it? - # If not, we need some way to figure out what platform we are on. 
- SET( GLUT_LIBRARIES - ${GLUT_glut_LIBRARY} - ${GLUT_Xmu_LIBRARY} - ${GLUT_Xi_LIBRARY} - ${GLUT_cocoa_LIBRARY} - ) - SET( GLUT_FOUND "YES" ) - -#The following deprecated settings are for backwards compatibility with CMake1.4 - SET (GLUT_LIBRARY ${GLUT_LIBRARIES}) - SET (GLUT_INCLUDE_PATH ${GLUT_INCLUDE_DIR}) - - ENDIF(GLUT_glut_LIBRARY) -ENDIF(GLUT_INCLUDE_DIR) - -MARK_AS_ADVANCED( - GLUT_INCLUDE_DIR - GLUT_glut_LIBRARY - GLUT_Xmu_LIBRARY - GLUT_Xi_LIBRARY -) Index: ps/trunk/libraries/source/nvtt/src/cmake/OptimalOptions.cmake =================================================================== --- ps/trunk/libraries/source/nvtt/src/cmake/OptimalOptions.cmake +++ ps/trunk/libraries/source/nvtt/src/cmake/OptimalOptions.cmake @@ -9,9 +9,10 @@ ENDIF(NV_SYSTEM_PROCESSOR STREQUAL "i586") IF(NV_SYSTEM_PROCESSOR STREQUAL "i686") - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=i686") + #SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=i686") #SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpmath=sse -mtune=i686 -msse3") #SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=pentium4") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=prescott") ENDIF(NV_SYSTEM_PROCESSOR STREQUAL "i686") IF(NV_SYSTEM_PROCESSOR STREQUAL "x86_64") @@ -20,12 +21,30 @@ ENDIF(NV_SYSTEM_PROCESSOR STREQUAL "x86_64") IF(NV_SYSTEM_PROCESSOR STREQUAL "powerpc") - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mcpu=powerpc -maltivec -mabi=altivec -mpowerpc-gfxopt") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mcpu=powerpc -faltivec -maltivec -mabi=altivec -mpowerpc-gfxopt") # ibook G4: - #SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mcpu=7450 -mtune=7450 -maltivec -mabi=altivec -mpowerpc-gfxopt") + #SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mcpu=7450 -mtune=7450 -faltivec -maltivec -mabi=altivec -mpowerpc-gfxopt") + + # G5 + #SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mcpu=G5 -faltivec -maltivec -mabi=altivec -mpowerpc-gfxopt") + ENDIF(NV_SYSTEM_PROCESSOR STREQUAL "powerpc") +# IF(DARWIN) +# SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mmacosx-version-min=10.5 -isysroot /Developer/SDKs/MacOSX10.5.sdk") +# SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mmacosx-version-min=10.5 -isysroot /Developer/SDKs/MacOSX10.5.sdk") +# ENDIF(DARWIN) + IF(APPLE) + SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -arch i586 -arch x86_64 -msse3 -mmacosx-version-min=10.5") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -arch i586 -arch x86_64 -msse3 -mmacosx-version-min=10.5") + ENDIF(APPLE) + + IF(CMAKE_BUILD_TYPE STREQUAL "debug") + ADD_DEFINITIONS(-D_DEBUG) + ENDIF(CMAKE_BUILD_TYPE STREQUAL "debug") + + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") ENDIF(CMAKE_COMPILER_IS_GNUCXX) IF(MSVC) Index: ps/trunk/libraries/source/nvtt/src/configure =================================================================== --- ps/trunk/libraries/source/nvtt/src/configure +++ ps/trunk/libraries/source/nvtt/src/configure @@ -18,7 +18,7 @@ help=false -build="Debug" # release +build="debug" # release prefix=/usr/local # Parse the args @@ -26,9 +26,8 @@ do case $i in --help ) help=true ;; - --debug ) build="Debug" ;; - --release ) build="Release" ;; - --prefix=* ) prefix="${i#--prefix=}" ;; + --debug ) build="debug" ;; + --release ) build="release" ;; --prefix=* ) prefix="${i#--prefix=}" ;; * ) echo "Unrecognised argument $i" ;; esac @@ -51,9 +50,9 @@ echo "-- Configuring nvidia-texture-tools "`cat VERSION` -mkdir -p ./build -cd ./build -$CMAKE .. -DNVTT_SHARED=1 -DCMAKE_BUILD_TYPE=$build -DCMAKE_INSTALL_PREFIX=$prefix -G "Unix Makefiles" || exit 1 +mkdir -p ./build-$build +cd ./build-$build +$CMAKE .. 
-DNVTT_SHARED=0 -DCMAKE_BUILD_TYPE=$build -DCMAKE_INSTALL_PREFIX=$prefix -G "Unix Makefiles" || exit 1 cd .. echo "" @@ -62,11 +61,15 @@ cat > Makefile << EOF all: - @make --no-print-directory -C build/ + @+make --no-print-directory -C build-$build/ install: - @make install --no-print-directory -C build/ + @+make install --no-print-directory -C build-$build/ +package: + @+make package --no-print-directory -C build-$build/ +test: + @+make test --no-print-directory -C build-$build/ clean: - @make clean --no-print-directory -C build/ + @+make clean --no-print-directory -C build-$build/ distclean: - @rm -Rf build/ + @rm -Rf build-$build/ EOF Index: ps/trunk/libraries/source/nvtt/src/extern/CMakeLists.txt =================================================================== --- ps/trunk/libraries/source/nvtt/src/extern/CMakeLists.txt +++ ps/trunk/libraries/source/nvtt/src/extern/CMakeLists.txt @@ -0,0 +1,13 @@ + +#IF(WIN32) + #ADD_SUBDIRECTORY(gnuwin32) +#ENDIF(WIN32) + +ADD_SUBDIRECTORY(poshlib) + +#ADD_SUBDIRECTORY(EtcLib) +#ADD_SUBDIRECTORY(rg_etc1_v104) +#ADD_SUBDIRECTORY(etcpack) + +#ADD_SUBDIRECTORY(butteraugli) + Index: ps/trunk/libraries/source/nvtt/src/extern/poshlib/CMakeLists.txt =================================================================== --- ps/trunk/libraries/source/nvtt/src/extern/poshlib/CMakeLists.txt +++ ps/trunk/libraries/source/nvtt/src/extern/poshlib/CMakeLists.txt @@ -0,0 +1,7 @@ + +SET(POSHLIB_SRCS + posh.c + posh.h) + +ADD_LIBRARY(posh STATIC ${POSHLIB_SRCS}) + Index: ps/trunk/libraries/source/nvtt/src/extern/poshlib/posh.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/extern/poshlib/posh.h +++ ps/trunk/libraries/source/nvtt/src/extern/poshlib/posh.h @@ -0,0 +1,1034 @@ +/** +@file posh.h +@author Brian Hook +@version 1.3.001 + +Header file for POSH, the Portable Open Source Harness project. + +NOTE: Unlike most header files, this one is designed to be included +multiple times, which is why it does not have the @#ifndef/@#define +preamble. + +POSH relies on environment specified preprocessor symbols in order +to infer as much as possible about the target OS/architecture and +the host compiler capabilities. + +NOTE: POSH is simple and focused. It attempts to provide basic +functionality and information, but it does NOT attempt to emulate +missing functionality. I am also not willing to make POSH dirty +and hackish to support truly ancient and/or outmoded and/or bizarre +technologies such as non-ANSI compilers, systems with non-IEEE +floating point formats, segmented 16-bit operating systems, etc. + +Please refer to the accompanying HTML documentation or visit +http://www.poshlib.org for more information on how to use POSH. + +LICENSE: + +Copyright (c) 2004, Brian Hook +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * The names of this package'ss contributors contributors may not + be used to endorse or promote products derived from this + software without specific prior written permission. 
+ + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +REVISION: + +I've been lax about revision histories, so this starts at, um, 1.3.001. +Sorry for any inconveniences. + +1.3.001 - 2/23/2006 - Incorporated fix for bug reported by Bill Cary, + where I was not detecting Visual Studio + compilation on x86-64 systems. Added check for + _M_X64 which should fix that. + +*/ +/* +I have yet to find an authoritative reference on preprocessor +symbols, but so far this is what I've gleaned: + +GNU GCC/G++: + - __GNUC__: GNU C version + - __GNUG__: GNU C++ compiler + - __sun__ : on Sun platforms + - __svr4__: on Solaris and other SysV R4 platforms + - __mips__: on MIPS processor platforms + - __sparc_v9__: on Sparc 64-bit CPUs + - __sparcv9: 64-bit Solaris + - __MIPSEL__: mips processor, compiled for little endian + - __MIPSEB__: mips processor, compiled for big endian + - _R5900: MIPS/Sony/Toshiba R5900 (PS2) + - mc68000: 68K + - m68000: 68K + - m68k: 68K + - __palmos__: PalmOS + +Intel C/C++ Compiler: + - __ECC : compiler version, IA64 only + - __EDG__ + - __ELF__ + - __GXX_ABI_VERSION + - __i386 : IA-32 only + - __i386__ : IA-32 only + - i386 : IA-32 only + - __ia64 : IA-64 only + - __ia64__ : IA-64 only + - ia64 : IA-64 only + - __ICC : IA-32 only + - __INTEL_COMPILER : IA-32 or IA-64, newer versions only + +Apple's C/C++ Compiler for OS X: + - __APPLE_CC__ + - __APPLE__ + - __BIG_ENDIAN__ + - __APPLE__ + - __ppc__ + - __MACH__ + +DJGPP: + - __MSDOS__ + - __unix__ + - __unix + - __GNUC__ + - __GO32 + - DJGPP + - __i386, __i386, i386 + +Cray's C compiler: + - _ADDR64: if 64-bit pointers + - _UNICOS: + - __unix: + +SGI's CC compiler predefines the following (and more) with -ansi: + - __sgi + - __unix + - __host_mips + - _SYSTYPE_SVR4 + - __mips + - _MIPSEB + - anyone know if there is a predefined symbol for the compiler?! 
+ +MinGW: + - as GnuC but also defines _WIN32, __WIN32, WIN32, _X86_, __i386, __i386__, and several others + - __MINGW32__ + +Cygwin: + - as Gnu C, but also + - __unix__ + - __CYGWIN32__ + +Microsoft Visual Studio predefines the following: + - _MSC_VER + - _WIN32: on Win32 + - _M_IX6 (on x86 systems) + - _M_X64: on x86-64 systems + - _M_ALPHA (on DEC AXP systems) + - _SH3: WinCE, Hitachi SH-3 + - _MIPS: WinCE, MIPS + - _ARM: WinCE, ARM + +Sun's C Compiler: + - sun and _sun + - unix and _unix + - sparc and _sparc (SPARC systems only) + - i386 and _i386 (x86 systems only) + - __SVR4 (Solaris only) + - __sparcv9: 64-bit solaris + - __SUNPRO_C + - _LP64: defined in 64-bit LP64 mode, but only if is included + +Borland C/C++ predefines the following: + - __BORLANDC__: + +DEC/Compaq C/C++ on Alpha: + - __alpha + - __arch64__ + - __unix__ (on Tru64 Unix) + - __osf__ + - __DECC + - __DECCXX (C++ compilation) + - __DECC_VER + - __DECCXX_VER + +IBM's AIX compiler: + - __64BIT__ if 64-bit mode + - _AIX + - __IBMC__: C compiler version + - __IBMCPP__: C++ compiler version + - _LONG_LONG: compiler allows long long + +Watcom: + - __WATCOMC__ + - __DOS__ : if targeting DOS + - __386__ : if 32-bit support + - __WIN32__ : if targetin 32-bit Windows + +HP-UX C/C++ Compiler: + - __hpux + - __unix + - __hppa (on PA-RISC) + - __LP64__: if compiled in 64-bit mode + +Metrowerks: + - __MWERKS__ + - __powerpc__ + - _powerc + - __MC68K__ + - macintosh when compiling for MacOS + - __INTEL__ for x86 targets + - __POWERPC__ + +LLVM: + - __llvm__ + - __clang__ +*/ + +/* +** ---------------------------------------------------------------------------- +** Include optionally +** ---------------------------------------------------------------------------- +*/ +#ifdef POSH_USE_LIMITS_H +# include +#endif + +/* +** ---------------------------------------------------------------------------- +** Determine compilation environment +** ---------------------------------------------------------------------------- +*/ +#if defined __ECC || defined __ICC || defined __INTEL_COMPILER +# define POSH_COMPILER_STRING "Intel C/C++" +# define POSH_COMPILER_INTEL 1 +#endif + +#if ( defined __host_mips || defined __sgi ) && !defined __GNUC__ +# define POSH_COMPILER_STRING "MIPSpro C/C++" +# define POSH_COMPILER_MIPSPRO 1 +#endif + +#if defined __hpux && !defined __GNUC__ +# define POSH_COMPILER_STRING "HP-UX CC" +# define POSH_COMPILER_HPCC 1 +#endif + +#if defined __clang__ +# define POSH_COMPILER_STRING "Clang" +# define POSH_COMPILER_CLANG 1 +#endif + +#if defined __GNUC__ && !defined __clang__ +# define POSH_COMPILER_STRING "Gnu GCC" +# define POSH_COMPILER_GCC 1 +#endif + +#if defined __APPLE_CC__ + /* we don't define the compiler string here, let it be GNU */ +# define POSH_COMPILER_APPLECC 1 +#endif + +#if defined __IBMC__ || defined __IBMCPP__ +# define POSH_COMPILER_STRING "IBM C/C++" +# define POSH_COMPILER_IBM 1 +#endif + +#if defined _MSC_VER +# define POSH_COMPILER_STRING "Microsoft Visual C++" +# define POSH_COMPILER_MSVC 1 +#endif + +#if defined __SUNPRO_C +# define POSH_COMPILER_STRING "Sun Pro" +# define POSH_COMPILER_SUN 1 +#endif + +#if defined __BORLANDC__ +# define POSH_COMPILER_STRING "Borland C/C++" +# define POSH_COMPILER_BORLAND 1 +#endif + +#if defined __MWERKS__ +# define POSH_COMPILER_STRING "MetroWerks CodeWarrior" +# define POSH_COMPILER_METROWERKS 1 +#endif + +#if defined __DECC || defined __DECCXX +# define POSH_COMPILER_STRING "Compaq/DEC C/C++" +# define POSH_COMPILER_DEC 1 +#endif + +#if defined __WATCOMC__ 
+# define POSH_COMPILER_STRING "Watcom C/C++" +# define POSH_COMPILER_WATCOM 1 +#endif + +#if !defined POSH_COMPILER_STRING +# define POSH_COMPILER_STRING "Unknown compiler" +#endif + +/* +** ---------------------------------------------------------------------------- +** Determine target operating system +** ---------------------------------------------------------------------------- +*/ +#if defined linux || defined __linux__ +# define POSH_OS_LINUX 1 +# define POSH_OS_STRING "Linux" +#endif + +#if defined __FreeBSD__ +# define POSH_OS_FREEBSD 1 +# define POSH_OS_STRING "FreeBSD" +#endif + +#if defined __NetBSD__ +# define POSH_OS_NETBSD 1 +# define POSH_OS_STRING "NetBSD" +#endif + +#if defined __OpenBSD__ +# define POSH_OS_OPENBSD 1 +# define POSH_OS_STRING "OpenBSD" +#endif + +#if defined __CYGWIN32__ +# define POSH_OS_CYGWIN32 1 +# define POSH_OS_STRING "Cygwin" +#endif + +#if defined GEKKO +# define POSH_OS_GAMECUBE +# define __powerpc__ +# define POSH_OS_STRING "GameCube" +#endif + +#if defined __MINGW32__ +# define POSH_OS_MINGW 1 +# define POSH_OS_STRING "MinGW" +#endif + +#if defined GO32 && defined DJGPP && defined __MSDOS__ +# define POSH_OS_GO32 1 +# define POSH_OS_STRING "GO32/MS-DOS" +#endif + +/* NOTE: make sure you use /bt=DOS if compiling for 32-bit DOS, + otherwise Watcom assumes host=target */ +#if defined __WATCOMC__ && defined __386__ && defined __DOS__ +# define POSH_OS_DOS32 1 +# define POSH_OS_STRING "DOS/32-bit" +#endif + +#if defined _UNICOS +# define POSH_OS_UNICOS 1 +# define POSH_OS_STRING "UNICOS" +#endif + +#if ( defined __MWERKS__ && defined __powerc && !defined macintosh ) || defined __APPLE_CC__ || defined macosx +# define POSH_OS_OSX 1 +# define POSH_OS_STRING "MacOS X" +#endif + +#if defined __sun__ || defined sun || defined __sun || defined __solaris__ +# if defined __SVR4 || defined __svr4__ || defined __solaris__ +# define POSH_OS_STRING "Solaris" +# define POSH_OS_SOLARIS 1 +# endif +# if !defined POSH_OS_STRING +# define POSH_OS_STRING "SunOS" +# define POSH_OS_SUNOS 1 +# endif +#endif + +#if defined __sgi__ || defined sgi || defined __sgi +# define POSH_OS_IRIX 1 +# define POSH_OS_STRING "Irix" +#endif + +#if defined __hpux__ || defined __hpux +# define POSH_OS_HPUX 1 +# define POSH_OS_STRING "HP-UX" +#endif + +#if defined _AIX +# define POSH_OS_AIX 1 +# define POSH_OS_STRING "AIX" +#endif + +#if ( defined __alpha && defined __osf__ ) +# define POSH_OS_TRU64 1 +# define POSH_OS_STRING "Tru64" +#endif + +#if defined __BEOS__ || defined __beos__ +# define POSH_OS_BEOS 1 +# define POSH_OS_STRING "BeOS" +#endif + +#if defined amiga || defined amigados || defined AMIGA || defined _AMIGA +# define POSH_OS_AMIGA 1 +# define POSH_OS_STRING "Amiga" +#endif + +#if defined __unix__ +# define POSH_OS_UNIX 1 +# if !defined POSH_OS_STRING +# define POSH_OS_STRING "Unix-like(generic)" +# endif +#endif + +#if defined _WIN32_WCE +# define POSH_OS_WINCE 1 +# define POSH_OS_STRING "Windows CE" +#endif + +#if defined _XBOX || defined _XBOX_VER +# define POSH_OS_XBOX 1 +# define POSH_OS_STRING "XBOX" +#endif + +#if defined _WIN32 || defined WIN32 || defined __NT__ || defined __WIN32__ +# define POSH_OS_WIN32 1 +# if !defined POSH_OS_XBOX +# if defined _WIN64 +# define POSH_OS_WIN64 1 +# define POSH_OS_STRING "Win64" +# else +# if !defined POSH_OS_STRING +# define POSH_OS_STRING "Win32" +# endif +# endif +# endif +#endif + +#if defined __palmos__ +# define POSH_OS_PALM 1 +# define POSH_OS_STRING "PalmOS" +#endif + +#if defined THINK_C || defined macintosh +# define 
POSH_OS_MACOS 1 +# define POSH_OS_STRING "MacOS" +#endif + +/* +** ----------------------------------------------------------------------------- +** Determine target CPU +** ----------------------------------------------------------------------------- +*/ + +#if defined GEKKO +# define POSH_CPU_PPC750 1 +# define POSH_CPU_STRING "IBM PowerPC 750 (NGC)" +#endif + +#if defined mc68000 || defined m68k || defined __MC68K__ || defined m68000 +# define POSH_CPU_68K 1 +# define POSH_CPU_STRING "MC68000" +#endif + +#if defined __PPC__ || defined __POWERPC__ || defined powerpc || defined _POWER || defined __ppc__ || defined __powerpc__ || defined _M_PPC +# define POSH_CPU_PPC 1 +# if !defined POSH_CPU_STRING +# if defined __powerpc64__ +# define POSH_CPU_PPC64 1 +# define POSH_CPU_STRING "PowerPC64" +# else +# define POSH_CPU_STRING "PowerPC" +# endif +# endif +#endif + +#if defined _CRAYT3E || defined _CRAYMPP +# define POSH_CPU_CRAYT3E 1 /* target processor is a DEC Alpha 21164 used in a Cray T3E*/ +# define POSH_CPU_STRING "Cray T3E (Alpha 21164)" +#endif + +#if defined CRAY || defined _CRAY && !defined _CRAYT3E +# error Non-AXP Cray systems not supported +#endif + +#if defined _SH3 +# define POSH_CPU_SH3 1 +# define POSH_CPU_STRING "Hitachi SH-3" +#endif + +#if defined __sh4__ || defined __SH4__ +# define POSH_CPU_SH3 1 +# define POSH_CPU_SH4 1 +# define POSH_CPU_STRING "Hitachi SH-4" +#endif + +#if defined __sparc__ || defined __sparc +# if defined __arch64__ || defined __sparcv9 || defined __sparc_v9__ +# define POSH_CPU_SPARC64 1 +# define POSH_CPU_STRING "Sparc/64" +# else +# define POSH_CPU_STRING "Sparc/32" +# endif +# define POSH_CPU_SPARC 1 +#endif + +#if defined ARM || defined __arm__ || defined _ARM +# define POSH_CPU_STRONGARM 1 +# define POSH_CPU_STRING "ARM" +#endif + +#if defined __aarch64__ +# define POSH_CPU_AARCH64 1 +# define POSH_CPU_STRING "ARM64" +#endif + +#if defined mips || defined __mips__ || defined __MIPS__ || defined _MIPS +# define POSH_CPU_MIPS 1 +# if defined _R5900 +# define POSH_CPU_STRING "MIPS R5900 (PS2)" +# else +# define POSH_CPU_STRING "MIPS" +# endif +#endif + +#if defined __ia64 || defined _M_IA64 || defined __ia64__ +# define POSH_CPU_IA64 1 +# define POSH_CPU_STRING "IA64" +#endif + +#if defined __X86__ || defined __i386__ || defined i386 || defined _M_IX86 || defined __386__ || defined __x86_64__ || defined _M_X64 +# define POSH_CPU_X86 1 +# if defined __x86_64__ || defined _M_X64 +# define POSH_CPU_X86_64 1 +# endif +# if defined POSH_CPU_X86_64 +# define POSH_CPU_STRING "AMD x86-64" +# else +# define POSH_CPU_STRING "Intel 386+" +# endif +#endif + +#if defined __alpha || defined alpha || defined _M_ALPHA || defined __alpha__ +# define POSH_CPU_AXP 1 +# define POSH_CPU_STRING "AXP" +#endif + +#if defined __hppa || defined hppa +# define POSH_CPU_HPPA 1 +# define POSH_CPU_STRING "PA-RISC" +#endif + +#if !defined POSH_CPU_STRING +# error POSH cannot determine target CPU +# define POSH_CPU_STRING "Unknown" /* this is here for Doxygen's benefit */ +#endif + +/* +** ----------------------------------------------------------------------------- +** Attempt to autodetect building for embedded on Sony PS2 +** ----------------------------------------------------------------------------- +*/ +#if !defined POSH_OS_STRING +# if !defined FORCE_DOXYGEN +# define POSH_OS_EMBEDDED 1 +# endif +# if defined _R5900 +# define POSH_OS_STRING "Sony PS2(embedded)" +# else +# define POSH_OS_STRING "Embedded/Unknown" +# endif +#endif + +/* +** 
--------------------------------------------------------------------------- +** Handle cdecl, stdcall, fastcall, etc. +** --------------------------------------------------------------------------- +*/ +#if defined POSH_CPU_X86 && !defined POSH_CPU_X86_64 +# if defined __GNUC__ +# define POSH_CDECL __attribute__((cdecl)) +# define POSH_STDCALL __attribute__((stdcall)) +# define POSH_FASTCALL __attribute__((fastcall)) +# elif ( defined _MSC_VER || defined __WATCOMC__ || defined __BORLANDC__ || defined __MWERKS__ ) +# define POSH_CDECL __cdecl +# define POSH_STDCALL __stdcall +# define POSH_FASTCALL __fastcall +# endif +#else +# define POSH_CDECL +# define POSH_STDCALL +# define POSH_FASTCALL +#endif + +/* +** --------------------------------------------------------------------------- +** Define POSH_IMPORTEXPORT signature based on POSH_DLL and POSH_BUILDING_LIB +** --------------------------------------------------------------------------- +*/ + +/* +** We undefine this so that multiple inclusions will work +*/ +#if defined POSH_IMPORTEXPORT +# undef POSH_IMPORTEXPORT +#endif + +#if defined POSH_DLL +# if defined POSH_OS_WIN32 +# if defined _MSC_VER +# if ( _MSC_VER >= 800 ) +# if defined POSH_BUILDING_LIB +# define POSH_IMPORTEXPORT __declspec( dllexport ) +# else +# define POSH_IMPORTEXPORT __declspec( dllimport ) +# endif +# else +# if defined POSH_BUILDING_LIB +# define POSH_IMPORTEXPORT __export +# else +# define POSH_IMPORTEXPORT +# endif +# endif +# endif /* defined _MSC_VER */ +# if defined __BORLANDC__ +# if ( __BORLANDC__ >= 0x500 ) +# if defined POSH_BUILDING_LIB +# define POSH_IMPORTEXPORT __declspec( dllexport ) +# else +# define POSH_IMPORTEXPORT __declspec( dllimport ) +# endif +# else +# if defined POSH_BUILDING_LIB +# define POSH_IMPORTEXPORT __export +# else +# define POSH_IMPORTEXPORT +# endif +# endif +# endif /* defined __BORLANDC__ */ + /* for all other compilers, we're just making a blanket assumption */ +# if defined __GNUC__ || defined __WATCOMC__ || defined __MWERKS__ +# if defined POSH_BUILDING_LIB +# define POSH_IMPORTEXPORT __declspec( dllexport ) +# else +# define POSH_IMPORTEXPORT __declspec( dllimport ) +# endif +# endif /* all other compilers */ +# if !defined POSH_IMPORTEXPORT +# error Building DLLs not supported on this compiler (poshlib@poshlib.org if you know how) +# endif +# endif /* defined POSH_OS_WIN32 */ +#endif + +/* On pretty much everything else, we can thankfully just ignore this */ +#if !defined POSH_IMPORTEXPORT +# define POSH_IMPORTEXPORT +#endif + +#if defined FORCE_DOXYGEN +# define POSH_DLL +# define POSH_BUILDING_LIB +# undef POSH_DLL +# undef POSH_BUILDING_LIB +#endif + +/* +** ---------------------------------------------------------------------------- +** (Re)define POSH_PUBLIC_API export signature +** ---------------------------------------------------------------------------- +*/ +#ifdef POSH_PUBLIC_API +# undef POSH_PUBLIC_API +#endif + +#if ( ( defined _MSC_VER ) && ( _MSC_VER < 800 ) ) || ( defined __BORLANDC__ && ( __BORLANDC__ < 0x500 ) ) +# define POSH_PUBLIC_API(rtype) extern rtype POSH_IMPORTEXPORT +#else +# define POSH_PUBLIC_API(rtype) extern POSH_IMPORTEXPORT rtype +#endif + +/* +** ---------------------------------------------------------------------------- +** Try to infer endianess. Basically we just go through the CPUs we know are +** little endian, and assume anything that isn't one of those is big endian. +** As a sanity check, we also do this with operating systems we know are +** little endian, such as Windows. 
Some processors are bi-endian, such as +** the MIPS series, so we have to be careful about those. +** ---------------------------------------------------------------------------- +*/ +#if defined POSH_CPU_X86 || defined POSH_CPU_AXP || defined POSH_CPU_STRONGARM || defined POSH_CPU_AARCH64 || defined POSH_OS_WIN32 || defined POSH_OS_WINCE || defined __MIPSEL__ || defined __ORDER_LITTLE_ENDIAN__ +# define POSH_ENDIAN_STRING "little" +# define POSH_LITTLE_ENDIAN 1 +#else +# define POSH_ENDIAN_STRING "big" +# define POSH_BIG_ENDIAN 1 +#endif + +#if defined FORCE_DOXYGEN +# define POSH_LITTLE_ENDIAN +#endif + +/* +** ---------------------------------------------------------------------------- +** Cross-platform compile time assertion macro +** ---------------------------------------------------------------------------- +*/ +#define POSH_COMPILE_TIME_ASSERT(name, x) typedef int _POSH_dummy_ ## name[(x) ? 1 : -1 ] + +/* +** ---------------------------------------------------------------------------- +** 64-bit Integer +** +** We don't require 64-bit support, nor do we emulate its functionality, we +** simply export it if it's available. Since we can't count on +** for 64-bit support, we ignore the POSH_USE_LIMITS_H directive. +** ---------------------------------------------------------------------------- +*/ +#if defined ( __LP64__ ) || defined ( __powerpc64__ ) || defined POSH_CPU_SPARC64 +# define POSH_64BIT_INTEGER 1 +typedef long posh_i64_t; +typedef unsigned long posh_u64_t; +# define POSH_I64( x ) ((posh_i64_t)x) +# define POSH_U64( x ) ((posh_u64_t)x) +# define POSH_I64_PRINTF_PREFIX "l" +#elif defined _MSC_VER || defined __BORLANDC__ || defined __WATCOMC__ || ( defined __alpha && defined __DECC ) +# define POSH_64BIT_INTEGER 1 +typedef __int64 posh_i64_t; +typedef unsigned __int64 posh_u64_t; +# define POSH_I64( x ) ((posh_i64_t)(x##i64)) +# define POSH_U64( x ) ((posh_u64_t)(x##ui64)) +# define POSH_I64_PRINTF_PREFIX "I64" +#elif defined __GNUC__ || defined __MWERKS__ || defined __SUNPRO_C || defined __SUNPRO_CC || defined __APPLE_CC__ || defined POSH_OS_IRIX || defined _LONG_LONG || defined _CRAYC +# define POSH_64BIT_INTEGER 1 +typedef long long posh_i64_t; +typedef unsigned long long posh_u64_t; +# define POSH_U64( x ) ((posh_u64_t)(x##LL)) +# define POSH_I64( x ) ((posh_i64_t)(x##LL)) +# define POSH_I64_PRINTF_PREFIX "ll" +#endif + +/* hack */ +/*#ifdef __MINGW32__ +#undef POSH_I64 +#undef POSH_U64 +#undef POSH_I64_PRINTF_PREFIX +#define POSH_I64( x ) ((posh_i64_t)x) +#define POSH_U64( x ) ((posh_u64_t)x) +#define POSH_I64_PRINTF_PREFIX "I64" +#endif*/ + +#ifdef FORCE_DOXYGEN +typedef long long posh_i64_t; +typedef unsigned long posh_u64_t; +# define POSH_64BIT_INTEGER +# define POSH_I64_PRINTF_PREFIX +# define POSH_I64(x) +# define POSH_U64(x) +#endif + +/** Minimum value for a 64-bit signed integer */ +#define POSH_I64_MIN POSH_I64(0x8000000000000000) +/** Maximum value for a 64-bit signed integer */ +#define POSH_I64_MAX POSH_I64(0x7FFFFFFFFFFFFFFF) +/** Minimum value for a 64-bit unsigned integer */ +#define POSH_U64_MIN POSH_U64(0) +/** Maximum value for a 64-bit unsigned integer */ +#define POSH_U64_MAX POSH_U64(0xFFFFFFFFFFFFFFFF) + +/* ---------------------------------------------------------------------------- +** Basic Sized Types +** +** These types are expected to be EXACTLY sized so you can use them for +** serialization. 
+** ---------------------------------------------------------------------------- +*/ +#define POSH_FALSE 0 +#define POSH_TRUE 1 + +typedef int posh_bool_t; +typedef unsigned char posh_byte_t; + +/* NOTE: These assume that CHAR_BIT is 8!! */ +typedef unsigned char posh_u8_t; +typedef signed char posh_i8_t; + +#if defined POSH_USE_LIMITS_H +# if CHAR_BITS > 8 +# error This machine uses 9-bit characters. This is a warning, you can comment this out now. +# endif /* CHAR_BITS > 8 */ + +/* 16-bit */ +# if ( USHRT_MAX == 65535 ) + typedef unsigned short posh_u16_t; + typedef short posh_i16_t; +# else + /* Yes, in theory there could still be a 16-bit character type and shorts are + 32-bits in size...if you find such an architecture, let me know =P */ +# error No 16-bit type found +# endif + +/* 32-bit */ +# if ( INT_MAX == 2147483647 ) + typedef unsigned posh_u32_t; + typedef int posh_i32_t; +# elif ( LONG_MAX == 2147483647 ) + typedef unsigned long posh_u32_t; + typedef long posh_i32_t; +# else + error No 32-bit type found +# endif + +#else /* POSH_USE_LIMITS_H */ + + typedef unsigned short posh_u16_t; + typedef short posh_i16_t; + +# if !defined POSH_OS_PALM + typedef unsigned posh_u32_t; + typedef int posh_i32_t; +# else + typedef unsigned long posh_u32_t; + typedef long posh_i32_t; +# endif +#endif + +/** Minimum value for a byte */ +#define POSH_BYTE_MIN 0 +/** Maximum value for an 8-bit unsigned value */ +#define POSH_BYTE_MAX 255 +/** Minimum value for a byte */ +#define POSH_I16_MIN ( ( posh_i16_t ) 0x8000 ) +/** Maximum value for a 16-bit signed value */ +#define POSH_I16_MAX ( ( posh_i16_t ) 0x7FFF ) +/** Minimum value for a 16-bit unsigned value */ +#define POSH_U16_MIN 0 +/** Maximum value for a 16-bit unsigned value */ +#define POSH_U16_MAX ( ( posh_u16_t ) 0xFFFF ) +/** Minimum value for a 32-bit signed value */ +#define POSH_I32_MIN ( ( posh_i32_t ) 0x80000000 ) +/** Maximum value for a 32-bit signed value */ +#define POSH_I32_MAX ( ( posh_i32_t ) 0x7FFFFFFF ) +/** Minimum value for a 32-bit unsigned value */ +#define POSH_U32_MIN 0 +/** Maximum value for a 32-bit unsigned value */ +#define POSH_U32_MAX ( ( posh_u32_t ) 0xFFFFFFFF ) + +/* +** ---------------------------------------------------------------------------- +** Sanity checks on expected sizes +** ---------------------------------------------------------------------------- +*/ +#if !defined FORCE_DOXYGEN + +POSH_COMPILE_TIME_ASSERT(posh_byte_t, sizeof(posh_byte_t) == 1); +POSH_COMPILE_TIME_ASSERT(posh_u8_t, sizeof(posh_u8_t) == 1); +POSH_COMPILE_TIME_ASSERT(posh_i8_t, sizeof(posh_i8_t) == 1); +POSH_COMPILE_TIME_ASSERT(posh_u16_t, sizeof(posh_u16_t) == 2); +POSH_COMPILE_TIME_ASSERT(posh_i16_t, sizeof(posh_i16_t) == 2); +POSH_COMPILE_TIME_ASSERT(posh_u32_t, sizeof(posh_u32_t) == 4); +POSH_COMPILE_TIME_ASSERT(posh_i32_t, sizeof(posh_i32_t) == 4); + +#if !defined POSH_NO_FLOAT + POSH_COMPILE_TIME_ASSERT(posh_testfloat_t, sizeof(float)==4 ); + POSH_COMPILE_TIME_ASSERT(posh_testdouble_t, sizeof(double)==8); +#endif + +#if defined POSH_64BIT_INTEGER + POSH_COMPILE_TIME_ASSERT(posh_u64_t, sizeof(posh_u64_t) == 8); + POSH_COMPILE_TIME_ASSERT(posh_i64_t, sizeof(posh_i64_t) == 8); +#endif + +#endif + +/* +** ---------------------------------------------------------------------------- +** 64-bit pointer support +** ---------------------------------------------------------------------------- +*/ +#if defined POSH_CPU_AXP && ( defined POSH_OS_TRU64 || defined POSH_OS_LINUX ) +# define POSH_64BIT_POINTER 1 +#endif + +#if defined 
POSH_CPU_X86_64 && defined POSH_OS_LINUX +# define POSH_64BIT_POINTER 1 +#endif + +#if defined POSH_CPU_SPARC64 || defined POSH_OS_WIN64 || defined __64BIT__ || defined __LP64 || defined _LP64 || defined __LP64__ || defined _ADDR64 || defined _CRAYC +# define POSH_64BIT_POINTER 1 +#endif + +#if defined POSH_64BIT_POINTER + POSH_COMPILE_TIME_ASSERT( posh_64bit_pointer, sizeof( void * ) == 8 ); +#elif !defined FORCE_DOXYGEN +/* if this assertion is hit then you're on a system that either has 64-bit + addressing and we didn't catch it, or you're on a system with 16-bit + pointers. In the latter case, POSH doesn't actually care, we're just + triggering this assertion to make sure you're aware of the situation, + so feel free to delete it. + + If this assertion is triggered on a known 32 or 64-bit platform, + please let us know (poshlib@poshlib.org) */ + POSH_COMPILE_TIME_ASSERT( posh_32bit_pointer, sizeof( void * ) == 4 ); +#endif + +#if defined FORCE_DOXYGEN +# define POSH_64BIT_POINTER +#endif + +/* +** ---------------------------------------------------------------------------- +** POSH Utility Functions +** +** These are optional POSH utility functions that are not required if you don't +** need anything except static checking of your host and target environment. +** +** These functions are NOT wrapped with POSH_PUBLIC_API because I didn't want +** to enforce their export if your own library is only using them internally. +** ---------------------------------------------------------------------------- +*/ +#ifdef __cplusplus +extern "C" { +#endif + +const char *POSH_GetArchString( void ); + +#if !defined POSH_NO_FLOAT + +posh_u32_t POSH_LittleFloatBits( float f ); +posh_u32_t POSH_BigFloatBits( float f ); +float POSH_FloatFromLittleBits( posh_u32_t bits ); +float POSH_FloatFromBigBits( posh_u32_t bits ); + +void POSH_DoubleBits( double d, posh_byte_t dst[ 8 ] ); +double POSH_DoubleFromBits( const posh_byte_t src[ 8 ] ); + +/* unimplemented +float *POSH_WriteFloatToLittle( void *dst, float f ); +float *POSH_WriteFloatToBig( void *dst, float f ); +float POSH_ReadFloatFromLittle( const void *src ); +float POSH_ReadFloatFromBig( const void *src ); + +double *POSH_WriteDoubleToLittle( void *dst, double d ); +double *POSH_WriteDoubleToBig( void *dst, double d ); +double POSH_ReadDoubleFromLittle( const void *src ); +double POSH_ReadDoubleFromBig( const void *src ); +*/ +#endif /* !defined POSH_NO_FLOAT */ + +#if defined FORCE_DOXYGEN +# define POSH_NO_FLOAT +# undef POSH_NO_FLOAT +#endif + +extern posh_u16_t POSH_SwapU16( posh_u16_t u ); +extern posh_i16_t POSH_SwapI16( posh_i16_t u ); +extern posh_u32_t POSH_SwapU32( posh_u32_t u ); +extern posh_i32_t POSH_SwapI32( posh_i32_t u ); + +#if defined POSH_64BIT_INTEGER + +extern posh_u64_t POSH_SwapU64( posh_u64_t u ); +extern posh_i64_t POSH_SwapI64( posh_i64_t u ); + +#endif /*POSH_64BIT_INTEGER */ + +extern posh_u16_t *POSH_WriteU16ToLittle( void *dst, posh_u16_t value ); +extern posh_i16_t *POSH_WriteI16ToLittle( void *dst, posh_i16_t value ); +extern posh_u32_t *POSH_WriteU32ToLittle( void *dst, posh_u32_t value ); +extern posh_i32_t *POSH_WriteI32ToLittle( void *dst, posh_i32_t value ); + +extern posh_u16_t *POSH_WriteU16ToBig( void *dst, posh_u16_t value ); +extern posh_i16_t *POSH_WriteI16ToBig( void *dst, posh_i16_t value ); +extern posh_u32_t *POSH_WriteU32ToBig( void *dst, posh_u32_t value ); +extern posh_i32_t *POSH_WriteI32ToBig( void *dst, posh_i32_t value ); + +extern posh_u16_t POSH_ReadU16FromLittle( const void *src ); +extern 
posh_i16_t POSH_ReadI16FromLittle( const void *src ); +extern posh_u32_t POSH_ReadU32FromLittle( const void *src ); +extern posh_i32_t POSH_ReadI32FromLittle( const void *src ); + +extern posh_u16_t POSH_ReadU16FromBig( const void *src ); +extern posh_i16_t POSH_ReadI16FromBig( const void *src ); +extern posh_u32_t POSH_ReadU32FromBig( const void *src ); +extern posh_i32_t POSH_ReadI32FromBig( const void *src ); + +#if defined POSH_64BIT_INTEGER +extern posh_u64_t *POSH_WriteU64ToLittle( void *dst, posh_u64_t value ); +extern posh_i64_t *POSH_WriteI64ToLittle( void *dst, posh_i64_t value ); +extern posh_u64_t *POSH_WriteU64ToBig( void *dst, posh_u64_t value ); +extern posh_i64_t *POSH_WriteI64ToBig( void *dst, posh_i64_t value ); + +extern posh_u64_t POSH_ReadU64FromLittle( const void *src ); +extern posh_i64_t POSH_ReadI64FromLittle( const void *src ); +extern posh_u64_t POSH_ReadU64FromBig( const void *src ); +extern posh_i64_t POSH_ReadI64FromBig( const void *src ); +#endif /* POSH_64BIT_INTEGER */ + +#if defined POSH_LITTLE_ENDIAN + +# define POSH_LittleU16(x) (x) +# define POSH_LittleU32(x) (x) +# define POSH_LittleI16(x) (x) +# define POSH_LittleI32(x) (x) +# if defined POSH_64BIT_INTEGER +# define POSH_LittleU64(x) (x) +# define POSH_LittleI64(x) (x) +# endif /* defined POSH_64BIT_INTEGER */ + +# define POSH_BigU16(x) POSH_SwapU16(x) +# define POSH_BigU32(x) POSH_SwapU32(x) +# define POSH_BigI16(x) POSH_SwapI16(x) +# define POSH_BigI32(x) POSH_SwapI32(x) +# if defined POSH_64BIT_INTEGER +# define POSH_BigU64(x) POSH_SwapU64(x) +# define POSH_BigI64(x) POSH_SwapI64(x) +# endif /* defined POSH_64BIT_INTEGER */ + +#else + +# define POSH_BigU16(x) (x) +# define POSH_BigU32(x) (x) +# define POSH_BigI16(x) (x) +# define POSH_BigI32(x) (x) + +# if defined POSH_64BIT_INTEGER +# define POSH_BigU64(x) (x) +# define POSH_BigI64(x) (x) +# endif /* POSH_64BIT_INTEGER */ + +# define POSH_LittleU16(x) POSH_SwapU16(x) +# define POSH_LittleU32(x) POSH_SwapU32(x) +# define POSH_LittleI16(x) POSH_SwapI16(x) +# define POSH_LittleI32(x) POSH_SwapI32(x) + +# if defined POSH_64BIT_INTEGER +# define POSH_LittleU64(x) POSH_SwapU64(x) +# define POSH_LittleI64(x) POSH_SwapI64(x) +# endif /* POSH_64BIT_INTEGER */ + +#endif + +#ifdef __cplusplus +} +#endif Index: ps/trunk/libraries/source/nvtt/src/extern/poshlib/posh.c =================================================================== --- ps/trunk/libraries/source/nvtt/src/extern/poshlib/posh.c +++ ps/trunk/libraries/source/nvtt/src/extern/poshlib/posh.c @@ -0,0 +1,1006 @@ +/* +LICENSE: + +Copyright (c) 2004, Brian Hook +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * The names of this package'ss contributors contributors may not + be used to endorse or promote products derived from this + software without specific prior written permission. + + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ +/** + @file posh.c + @author Brian Hook + @date 2002 + @brief Portable Open Source Harness primary source file +*/ +#include "posh.h" + +#if !defined FORCE_DOXYGEN + +#if !defined POSH_NO_FLOAT +# define POSH_FLOAT_STRING "enabled" +#else +# define POSH_FLOAT_STRING "disabled" +#endif + +#if defined POSH_64BIT_INTEGER +# define POSH_64BIT_INTEGER_STRING "yes" +#else +# define POSH_64BIT_INTEGER_STRING "no" +#endif + +#if defined POSH_64BIT_POINTER +# define POSH_POINTER_STRING "64-bits" +#else +# define POSH_POINTER_STRING "32-bits" +#endif + +#if defined POSH_LITTLE_ENDIAN +# define IS_BIG_ENDIAN 0 + +# define NATIVE16 POSH_LittleU16 +# define NATIVE32 POSH_LittleU32 +# define NATIVE64 POSH_LittleU64 +# define FOREIGN16 POSH_BigU16 +# define FOREIGN32 POSH_BigU32 +# define FOREIGN64 POSH_BigU64 +#else +# define IS_BIG_ENDIAN 1 + +# define NATIVE16 POSH_BigU16 +# define NATIVE32 POSH_BigU32 +# define NATIVE64 POSH_BigU64 +# define FOREIGN16 POSH_LittleU16 +# define FOREIGN32 POSH_LittleU32 +# define FOREIGN64 POSH_LittleU64 +#endif /* POSH_LITTLE_ENDIAN */ + +static +int +s_testBigEndian( void ) +{ + union + { + posh_byte_t c[ 4 ]; + posh_u32_t i; + } u; + + u.i= 1; + + if ( u.c[ 0 ] == 1 ) + { + return 0; + } + return 1; +} + +static +const char * +s_testSerialization( void ) +{ + posh_byte_t serbuf[ 8 ]; + posh_u16_t tmp16; + posh_u32_t tmp32; + + /* 16-bit serialization */ + POSH_WriteU16ToLittle( serbuf, 0xABCD ); + if ( ( tmp16 = POSH_ReadU16FromLittle( serbuf ) ) != 0xABCD ) + { + return "*ERROR: failed little-endian 16-bit serialization test"; + } + + POSH_WriteU16ToBig( serbuf, 0xABCD ); + if ( ( tmp16 = POSH_ReadU16FromBig( serbuf ) ) != 0xABCD ) + { + return "*ERROR: failed big-endian 16-bit serialization test"; + } + + /* 32-bit serialization */ + POSH_WriteU32ToLittle( serbuf, 0xABCD1234L ); + if ( ( tmp32 = POSH_ReadU32FromLittle( serbuf ) ) != 0xABCD1234 ) + { + return "*ERROR: failed little-endian 32-bit serialization test"; + } + + POSH_WriteU32ToBig( serbuf, 0xABCD1234L ); + if ( ( tmp32 = POSH_ReadU32FromBig( serbuf ) ) != 0xABCD1234 ) + { + return "*ERROR: failed big-endian 32-bit serialization test"; + } + +#if defined POSH_64BIT_INTEGER + { +#define REF64 POSH_U64(0xFEDCBA9876543210) + + posh_u64_t tmp64; + + POSH_WriteU64ToLittle( serbuf, REF64 ); + + if ( ( tmp64 = POSH_ReadU64FromLittle( serbuf ) ) != REF64 ) + { + return "*ERROR: failed little-endian 64-bit serialization test"; + } + + POSH_WriteU64ToBig( serbuf, REF64 ); + + if ( ( tmp64 = POSH_ReadU64FromBig( serbuf ) ) != REF64 ) + { + return "*ERROR: failed big-endian 64-bit serialization test"; + } + } +#endif + + return 0; +} + +#if !defined POSH_NO_FLOAT +static +const char * +s_testFloatingPoint( void ) +{ + float fRef = 10.0f/30.0f; + double dRef = 10.0/30.0; + posh_byte_t dbuf[ 8 ]; + float fTmp; + double dTmp; + + fTmp = POSH_FloatFromLittleBits( POSH_LittleFloatBits( fRef ) ); + + if ( fTmp != fRef ) + { + return "*ERROR: POSH little endian floating point conversion failed. 
Please report this to poshlib@poshlib.org!\n"; + } + + fTmp = POSH_FloatFromBigBits( POSH_BigFloatBits( fRef ) ); + if ( fTmp != fRef ) + { + return "*ERROR: POSH big endian floating point conversion failed. Please report this to poshlib@poshlib.org!\n"; + } + + POSH_DoubleBits( dRef, dbuf ); + + dTmp = POSH_DoubleFromBits( dbuf ); + + if ( dTmp != dRef ) + { + return "*ERROR: POSH double precision floating point serialization failed. Please report this to poshlib@poshlib.org!\n"; + } + + return 0; +} +#endif /* !defined POSH_NO_FLOAT */ + +static +const char * +s_testEndianess( void ) +{ + /* check endianess */ + if ( s_testBigEndian() != IS_BIG_ENDIAN ) + { + return "*ERROR: POSH compile time endianess does not match run-time endianess verification. Please report this to poshlib@poshlib.org!\n"; + } + + /* make sure our endian swap routines work */ + if ( ( NATIVE32( 0x11223344L ) != 0x11223344L ) || + ( FOREIGN32( 0x11223344L ) != 0x44332211L ) || + ( NATIVE16( 0x1234 ) != 0x1234 ) || + ( FOREIGN16( 0x1234 ) != 0x3412 ) ) + { + return "*ERROR: POSH endianess macro selection failed. Please report this to poshlib@poshlib.org!\n"; + } + + /* test serialization routines */ + + return 0; +} +#endif /* !defined FORCE_DOXYGEN */ + +/** + Returns a string describing this platform's basic attributes. + + POSH_GetArchString() reports on an architecture's statically determined + attributes. In addition, it will perform run-time verification checks + to make sure the various platform specific functions work. If an error + occurs, please contact me at poshlib@poshlib.org so we can try to resolve + what the specific failure case is. + @returns a string describing this platform on success, or a string in the + form "*ERROR: [text]" on failure. You can simply check to see if + the first character returned is '*' to verify an error condition. +*/ +const char * +POSH_GetArchString( void ) +{ + const char *err; + const char *s = "OS:.............."POSH_OS_STRING"\n" + "CPU:............."POSH_CPU_STRING"\n" + "endian:.........."POSH_ENDIAN_STRING"\n" + "ptr size:........"POSH_POINTER_STRING"\n" + "64-bit ints......"POSH_64BIT_INTEGER_STRING"\n" + "floating point..."POSH_FLOAT_STRING"\n" + "compiler........."POSH_COMPILER_STRING"\n"; + + /* test endianess */ + err = s_testEndianess(); + + if ( err != 0 ) + { + return err; + } + + /* test serialization */ + err = s_testSerialization(); + + if ( err != 0 ) + { + return err; + } + +#if !defined POSH_NO_FLOAT + /* check that our floating point support is correct */ + err = s_testFloatingPoint(); + + if ( err != 0 ) + { + return err; + } + +#endif + + return s; +} + +/* ---------------------------------------------------------------------------*/ +/* BYTE SWAPPING SUPPORT */ +/* ---------------------------------------------------------------------------*/ +/** + * Byte swaps a 16-bit unsigned value + * + @ingroup ByteSwapFunctions + @param v [in] unsigned 16-bit input value to swap + @returns a byte swapped version of v + */ +posh_u16_t +POSH_SwapU16( posh_u16_t v ) +{ + posh_u16_t swapped; + + swapped = v << 8; + swapped |= v >> 8; + + return swapped; +} + +/** + * Byte swaps a 16-bit signed value + * + @ingroup ByteSwapFunctions + @param v [in] signed 16-bit input value to swap + @returns a byte swapped version of v + @remarks This just calls back to the unsigned version, since byte swapping + is independent of sign. However, we still provide this function to + avoid signed/unsigned mismatch compiler warnings. 
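+
+   Example (an illustrative sketch, not from the upstream sources):
+
+     posh_u16_t le = 0x1234;
+     posh_u16_t be = POSH_SwapU16( le );   // be == 0x3412 regardless of host byte order
+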
+ */ +posh_i16_t +POSH_SwapI16( posh_i16_t v ) +{ + return ( posh_i16_t ) POSH_SwapU16( v ); +} + +/** + * Byte swaps a 32-bit unsigned value + * + @ingroup ByteSwapFunctions + @param v [in] unsigned 32-bit input value to swap + @returns a byte swapped version of v + */ +posh_u32_t +POSH_SwapU32( posh_u32_t v ) +{ + posh_u32_t swapped; + + swapped = ( v & 0xFF ) << 24; + swapped |= ( v & 0xFF00 ) << 8; + swapped |= ( v >> 8 ) & 0xFF00; + swapped |= ( v >> 24 ); + + return swapped; +} + +/** + * Byte swaps a 32-bit signed value + * + @ingroup ByteSwapFunctions + @param v [in] signed 32-bit input value to swap + @returns a byte swapped version of v + @remarks This just calls back to the unsigned version, since byte swapping + is independent of sign. However, we still provide this function to + avoid signed/unsigned mismatch compiler warnings. + */ +posh_i32_t +POSH_SwapI32( posh_i32_t v ) +{ + return ( posh_i32_t ) POSH_SwapU32( ( posh_u32_t ) v ); +} + +#if defined POSH_64BIT_INTEGER +/** + * Byte swaps a 64-bit unsigned value + + @param v [in] a 64-bit input value to swap + @ingroup SixtyFourBit + @returns a byte swapped version of v +*/ +posh_u64_t +POSH_SwapU64( posh_u64_t v ) +{ + posh_byte_t tmp; + union { + posh_byte_t bytes[ 8 ]; + posh_u64_t u64; + } u; + + u.u64 = v; + + tmp = u.bytes[ 0 ]; u.bytes[ 0 ] = u.bytes[ 7 ]; u.bytes[ 7 ] = tmp; + tmp = u.bytes[ 1 ]; u.bytes[ 1 ] = u.bytes[ 6 ]; u.bytes[ 6 ] = tmp; + tmp = u.bytes[ 2 ]; u.bytes[ 2 ] = u.bytes[ 5 ]; u.bytes[ 5 ] = tmp; + tmp = u.bytes[ 3 ]; u.bytes[ 3 ] = u.bytes[ 4 ]; u.bytes[ 4 ] = tmp; + + return u.u64; +} + +/** + * Byte swaps a 64-bit signed value + + @param v [in] a 64-bit input value to swap + @ingroup SixtyFourBit + @returns a byte swapped version of v +*/ +posh_i64_t +POSH_SwapI64( posh_i64_t v ) +{ + return ( posh_i64_t ) POSH_SwapU64( ( posh_u64_t ) v ); +} + +#endif /* defined POSH_64BIT_INTEGER */ + +/* ---------------------------------------------------------------------------*/ +/* IN-MEMORY SERIALIZATION */ +/* ---------------------------------------------------------------------------*/ + +/** + * Writes an unsigned 16-bit value to a little endian buffer + + @ingroup MemoryBuffer + @param dst [out] pointer to the destination buffer, may not be NULL. Alignment doesn't matter. + @param value [in] host-endian unsigned 16-bit value + @returns a pointer to the location two bytes after dst + @remarks does no validation of the inputs +*/ +posh_u16_t * +POSH_WriteU16ToLittle( void *dst, posh_u16_t value ) +{ + posh_u16_t *p16 = ( posh_u16_t * ) dst; + posh_byte_t *p = ( posh_byte_t * ) dst; + + p[ 0 ] = value & 0xFF; + p[ 1 ] = ( value & 0xFF00) >> 8; + + return p16 + 1; +} + +/** + * Writes a signed 16-bit value to a little endian buffer + + @ingroup MemoryBuffer + @param dst [out] pointer to the destination buffer, may not be NULL + @param value [in] host-endian signed 16-bit value + @returns a pointer to the location two bytes after dst + @remarks does no validation of the inputs. This simply calls + POSH_WriteU16ToLittle() with appropriate casting. 
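+
+   Example (an illustrative sketch, not from the upstream sources):
+
+     posh_byte_t buf[ 2 ];
+     POSH_WriteU16ToLittle( buf, 0xABCD );
+     // buf[ 0 ] == 0xCD and buf[ 1 ] == 0xAB on any host
+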
+*/ +posh_i16_t * +POSH_WriteI16ToLittle( void *dst, posh_i16_t value ) +{ + return ( posh_i16_t * ) POSH_WriteU16ToLittle( dst, ( posh_u16_t ) value ); +} + +/** + * Writes an unsigned 32-bit value to a little endian buffer + + @ingroup MemoryBuffer + @param dst [out] pointer to the destination buffer, may not be NULL + @param value [in] host-endian signed 32-bit value + @returns a pointer to the location four bytes after dst + @remarks does no validation of the inputs. +*/ +posh_u32_t * +POSH_WriteU32ToLittle( void *dst, posh_u32_t value ) +{ + posh_u32_t *p32 = ( posh_u32_t * ) dst; + posh_byte_t *p = ( posh_byte_t * ) dst; + + p[ 0 ] = ( value & 0xFF ); + p[ 1 ] = ( value & 0xFF00 ) >> 8; + p[ 2 ] = ( value & 0xFF0000 ) >> 16; + p[ 3 ] = ( value & 0xFF000000 ) >> 24; + + return p32 + 1; +} + +/** + * Writes a signed 32-bit value to a little endian buffer + + @ingroup MemoryBuffer + @param dst [out] pointer to the destination buffer, may not be NULL + @param value [in] host-endian signed 32-bit value + @returns a pointer to the location four bytes after dst + @remarks does no validation of the inputs. This simply calls + POSH_WriteU32ToLittle() with appropriate casting. +*/ +posh_i32_t * +POSH_WriteI32ToLittle( void *dst, posh_i32_t value ) +{ + return ( posh_i32_t * ) POSH_WriteU32ToLittle( dst, ( posh_u32_t ) value ); +} + +/** + * Writes an unsigned 16-bit value to a big endian buffer + + @ingroup MemoryBuffer + @param dst [out] pointer to the destination buffer, may not be NULL + @param value [in] host-endian unsigned 16-bit value + @returns a pointer to the location two bytes after dst + @remarks does no validation of the inputs +*/ +posh_u16_t * +POSH_WriteU16ToBig( void *dst, posh_u16_t value ) +{ + posh_u16_t *p16 = ( posh_u16_t * ) dst; + posh_byte_t *p = ( posh_byte_t * ) dst; + + p[ 1 ] = ( value & 0xFF ); + p[ 0 ] = ( value & 0xFF00 ) >> 8; + + return p16 + 1; +} + +/** + * Writes a signed 16-bit value to a big endian buffer + + @ingroup MemoryBuffer + @param dst [out] pointer to the destination buffer, may not be NULL + @param value [in] host-endian signed 16-bit value + @returns a pointer to the location two bytes after dst + @remarks does no validation of the inputs. This simply calls + POSH_WriteU16ToLittle() with appropriate casting. +*/ +posh_i16_t * +POSH_WriteI16ToBig( void *dst, posh_i16_t value ) +{ + return ( posh_i16_t * ) POSH_WriteU16ToBig( dst, ( posh_u16_t ) value ); +} + +/** + * Writes an unsigned 32-bit value to a big endian buffer + + @ingroup MemoryBuffer + @param dst [out] pointer to the destination buffer, may not be NULL + @param value [in] host-endian unsigned 32-bit value + @returns a pointer to the location four bytes after dst + @remarks does no validation of the inputs. +*/ +posh_u32_t * +POSH_WriteU32ToBig( void *dst, posh_u32_t value ) +{ + posh_u32_t *p32 = ( posh_u32_t * ) dst; + posh_byte_t *p = ( posh_byte_t * ) dst; + + p[ 3 ] = ( value & 0xFF ); + p[ 2 ] = ( value & 0xFF00 ) >> 8; + p[ 1 ] = ( value & 0xFF0000 ) >> 16; + p[ 0 ] = ( value & 0xFF000000 ) >> 24; + + return p32 + 1; +} + +/** + * Writes a signed 32-bit value to a big endian buffer + + @ingroup MemoryBuffer + @param dst [out] pointer to the destination buffer, may not be NULL + @param value [in] host-endian signed 32-bit value + @returns a pointer to the location four bytes after dst + @remarks does no validation of the inputs. This simply calls + POSH_WriteU32ToBig() with appropriate casting. 
+*/ +posh_i32_t * +POSH_WriteI32ToBig( void *dst, posh_i32_t value ) +{ + return ( posh_i32_t * ) POSH_WriteU32ToBig( dst, ( posh_u32_t ) value ); +} + +#if defined POSH_64BIT_INTEGER +/** + * Writes an unsigned 64-bit value to a little-endian buffer + + @ingroup SixtyFourBit + @param dst [out] pointer to the destination buffer, may not be NULL + @param value [in] host-endian unsigned 64-bit value + @returns a pointer to the location eight bytes after dst + @remarks does no validation of the inputs. +*/ +posh_u64_t * +POSH_WriteU64ToLittle( void *dst, posh_u64_t value ) +{ + posh_u64_t *p64 = ( posh_u64_t * ) dst; + posh_byte_t *p = ( posh_byte_t * ) dst; + int i; + + for ( i = 0; i < 8; i++, value >>= 8 ) + { + p[ i ] = ( posh_byte_t ) ( value & 0xFF ); + } + + return p64 + 1; +} + +/** + * Writes a signed 64-bit value to a little-endian buffer + + @ingroup SixtyFourBit + @param dst [out] pointer to the destination buffer, may not be NULL + @param value [in] host-endian unsigned 64-bit value + @returns a pointer to the location eight bytes after dst + @remarks does no validation of the inputs. +*/ +posh_i64_t * +POSH_WriteI64ToLittle( void *dst, posh_i64_t value ) +{ + return ( posh_i64_t * ) POSH_WriteU64ToLittle( dst, ( posh_u64_t ) value ); +} + +/** + * Writes an unsigned 64-bit value to a big-endian buffer + + @ingroup SixtyFourBit + @param dst [out] pointer to the destination buffer, may not be NULL + @param value [in] host-endian unsigned 64-bit value + @returns a pointer to the location eight bytes after dst + @remarks does no validation of the inputs. +*/ +posh_u64_t * +POSH_WriteU64ToBig( void *dst, posh_u64_t value ) +{ + posh_u64_t *p64 = ( posh_u64_t * ) dst; + posh_byte_t *p = ( posh_byte_t * ) dst; + int i; + + for ( i = 0; i < 8; i++, value >>= 8 ) + { + p[ 7-i ] = ( posh_byte_t ) ( value & 0xFF ); + } + + return p64 + 8; +} + +/** + * Writes a signed 64-bit value to a big-endian buffer + + @ingroup SixtyFourBit + @param dst [out] pointer to the destination buffer, may not be NULL + @param value [in] host-endian signed 64-bit value + @returns a pointer to the location eight bytes after dst + @remarks does no validation of the inputs. 
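+
+   Example (an illustrative sketch, not from the upstream sources):
+
+     posh_byte_t buf[ 8 ];
+     POSH_WriteU64ToBig( buf, POSH_U64( 0x0102030405060708 ) );
+     // buf[ 0 ] == 0x01 (MSB) ... buf[ 7 ] == 0x08 (LSB)
+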
+*/ +posh_i64_t * +POSH_WriteI64ToBig( void *dst, posh_i64_t value ) +{ + return ( posh_i64_t * ) POSH_WriteU64ToBig( dst, ( posh_u64_t ) value ); +} + +#endif /* POSH_64BIT_INTEGER */ + +/* ---------------------------------------------------------------------------*/ +/* IN-MEMORY DESERIALIZATION */ +/* ---------------------------------------------------------------------------*/ + +/** + * Reads an unsigned 16-bit value from a little-endian buffer + @ingroup MemoryBuffer + @param src [in] source buffer + @returns host-endian unsigned 16-bit value +*/ +posh_u16_t +POSH_ReadU16FromLittle( const void *src ) +{ + posh_u16_t v = 0; + posh_byte_t *p = ( posh_byte_t * ) src; + + v |= p[ 0 ]; + v |= ( ( posh_u16_t ) p[ 1 ] ) << 8; + + return v; +} + +/** + * Reads a signed 16-bit value from a little-endian buffer + @ingroup MemoryBuffer + @param src [in] source buffer + @returns host-endian signed 16-bit value +*/ +posh_i16_t +POSH_ReadI16FromLittle( const void *src ) +{ + return ( posh_i16_t ) POSH_ReadU16FromLittle( src ); +} + +/** + * Reads an unsigned 32-bit value from a little-endian buffer + @ingroup MemoryBuffer + @param src [in] source buffer + @returns host-endian unsigned 32-bit value +*/ +posh_u32_t +POSH_ReadU32FromLittle( const void *src ) +{ + posh_u32_t v = 0; + posh_byte_t *p = ( posh_byte_t * ) src; + + v |= p[ 0 ]; + v |= ( ( posh_u32_t ) p[ 1 ] ) << 8; + v |= ( ( posh_u32_t ) p[ 2 ] ) << 16; + v |= ( ( posh_u32_t ) p[ 3 ] ) << 24; + + return v; +} + +/** + * Reads a signed 32-bit value from a little-endian buffer + @ingroup MemoryBuffer + @param src [in] source buffer + @returns host-endian signed 32-bit value +*/ +posh_i32_t +POSH_ReadI32FromLittle( const void *src ) +{ + return ( posh_i32_t ) POSH_ReadU32FromLittle( src ); +} + + +/** + * Reads an unsigned 16-bit value from a big-endian buffer + @ingroup MemoryBuffer + @param src [in] source buffer + @returns host-endian unsigned 16-bit value +*/ +posh_u16_t +POSH_ReadU16FromBig( const void *src ) +{ + posh_u16_t v = 0; + posh_byte_t *p = ( posh_byte_t * ) src; + + v |= p[ 1 ]; + v |= ( ( posh_u16_t ) p[ 0 ] ) << 8; + + return v; +} + +/** + * Reads a signed 16-bit value from a big-endian buffer + @ingroup MemoryBuffer + @param src [in] source buffer + @returns host-endian signed 16-bit value +*/ +posh_i16_t +POSH_ReadI16FromBig( const void *src ) +{ + return ( posh_i16_t ) POSH_ReadU16FromBig( src ); +} + +/** + * Reads an unsigned 32-bit value from a big-endian buffer + @ingroup MemoryBuffer + @param src [in] source buffer + @returns host-endian unsigned 32-bit value +*/ +posh_u32_t +POSH_ReadU32FromBig( const void *src ) +{ + posh_u32_t v = 0; + posh_byte_t *p = ( posh_byte_t * ) src; + + v |= p[ 3 ]; + v |= ( ( posh_u32_t ) p[ 2 ] ) << 8; + v |= ( ( posh_u32_t ) p[ 1 ] ) << 16; + v |= ( ( posh_u32_t ) p[ 0 ] ) << 24; + + return v; +} + +/** + * Reads a signed 32-bit value from a big-endian buffer + @ingroup MemoryBuffer + @param src [in] source buffer + @returns host-endian signed 32-bit value +*/ +posh_i32_t +POSH_ReadI32FromBig( const void *src ) +{ + return POSH_BigI32( (*(const posh_i32_t*)src ) ); +} + +#if defined POSH_64BIT_INTEGER + +/** + * Reads an unsigned 64-bit value from a little-endian buffer + @param src [in] source buffer + @returns host-endian unsigned 32-bit value +*/ +posh_u64_t +POSH_ReadU64FromLittle( const void *src ) +{ + posh_u64_t v = 0; + posh_byte_t *p = ( posh_byte_t * ) src; + int i; + + for ( i = 0; i < 8; i++ ) + { + v |= ( ( posh_u64_t ) p[ i ] ) << (i*8); + } + + return v; +} + +/** + * 
Reads a signed 64-bit value from a little-endian buffer + @param src [in] source buffer + @returns host-endian signed 32-bit value +*/ +posh_i64_t +POSH_ReadI64FromLittle( const void *src ) +{ + return ( posh_i64_t ) POSH_ReadU64FromLittle( src ); +} + +/** + * Reads an unsigned 64-bit value from a big-endian buffer + @param src [in] source buffer + @returns host-endian unsigned 32-bit value +*/ +posh_u64_t +POSH_ReadU64FromBig( const void *src ) +{ + posh_u64_t v = 0; + posh_byte_t *p = ( posh_byte_t * ) src; + int i; + + for ( i = 0; i < 8; i++ ) + { + v |= ( ( posh_u64_t ) p[ 7-i ] ) << (i*8); + } + + return v; +} + +/** + * Reads an signed 64-bit value from a big-endian buffer + @param src [in] source buffer + @returns host-endian signed 32-bit value +*/ +posh_i64_t +POSH_ReadI64FromBig( const void *src ) +{ + return ( posh_i64_t ) POSH_ReadU64FromBig( src ); +} + +#endif /* POSH_64BIT_INTEGER */ + +/* ---------------------------------------------------------------------------*/ +/* FLOATING POINT SUPPORT */ +/* ---------------------------------------------------------------------------*/ + +#if !defined POSH_NO_FLOAT + +/** @ingroup FloatingPoint + @param[in] f floating point value + @returns a little-endian bit representation of f + */ +posh_u32_t +POSH_LittleFloatBits( float f ) +{ + union + { + float f32; + posh_u32_t u32; + } u; + + u.f32 = f; + + return POSH_LittleU32( u.u32 ); +} + +/** + * Extracts raw big-endian bits from a 32-bit floating point value + * + @ingroup FloatingPoint + @param f [in] floating point value + @returns a big-endian bit representation of f + */ +posh_u32_t +POSH_BigFloatBits( float f ) +{ + union + { + float f32; + posh_u32_t u32; + } u; + + u.f32 = f; + + return POSH_BigU32( u.u32 ); +} + +/** + * Extracts raw, little-endian bit representation from a 64-bit double. + * + @param d [in] 64-bit double precision value + @param dst [out] 8-byte storage buffer + @ingroup FloatingPoint + @returns the raw bits used to represent the value 'd', in the form dst[0]=LSB + */ +void +POSH_DoubleBits( double d, posh_byte_t dst[ 8 ] ) +{ + union + { + double d64; + posh_byte_t bytes[ 8 ]; + } u; + + u.d64 = d; + +#if defined POSH_LITTLE_ENDIAN + dst[ 0 ] = u.bytes[ 0 ]; + dst[ 1 ] = u.bytes[ 1 ]; + dst[ 2 ] = u.bytes[ 2 ]; + dst[ 3 ] = u.bytes[ 3 ]; + dst[ 4 ] = u.bytes[ 4 ]; + dst[ 5 ] = u.bytes[ 5 ]; + dst[ 6 ] = u.bytes[ 6 ]; + dst[ 7 ] = u.bytes[ 7 ]; +#else + dst[ 0 ] = u.bytes[ 7 ]; + dst[ 1 ] = u.bytes[ 6 ]; + dst[ 2 ] = u.bytes[ 5 ]; + dst[ 3 ] = u.bytes[ 4 ]; + dst[ 4 ] = u.bytes[ 3 ]; + dst[ 5 ] = u.bytes[ 2 ]; + dst[ 6 ] = u.bytes[ 1 ]; + dst[ 7 ] = u.bytes[ 0 ]; +#endif +} + +/** + * Creates a double-precision, 64-bit floating point value from a set of raw, + * little-endian bits + + @ingroup FloatingPoint + @param src [in] little-endian byte representation of 64-bit double precision + floating point value + @returns double precision floating point representation of the raw bits + @remarks No error checking is performed, so there are no guarantees that the + result is a valid number, nor is there any check to ensure that src is + non-NULL. BE CAREFUL USING THIS. 
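+
+   Example (an illustrative sketch, not from the upstream sources; assumes
+   IEEE 754 doubles):
+
+     posh_byte_t buf[ 8 ];
+     POSH_DoubleBits( 3.25, buf );             // buf[ 0 ] holds the LSB
+     double d = POSH_DoubleFromBits( buf );    // d == 3.25 again
+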
+ */ +double +POSH_DoubleFromBits( const posh_byte_t src[ 8 ] ) +{ + union + { + double d64; + posh_byte_t bytes[ 8 ]; + } u; + +#if defined POSH_LITTLE_ENDIAN + u.bytes[ 0 ] = src[ 0 ]; + u.bytes[ 1 ] = src[ 1 ]; + u.bytes[ 2 ] = src[ 2 ]; + u.bytes[ 3 ] = src[ 3 ]; + u.bytes[ 4 ] = src[ 4 ]; + u.bytes[ 5 ] = src[ 5 ]; + u.bytes[ 6 ] = src[ 6 ]; + u.bytes[ 7 ] = src[ 7 ]; +#else + u.bytes[ 0 ] = src[ 7 ]; + u.bytes[ 1 ] = src[ 6 ]; + u.bytes[ 2 ] = src[ 5 ]; + u.bytes[ 3 ] = src[ 4 ]; + u.bytes[ 4 ] = src[ 3 ]; + u.bytes[ 5 ] = src[ 2 ]; + u.bytes[ 6 ] = src[ 1 ]; + u.bytes[ 7 ] = src[ 0 ]; +#endif + + return u.d64; +} + +/** + * Creates a floating point number from little endian bits + * + @ingroup FloatingPoint + @param bits [in] raw floating point bits in little-endian form + @returns a floating point number based on the given bit representation + @remarks No error checking is performed, so there are no guarantees that the + result is a valid number. BE CAREFUL USING THIS. + */ +float +POSH_FloatFromLittleBits( posh_u32_t bits ) +{ + union + { + float f32; + posh_u32_t u32; + } u; + + u.u32 = bits; +#if defined POSH_BIG_ENDIAN + u.u32 = POSH_SwapU32( u.u32 ); +#endif + + return u.f32; +} + +/** + * Creates a floating point number from big-endian bits + * + @ingroup FloatingPoint + @param bits [in] raw floating point bits in big-endian form + @returns a floating point number based on the given bit representation + @remarks No error checking is performed, so there are no guarantees that the + result is a valid number. BE CAREFUL USING THIS. + */ +float +POSH_FloatFromBigBits( posh_u32_t bits ) +{ + union + { + float f32; + posh_u32_t u32; + } u; + + u.u32 = bits; +#if defined POSH_LITTLE_ENDIAN + u.u32 = POSH_SwapU32( u.u32 ); +#endif + + return u.f32; +} + +#endif /* !defined POSH_NO_FLOAT */ Index: ps/trunk/libraries/source/nvtt/src/extern/stb/stb_dxt.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/extern/stb/stb_dxt.h +++ ps/trunk/libraries/source/nvtt/src/extern/stb/stb_dxt.h @@ -0,0 +1,624 @@ +// stb_dxt.h - v1.04 - DXT1/DXT5 compressor - public domain +// original by fabian "ryg" giesen - ported to C by stb +// use '#define STB_DXT_IMPLEMENTATION' before including to create the implementation +// +// USAGE: +// call stb_compress_dxt_block() for every block (you must pad) +// source should be a 4x4 block of RGBA data in row-major order; +// A is ignored if you specify alpha=0; you can turn on dithering +// and "high quality" using mode. +// +// version history: +// v1.04 - (ryg) default to no rounding bias for lerped colors (as per S3TC/DX10 spec); +// single color match fix (allow for inexact color interpolation); +// optimal DXT5 index finder; "high quality" mode that runs multiple refinement steps. +// v1.03 - (stb) endianness support +// v1.02 - (stb) fix alpha encoding bug +// v1.01 - (stb) fix bug converting to RGB that messed up quality, thanks ryg & cbloom +// v1.00 - (stb) first release + +#ifndef STB_INCLUDE_STB_DXT_H +#define STB_INCLUDE_STB_DXT_H + +// compression mode (bitflags) +#define STB_DXT_NORMAL 0 +#define STB_DXT_DITHER 1 // use dithering. dubious win. never use for normal maps and the like! +#define STB_DXT_HIGHQUAL 2 // high quality mode, does two refinement steps instead of 1. ~30-40% slower. + +void stb_compress_dxt_block(unsigned char *dest, const unsigned char *src, int alpha, int mode); +#define STB_COMPRESS_DXT_BLOCK + +#ifdef STB_DXT_IMPLEMENTATION + +// configuration options for DXT encoder. 
set them in the project/makefile or just define +// them at the top. + +// STB_DXT_USE_ROUNDING_BIAS +// use a rounding bias during color interpolation. this is closer to what "ideal" +// interpolation would do but doesn't match the S3TC/DX10 spec. old versions (pre-1.03) +// implicitly had this turned on. +// +// in case you're targeting a specific type of hardware (e.g. console programmers): +// NVidia and Intel GPUs (as of 2010) as well as DX9 ref use DXT decoders that are closer +// to STB_DXT_USE_ROUNDING_BIAS. AMD/ATI, S3 and DX10 ref are closer to rounding with no bias. +// you also see "(a*5 + b*3) / 8" on some old GPU designs. +// #define STB_DXT_USE_ROUNDING_BIAS + +#include +#include +#include // memset + +static unsigned char stb__Expand5[32]; +static unsigned char stb__Expand6[64]; +static unsigned char stb__OMatch5[256][2]; +static unsigned char stb__OMatch6[256][2]; +static unsigned char stb__QuantRBTab[256+16]; +static unsigned char stb__QuantGTab[256+16]; + +static int stb__Mul8Bit(int a, int b) +{ + int t = a*b + 128; + return (t + (t >> 8)) >> 8; +} + +static void stb__From16Bit(unsigned char *out, unsigned short v) +{ + int rv = (v & 0xf800) >> 11; + int gv = (v & 0x07e0) >> 5; + int bv = (v & 0x001f) >> 0; + + out[0] = stb__Expand5[rv]; + out[1] = stb__Expand6[gv]; + out[2] = stb__Expand5[bv]; + out[3] = 0; +} + +static unsigned short stb__As16Bit(int r, int g, int b) +{ + return (stb__Mul8Bit(r,31) << 11) + (stb__Mul8Bit(g,63) << 5) + stb__Mul8Bit(b,31); +} + +// linear interpolation at 1/3 point between a and b, using desired rounding type +static int stb__Lerp13(int a, int b) +{ +#ifdef STB_DXT_USE_ROUNDING_BIAS + // with rounding bias + return a + stb__Mul8Bit(b-a, 0x55); +#else + // without rounding bias + // replace "/ 3" by "* 0xaaab) >> 17" if your compiler sucks or you really need every ounce of speed. + return (2*a + b) / 3; +#endif +} + +// lerp RGB color +static void stb__Lerp13RGB(unsigned char *out, unsigned char *p1, unsigned char *p2) +{ + out[0] = stb__Lerp13(p1[0], p2[0]); + out[1] = stb__Lerp13(p1[1], p2[1]); + out[2] = stb__Lerp13(p1[2], p2[2]); +} + +/****************************************************************************/ + +// compute table to reproduce constant colors as accurately as possible +static void stb__PrepareOptTable(unsigned char *Table,const unsigned char *expand,int size) +{ + int i,mn,mx; + for (i=0;i<256;i++) { + int bestErr = 256; + for (mn=0;mn> 4)]; + ep1[0] = bp[ 0] - dp[ 0]; + dp[ 4] = quant[bp[ 4] + ((7*ep1[0] + 3*ep2[2] + 5*ep2[1] + ep2[0]) >> 4)]; + ep1[1] = bp[ 4] - dp[ 4]; + dp[ 8] = quant[bp[ 8] + ((7*ep1[1] + 3*ep2[3] + 5*ep2[2] + ep2[1]) >> 4)]; + ep1[2] = bp[ 8] - dp[ 8]; + dp[12] = quant[bp[12] + ((7*ep1[2] + 5*ep2[3] + ep2[2]) >> 4)]; + ep1[3] = bp[12] - dp[12]; + bp += 16; + dp += 16; + et = ep1, ep1 = ep2, ep2 = et; // swap + } + } +} + +// The color matching function +static unsigned int stb__MatchColorsBlock(unsigned char *block, unsigned char *color,int dither) +{ + unsigned int mask = 0; + int dirr = color[0*4+0] - color[1*4+0]; + int dirg = color[0*4+1] - color[1*4+1]; + int dirb = color[0*4+2] - color[1*4+2]; + int dots[16]; + int stops[4]; + int i; + int c0Point, halfPoint, c3Point; + + for(i=0;i<16;i++) + dots[i] = block[i*4+0]*dirr + block[i*4+1]*dirg + block[i*4+2]*dirb; + + for(i=0;i<4;i++) + stops[i] = color[i*4+0]*dirr + color[i*4+1]*dirg + color[i*4+2]*dirb; + + // think of the colors as arranged on a line; project point onto that line, then choose + // next color out of available ones. 
we compute the crossover points for "best color in top + // half"/"best in bottom half" and then the same inside that subinterval. + // + // relying on this 1d approximation isn't always optimal in terms of euclidean distance, + // but it's very close and a lot faster. + // http://cbloomrants.blogspot.com/2008/12/12-08-08-dxtc-summary.html + + c0Point = (stops[1] + stops[3]) >> 1; + halfPoint = (stops[3] + stops[2]) >> 1; + c3Point = (stops[2] + stops[0]) >> 1; + + if(!dither) { + // the version without dithering is straightforward + for (i=15;i>=0;i--) { + int dot = dots[i]; + mask <<= 2; + + if(dot < halfPoint) + mask |= (dot < c0Point) ? 1 : 3; + else + mask |= (dot < c3Point) ? 2 : 0; + } + } else { + // with floyd-steinberg dithering + int err[8],*ep1 = err,*ep2 = err+4; + int *dp = dots, y; + + c0Point <<= 4; + halfPoint <<= 4; + c3Point <<= 4; + for(i=0;i<8;i++) + err[i] = 0; + + for(y=0;y<4;y++) + { + int dot,lmask,step; + + dot = (dp[0] << 4) + (3*ep2[1] + 5*ep2[0]); + if(dot < halfPoint) + step = (dot < c0Point) ? 1 : 3; + else + step = (dot < c3Point) ? 2 : 0; + ep1[0] = dp[0] - stops[step]; + lmask = step; + + dot = (dp[1] << 4) + (7*ep1[0] + 3*ep2[2] + 5*ep2[1] + ep2[0]); + if(dot < halfPoint) + step = (dot < c0Point) ? 1 : 3; + else + step = (dot < c3Point) ? 2 : 0; + ep1[1] = dp[1] - stops[step]; + lmask |= step<<2; + + dot = (dp[2] << 4) + (7*ep1[1] + 3*ep2[3] + 5*ep2[2] + ep2[1]); + if(dot < halfPoint) + step = (dot < c0Point) ? 1 : 3; + else + step = (dot < c3Point) ? 2 : 0; + ep1[2] = dp[2] - stops[step]; + lmask |= step<<4; + + dot = (dp[3] << 4) + (7*ep1[2] + 5*ep2[3] + ep2[2]); + if(dot < halfPoint) + step = (dot < c0Point) ? 1 : 3; + else + step = (dot < c3Point) ? 2 : 0; + ep1[3] = dp[3] - stops[step]; + lmask |= step<<6; + + dp += 4; + mask |= lmask << (y*8); + { int *et = ep1; ep1 = ep2; ep2 = et; } // swap + } + } + + return mask; +} + +// The color optimization function. (Clever code, part 1) +static void stb__OptimizeColorsBlock(unsigned char *block, unsigned short *pmax16, unsigned short *pmin16) +{ + int mind = 0x7fffffff,maxd = -0x7fffffff; + unsigned char *minp, *maxp; + double magn; + int v_r,v_g,v_b; + static const int nIterPower = 4; + float covf[6],vfr,vfg,vfb; + + // determine color distribution + int cov[6]; + int mu[3],min[3],max[3]; + int ch,i,iter; + + for(ch=0;ch<3;ch++) + { + const unsigned char *bp = ((const unsigned char *) block) + ch; + int muv,minv,maxv; + + muv = minv = maxv = bp[0]; + for(i=4;i<64;i+=4) + { + muv += bp[i]; + if (bp[i] < minv) minv = bp[i]; + else if (bp[i] > maxv) maxv = bp[i]; + } + + mu[ch] = (muv + 8) >> 4; + min[ch] = minv; + max[ch] = maxv; + } + + // determine covariance matrix + for (i=0;i<6;i++) + cov[i] = 0; + + for (i=0;i<16;i++) + { + int r = block[i*4+0] - mu[0]; + int g = block[i*4+1] - mu[1]; + int b = block[i*4+2] - mu[2]; + + cov[0] += r*r; + cov[1] += r*g; + cov[2] += r*b; + cov[3] += g*g; + cov[4] += g*b; + cov[5] += b*b; + } + + // convert covariance matrix to float, find principal axis via power iter + for(i=0;i<6;i++) + covf[i] = cov[i] / 255.0f; + + vfr = (float) (max[0] - min[0]); + vfg = (float) (max[1] - min[1]); + vfb = (float) (max[2] - min[2]); + + for(iter=0;iter magn) magn = fabs(vfg); + if (fabs(vfb) > magn) magn = fabs(vfb); + + if(magn < 4.0f) { // too small, default to luminance + v_r = 299; // JPEG YCbCr luma coefs, scaled by 1000. 
+ v_g = 587; + v_b = 114; + } else { + magn = 512.0 / magn; + v_r = (int) (vfr * magn); + v_g = (int) (vfg * magn); + v_b = (int) (vfb * magn); + } + + // Pick colors at extreme points + for(i=0;i<16;i++) + { + int dot = block[i*4+0]*v_r + block[i*4+1]*v_g + block[i*4+2]*v_b; + + if (dot < mind) { + mind = dot; + minp = block+i*4; + } + + if (dot > maxd) { + maxd = dot; + maxp = block+i*4; + } + } + + *pmax16 = stb__As16Bit(maxp[0],maxp[1],maxp[2]); + *pmin16 = stb__As16Bit(minp[0],minp[1],minp[2]); +} + +static int stb__sclamp(float y, int p0, int p1) +{ + int x = (int) y; + if (x < p0) return p0; + if (x > p1) return p1; + return x; +} + +// The refinement function. (Clever code, part 2) +// Tries to optimize colors to suit block contents better. +// (By solving a least squares system via normal equations+Cramer's rule) +static int stb__RefineBlock(unsigned char *block, unsigned short *pmax16, unsigned short *pmin16, unsigned int mask) +{ + static const int w1Tab[4] = { 3,0,2,1 }; + static const int prods[4] = { 0x090000,0x000900,0x040102,0x010402 }; + // ^some magic to save a lot of multiplies in the accumulating loop... + // (precomputed products of weights for least squares system, accumulated inside one 32-bit register) + + float frb,fg; + unsigned short oldMin, oldMax, min16, max16; + int i, akku = 0, xx,xy,yy; + int At1_r,At1_g,At1_b; + int At2_r,At2_g,At2_b; + unsigned int cm = mask; + + oldMin = *pmin16; + oldMax = *pmax16; + + if((mask ^ (mask<<2)) < 4) // all pixels have the same index? + { + // yes, linear system would be singular; solve using optimal + // single-color match on average color + int r = 8, g = 8, b = 8; + for (i=0;i<16;++i) { + r += block[i*4+0]; + g += block[i*4+1]; + b += block[i*4+2]; + } + + r >>= 4; g >>= 4; b >>= 4; + + max16 = (stb__OMatch5[r][0]<<11) | (stb__OMatch6[g][0]<<5) | stb__OMatch5[b][0]; + min16 = (stb__OMatch5[r][1]<<11) | (stb__OMatch6[g][1]<<5) | stb__OMatch5[b][1]; + } else { + At1_r = At1_g = At1_b = 0; + At2_r = At2_g = At2_b = 0; + for (i=0;i<16;++i,cm>>=2) { + int step = cm&3; + int w1 = w1Tab[step]; + int r = block[i*4+0]; + int g = block[i*4+1]; + int b = block[i*4+2]; + + akku += prods[step]; + At1_r += w1*r; + At1_g += w1*g; + At1_b += w1*b; + At2_r += r; + At2_g += g; + At2_b += b; + } + + At2_r = 3*At2_r - At1_r; + At2_g = 3*At2_g - At1_g; + At2_b = 3*At2_b - At1_b; + + // extract solutions and decide solvability + xx = akku >> 16; + yy = (akku >> 8) & 0xff; + xy = (akku >> 0) & 0xff; + + frb = 3.0f * 31.0f / 255.0f / (xx*yy - xy*xy); + fg = frb * 63.0f / 31.0f; + + // solve. + max16 = stb__sclamp((At1_r*yy - At2_r*xy)*frb+0.5f,0,31) << 11; + max16 |= stb__sclamp((At1_g*yy - At2_g*xy)*fg +0.5f,0,63) << 5; + max16 |= stb__sclamp((At1_b*yy - At2_b*xy)*frb+0.5f,0,31) << 0; + + min16 = stb__sclamp((At2_r*xx - At1_r*xy)*frb+0.5f,0,31) << 11; + min16 |= stb__sclamp((At2_g*xx - At1_g*xy)*fg +0.5f,0,63) << 5; + min16 |= stb__sclamp((At2_b*xx - At1_b*xy)*frb+0.5f,0,31) << 0; + } + + *pmin16 = min16; + *pmax16 = max16; + return oldMin != min16 || oldMax != max16; +} + +// Color block compression +static void stb__CompressColorBlock(unsigned char *dest, unsigned char *block, int mode) +{ + unsigned int mask; + int i; + int dither; + int refinecount; + unsigned short max16, min16; + unsigned char dblock[16*4],color[4*4]; + + dither = mode & STB_DXT_DITHER; + refinecount = (mode & STB_DXT_HIGHQUAL) ? 
2 : 1; + + // check if block is constant + for (i=1;i<16;i++) + if (((unsigned int *) block)[i] != ((unsigned int *) block)[0]) + break; + + if(i == 16) { // constant color + int r = block[0], g = block[1], b = block[2]; + mask = 0xaaaaaaaa; + max16 = (stb__OMatch5[r][0]<<11) | (stb__OMatch6[g][0]<<5) | stb__OMatch5[b][0]; + min16 = (stb__OMatch5[r][1]<<11) | (stb__OMatch6[g][1]<<5) | stb__OMatch5[b][1]; + } else { + // first step: compute dithered version for PCA if desired + if(dither) + stb__DitherBlock(dblock,block); + + // second step: pca+map along principal axis + stb__OptimizeColorsBlock(dither ? dblock : block,&max16,&min16); + if (max16 != min16) { + stb__EvalColors(color,max16,min16); + mask = stb__MatchColorsBlock(block,color,dither); + } else + mask = 0; + + // third step: refine (multiple times if requested) + for (i=0;i> 8); + dest[2] = (unsigned char) (min16); + dest[3] = (unsigned char) (min16 >> 8); + dest[4] = (unsigned char) (mask); + dest[5] = (unsigned char) (mask >> 8); + dest[6] = (unsigned char) (mask >> 16); + dest[7] = (unsigned char) (mask >> 24); +} + +// Alpha block compression (this is easy for a change) +static void stb__CompressAlphaBlock(unsigned char *dest,unsigned char *src,int mode) +{ + int i,dist,bias,dist4,dist2,bits,mask; + + // find min/max color + int mn,mx; + mn = mx = src[3]; + + for (i=1;i<16;i++) + { + if (src[i*4+3] < mn) mn = src[i*4+3]; + else if (src[i*4+3] > mx) mx = src[i*4+3]; + } + + // encode them + ((unsigned char *)dest)[0] = mx; + ((unsigned char *)dest)[1] = mn; + dest += 2; + + // determine bias and emit color indices + // given the choice of mx/mn, these indices are optimal: + // http://fgiesen.wordpress.com/2009/12/15/dxt5-alpha-block-index-determination/ + dist = mx-mn; + dist4 = dist*4; + dist2 = dist*2; + bias = (dist < 8) ? (dist - 1) : (dist/2 + 2); + bias -= mn * 7; + bits = 0,mask=0; + + for (i=0;i<16;i++) { + int a = src[i*4+3]*7 + bias; + int ind,t; + + // select index. this is a "linear scale" lerp factor between 0 (val=min) and 7 (val=max). + t = (a >= dist4) ? -1 : 0; ind = t & 4; a -= dist4 & t; + t = (a >= dist2) ? -1 : 0; ind += t & 2; a -= dist2 & t; + ind += (a >= dist); + + // turn linear scale into DXT index (0/1 are extremal pts) + ind = -ind & 7; + ind ^= (2 > ind); + + // write index + mask |= ind << bits; + if((bits += 3) >= 8) { + *dest++ = mask; + mask >>= 8; + bits -= 8; + } + } +} + +static void stb__InitDXT() +{ + int i; + for(i=0;i<32;i++) + stb__Expand5[i] = (i<<3)|(i>>2); + + for(i=0;i<64;i++) + stb__Expand6[i] = (i<<2)|(i>>4); + + for(i=0;i<256+16;i++) + { + int v = i-8 < 0 ? 0 : i-8 > 255 ? 
255 : i-8; + stb__QuantRBTab[i] = stb__Expand5[stb__Mul8Bit(v,31)]; + stb__QuantGTab[i] = stb__Expand6[stb__Mul8Bit(v,63)]; + } + + stb__PrepareOptTable(&stb__OMatch5[0][0],stb__Expand5,32); + stb__PrepareOptTable(&stb__OMatch6[0][0],stb__Expand6,64); +} + +void stb_compress_dxt_block(unsigned char *dest, const unsigned char *src, int alpha, int mode) +{ + static int init=1; + if (init) { + stb__InitDXT(); + init=0; + } + + if (alpha) { + stb__CompressAlphaBlock(dest,(unsigned char*) src,mode); + dest += 8; + } + + stb__CompressColorBlock(dest,(unsigned char*) src,mode); +} +#endif // STB_DXT_IMPLEMENTATION + +#endif // STB_INCLUDE_STB_DXT_H Index: ps/trunk/libraries/source/nvtt/src/extern/stb/stb_image.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/extern/stb/stb_image.h +++ ps/trunk/libraries/source/nvtt/src/extern/stb/stb_image.h @@ -0,0 +1,4954 @@ +/* stbi-1.29 - public domain JPEG/PNG reader - http://nothings.org/stb_image.c + when you control the images you're loading + no warranty implied; use at your own risk + + QUICK NOTES: + Primarily of interest to game developers and other people who can + avoid problematic images and only need the trivial interface + + JPEG baseline (no JPEG progressive) + PNG 8-bit only + + TGA (not sure what subset, if a subset) + BMP non-1bpp, non-RLE + PSD (composited view only, no extra channels) + + GIF (*comp always reports as 4-channel) + HDR (radiance rgbE format) + PIC (Softimage PIC) + + - decoded from memory or through stdio FILE (define STBI_NO_STDIO to remove code) + - supports installable dequantizing-IDCT, YCbCr-to-RGB conversion (define STBI_SIMD) + + Latest revisions: + 1.29 (2010-08-16) various warning fixes from Aurelien Pocheville + 1.28 (2010-08-01) fix bug in GIF palette transparency (SpartanJ) + 1.27 (2010-08-01) cast-to-uint8 to fix warnings (Laurent Gomila) + allow trailing 0s at end of image data (Laurent Gomila) + 1.26 (2010-07-24) fix bug in file buffering for PNG reported by SpartanJ + 1.25 (2010-07-17) refix trans_data warning (Won Chun) + 1.24 (2010-07-12) perf improvements reading from files + minor perf improvements for jpeg + deprecated type-specific functions in hope of feedback + attempt to fix trans_data warning (Won Chun) + 1.23 fixed bug in iPhone support + 1.22 (2010-07-10) removed image *writing* support to stb_image_write.h + stbi_info support from Jetro Lauha + GIF support from Jean-Marc Lienher + iPhone PNG-extensions from James Brown + warning-fixes from Nicolas Schulz and Janez Zemva + 1.21 fix use of 'uint8' in header (reported by jon blow) + 1.20 added support for Softimage PIC, by Tom Seddon + + See end of file for full revision history. 
+ + TODO: + stbi_info support for BMP,PSD,HDR,PIC + rewrite stbi_info and load_file variations to share file handling code + (current system allows individual functions to be called directly, + since each does all the work, but I doubt anyone uses this in practice) + + + ============================ Contributors ========================= + + Image formats Optimizations & bugfixes + Sean Barrett (jpeg, png, bmp) Fabian "ryg" Giesen + Nicolas Schulz (hdr, psd) + Jonathan Dummer (tga) Bug fixes & warning fixes + Jean-Marc Lienher (gif) Marc LeBlanc + Tom Seddon (pic) Christpher Lloyd + Thatcher Ulrich (psd) Dave Moore + Won Chun + the Horde3D community + Extensions, features Janez Zemva + Jetro Lauha (stbi_info) Jonathan Blow + James "moose2000" Brown (iPhone PNG) Laurent Gomila + Aruelien Pocheville + + If your name should be here but isn't, let Sean know. + +*/ + +#ifndef STBI_INCLUDE_STB_IMAGE_H +#define STBI_INCLUDE_STB_IMAGE_H + +// To get a header file for this, either cut and paste the header, +// or create stb_image.h, #define STBI_HEADER_FILE_ONLY, and +// then include stb_image.c from it. + +//// begin header file //////////////////////////////////////////////////// +// +// Limitations: +// - no jpeg progressive support +// - non-HDR formats support 8-bit samples only (jpeg, png) +// - no delayed line count (jpeg) -- IJG doesn't support either +// - no 1-bit BMP +// - GIF always returns *comp=4 +// +// Basic usage (see HDR discussion below): +// int x,y,n; +// unsigned char *data = stbi_load(filename, &x, &y, &n, 0); +// // ... process data if not NULL ... +// // ... x = width, y = height, n = # 8-bit components per pixel ... +// // ... replace '0' with '1'..'4' to force that many components per pixel +// stbi_image_free(data) +// +// Standard parameters: +// int *x -- outputs image width in pixels +// int *y -- outputs image height in pixels +// int *comp -- outputs # of image components in image file +// int req_comp -- if non-zero, # of image components requested in result +// +// The return value from an image loader is an 'unsigned char *' which points +// to the pixel data. The pixel data consists of *y scanlines of *x pixels, +// with each pixel consisting of N interleaved 8-bit components; the first +// pixel pointed to is top-left-most in the image. There is no padding between +// image scanlines or between pixels, regardless of format. The number of +// components N is 'req_comp' if req_comp is non-zero, or *comp otherwise. +// If req_comp is non-zero, *comp has the number of components that _would_ +// have been output otherwise. E.g. if you set req_comp to 4, you will always +// get RGBA output, but you can check *comp to easily see if it's opaque. +// +// An output image with N components has the following components interleaved +// in this order in each pixel: +// +// N=#comp components +// 1 grey +// 2 grey, alpha +// 3 red, green, blue +// 4 red, green, blue, alpha +// +// If image loading fails for any reason, the return value will be NULL, +// and *x, *y, *comp will be unchanged. The function stbi_failure_reason() +// can be queried for an extremely brief, end-user unfriendly explanation +// of why the load failed. Define STBI_NO_FAILURE_STRINGS to avoid +// compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly +// more user-friendly ones. +// +// Paletted PNG, BMP, GIF, and PIC images are automatically depalettized. 
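+//
+// For example (an illustrative sketch, not part of the upstream header; the
+// file name is hypothetical), forcing four components always yields RGBA:
+//
+//    int w, h, n;
+//    unsigned char *rgba = stbi_load("texture.png", &w, &h, &n, 4);
+//    if (rgba) {
+//       // w*h*4 bytes of pixels, top-left first; n still reports the
+//       // file's native component count
+//       stbi_image_free(rgba);
+//    }
+//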
+// +// =========================================================================== +// +// iPhone PNG support: +// +// By default we convert iphone-formatted PNGs back to RGB; nominally they +// would silently load as BGR, except the existing code should have just +// failed on such iPhone PNGs. But you can disable this conversion by +// by calling stbi_convert_iphone_png_to_rgb(0), in which case +// you will always just get the native iphone "format" through. +// +// Call stbi_set_unpremultiply_on_load(1) as well to force a divide per +// pixel to remove any premultiplied alpha *only* if the image file explicitly +// says there's premultiplied data (currently only happens in iPhone images, +// and only if iPhone convert-to-rgb processing is on). +// +// =========================================================================== +// +// HDR image support (disable by defining STBI_NO_HDR) +// +// stb_image now supports loading HDR images in general, and currently +// the Radiance .HDR file format, although the support is provided +// generically. You can still load any file through the existing interface; +// if you attempt to load an HDR file, it will be automatically remapped to +// LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1; +// both of these constants can be reconfigured through this interface: +// +// stbi_hdr_to_ldr_gamma(2.2f); +// stbi_hdr_to_ldr_scale(1.0f); +// +// (note, do not use _inverse_ constants; stbi_image will invert them +// appropriately). +// +// Additionally, there is a new, parallel interface for loading files as +// (linear) floats to preserve the full dynamic range: +// +// float *data = stbi_loadf(filename, &x, &y, &n, 0); +// +// If you load LDR images through this interface, those images will +// be promoted to floating point values, run through the inverse of +// constants corresponding to the above: +// +// stbi_ldr_to_hdr_scale(1.0f); +// stbi_ldr_to_hdr_gamma(2.2f); +// +// Finally, given a filename (or an open file or memory block--see header +// file for details) containing image data, you can query for the "most +// appropriate" interface to use (that is, whether the image is HDR or +// not), using: +// +// stbi_is_hdr(char *filename); + +#ifndef STBI_NO_STDIO +#include +#endif + +#define STBI_VERSION 1 + +enum +{ + STBI_default = 0, // only used for req_comp + + STBI_grey = 1, + STBI_grey_alpha = 2, + STBI_rgb = 3, + STBI_rgb_alpha = 4 +}; + +typedef unsigned char stbi_uc; + +#ifdef __cplusplus +extern "C" { +#endif + +// PRIMARY API - works on images of any type + +// load image by filename, open file, or memory buffer +extern stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); + +#ifndef STBI_NO_STDIO +extern stbi_uc *stbi_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern stbi_uc *stbi_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); +// for stbi_load_from_file, file pointer is left pointing immediately after image +#endif + +#ifndef STBI_NO_HDR + extern float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); + + #ifndef STBI_NO_STDIO + extern float *stbi_loadf (char const *filename, int *x, int *y, int *comp, int req_comp); + extern float *stbi_loadf_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); + #endif + + extern void stbi_hdr_to_ldr_gamma(float gamma); + extern void stbi_hdr_to_ldr_scale(float scale); + + extern void stbi_ldr_to_hdr_gamma(float gamma); + extern 
void stbi_ldr_to_hdr_scale(float scale); +#endif // STBI_NO_HDR + +// get a VERY brief reason for failure +// NOT THREADSAFE +extern const char *stbi_failure_reason (void); + +// free the loaded image -- this is just free() +extern void stbi_image_free (void *retval_from_stbi_load); + +// get image dimensions & components without fully decoding +extern int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp); +extern int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len); + +#ifndef STBI_NO_STDIO +extern int stbi_info (char const *filename, int *x, int *y, int *comp); +extern int stbi_info_from_file (FILE *f, int *x, int *y, int *comp); + +extern int stbi_is_hdr (char const *filename); +extern int stbi_is_hdr_from_file(FILE *f); +#endif + +// for image formats that explicitly notate that they have premultiplied alpha, +// we just return the colors as stored in the file. set this flag to force +// unpremultiplication. results are undefined if the unpremultiply overflow. +extern void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply); + +// indicate whether we should process iphone images back to canonical format, +// or just pass them through "as-is" +extern void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert); + + +// ZLIB client - used by PNG, available for other purposes + +extern char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen); +extern char *stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen); +extern int stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen); + +extern char *stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen); +extern int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen); + +// define new loaders +typedef struct +{ + int (*test_memory)(stbi_uc const *buffer, int len); + stbi_uc * (*load_from_memory)(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); + #ifndef STBI_NO_STDIO + int (*test_file)(FILE *f); + stbi_uc * (*load_from_file)(FILE *f, int *x, int *y, int *comp, int req_comp); + #endif +} stbi_loader; + +// register a loader by filling out the above structure (you must define ALL functions) +// returns 1 if added or already added, 0 if not added (too many loaders) +// NOT THREADSAFE +extern int stbi_register_loader(stbi_loader *loader); + +// define faster low-level operations (typically SIMD support) +#ifdef STBI_SIMD +typedef void (*stbi_idct_8x8)(stbi_uc *out, int out_stride, short data[64], unsigned short *dequantize); +// compute an integer IDCT on "input" +// input[x] = data[x] * dequantize[x] +// write results to 'out': 64 samples, each run of 8 spaced by 'out_stride' +// CLAMP results to 0..255 +typedef void (*stbi_YCbCr_to_RGB_run)(stbi_uc *output, stbi_uc const *y, stbi_uc const *cb, stbi_uc const *cr, int count, int step); +// compute a conversion from YCbCr to RGB +// 'count' pixels +// write pixels to 'output'; each pixel is 'step' bytes (either 3 or 4; if 4, write '255' as 4th), order R,G,B +// y: Y input channel +// cb: Cb input channel; scale/biased to be 0..255 +// cr: Cr input channel; scale/biased to be 0..255 + +extern void stbi_install_idct(stbi_idct_8x8 func); +extern void stbi_install_YCbCr_to_RGB(stbi_YCbCr_to_RGB_run func); +#endif // STBI_SIMD + + + + +// TYPE-SPECIFIC ACCESS + +#ifdef STBI_TYPE_SPECIFIC_FUNCTIONS + +// is it a jpeg? 
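// Illustrative sketch (not from the upstream header): minimal use of the
// primary API declared above. The file name and the forced-RGBA request are
// arbitrary choices for the example.
//
//    int x, y, comp;
//    stbi_uc *pixels = stbi_load("example.png", &x, &y, &comp, STBI_rgb_alpha);
//    if (!pixels) {
//       fprintf(stderr, "load failed: %s\n", stbi_failure_reason());
//    } else {
//       // with req_comp == STBI_rgb_alpha the buffer holds x*y*4 bytes;
//       // comp still reports how many channels the file itself stored
//       stbi_image_free(pixels);
//    }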
+extern int stbi_jpeg_test_memory (stbi_uc const *buffer, int len); +extern stbi_uc *stbi_jpeg_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); +extern int stbi_jpeg_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp); + +#ifndef STBI_NO_STDIO +extern stbi_uc *stbi_jpeg_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern int stbi_jpeg_test_file (FILE *f); +extern stbi_uc *stbi_jpeg_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); + +extern int stbi_jpeg_info (char const *filename, int *x, int *y, int *comp); +extern int stbi_jpeg_info_from_file (FILE *f, int *x, int *y, int *comp); +#endif + +// is it a png? +extern int stbi_png_test_memory (stbi_uc const *buffer, int len); +extern stbi_uc *stbi_png_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); +extern int stbi_png_info_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp); + +#ifndef STBI_NO_STDIO +extern stbi_uc *stbi_png_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern int stbi_png_info (char const *filename, int *x, int *y, int *comp); +extern int stbi_png_test_file (FILE *f); +extern stbi_uc *stbi_png_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); +extern int stbi_png_info_from_file (FILE *f, int *x, int *y, int *comp); +#endif + +// is it a bmp? +extern int stbi_bmp_test_memory (stbi_uc const *buffer, int len); + +extern stbi_uc *stbi_bmp_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern stbi_uc *stbi_bmp_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); +#ifndef STBI_NO_STDIO +extern int stbi_bmp_test_file (FILE *f); +extern stbi_uc *stbi_bmp_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); +#endif + +// is it a tga? +extern int stbi_tga_test_memory (stbi_uc const *buffer, int len); + +extern stbi_uc *stbi_tga_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern stbi_uc *stbi_tga_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); +#ifndef STBI_NO_STDIO +extern int stbi_tga_test_file (FILE *f); +extern stbi_uc *stbi_tga_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); +#endif + +// is it a psd? +extern int stbi_psd_test_memory (stbi_uc const *buffer, int len); + +extern stbi_uc *stbi_psd_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern stbi_uc *stbi_psd_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); +#ifndef STBI_NO_STDIO +extern int stbi_psd_test_file (FILE *f); +extern stbi_uc *stbi_psd_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); +#endif + +// is it an hdr? +extern int stbi_hdr_test_memory (stbi_uc const *buffer, int len); + +extern float * stbi_hdr_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern float * stbi_hdr_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); +#ifndef STBI_NO_STDIO +extern int stbi_hdr_test_file (FILE *f); +extern float * stbi_hdr_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); +#endif + +// is it a pic? 
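// Illustrative sketch (not from the upstream header): hooking a custom format
// into stbi_load() through the stbi_loader extension point declared earlier.
// The my_* functions are hypothetical placeholders; all of them must be
// provided, per the registration comment above.
//
//    static stbi_loader my_loader = {
//       my_test_memory, my_load_from_memory,
//    #ifndef STBI_NO_STDIO
//       my_test_file, my_load_from_file,
//    #endif
//    };
//
//    // once at startup; returns 0 only when the loader table is full
//    stbi_register_loader(&my_loader);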
+extern int      stbi_pic_test_memory      (stbi_uc const *buffer, int len);
+
+extern stbi_uc *stbi_pic_load             (char const *filename, int *x, int *y, int *comp, int req_comp);
+extern stbi_uc *stbi_pic_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
+#ifndef STBI_NO_STDIO
+extern int      stbi_pic_test_file        (FILE *f);
+extern stbi_uc *stbi_pic_load_from_file   (FILE *f, int *x, int *y, int *comp, int req_comp);
+#endif
+
+// is it a gif?
+extern int      stbi_gif_test_memory      (stbi_uc const *buffer, int len);
+
+extern stbi_uc *stbi_gif_load             (char const *filename, int *x, int *y, int *comp, int req_comp);
+extern stbi_uc *stbi_gif_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
+extern int      stbi_gif_info_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp);
+
+#ifndef STBI_NO_STDIO
+extern int      stbi_gif_test_file        (FILE *f);
+extern stbi_uc *stbi_gif_load_from_file   (FILE *f, int *x, int *y, int *comp, int req_comp);
+extern int      stbi_gif_info             (char const *filename, int *x, int *y, int *comp);
+extern int      stbi_gif_info_from_file   (FILE *f, int *x, int *y, int *comp);
+#endif
+
+#endif//STBI_TYPE_SPECIFIC_FUNCTIONS
+
+
+
+
+#ifdef __cplusplus
+}
+#endif
+
+//
+//
+////   end header file   /////////////////////////////////////////////////////
+#endif // STBI_INCLUDE_STB_IMAGE_H
+
+#ifndef STBI_HEADER_FILE_ONLY
+
+#ifndef STBI_NO_HDR
+#include <math.h>   // ldexp
+#include <string.h> // strcmp
+#endif
+
+#ifndef STBI_NO_STDIO
+#include <stdio.h>
+#endif
+#include <stdlib.h>
+#include <memory.h>
+#include <assert.h>
+#include <stdarg.h>
+
+#ifndef _MSC_VER
+  #ifdef __cplusplus
+  #define __forceinline inline
+  #else
+  #define __forceinline
+  #endif
+#endif
+
+
+// implementation:
+typedef unsigned char  uint8;
+typedef unsigned short uint16;
+typedef   signed short  int16;
+typedef unsigned int   uint32;
+typedef   signed int    int32;
+typedef unsigned int   uint;
+
+// should produce compiler error if size is wrong
+typedef unsigned char validate_uint32[sizeof(uint32)==4 ? 1 : -1];
+
+#if defined(STBI_NO_STDIO) && !defined(STBI_NO_WRITE)
+#define STBI_NO_WRITE
+#endif
+
+#define STBI_NOTUSED(v)  v=v
+
+#ifdef _MSC_VER
+#define STBI_HAS_LRTOL
+#endif
+
+#ifdef STBI_HAS_LRTOL
+  #define stbi_lrot(x,y)  _lrotl(x,y)
+#else
+  #define stbi_lrot(x,y)  (((x) << (y)) | ((x) >> (32 - (y))))
+#endif
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// Generic API that works on all image types
+//
+
+// deprecated functions
+
+// is it a jpeg?
+extern int      stbi_jpeg_test_memory     (stbi_uc const *buffer, int len);
+extern stbi_uc *stbi_jpeg_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
+extern int      stbi_jpeg_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp);
+
+#ifndef STBI_NO_STDIO
+extern stbi_uc *stbi_jpeg_load            (char const *filename, int *x, int *y, int *comp, int req_comp);
+extern int      stbi_jpeg_test_file       (FILE *f);
+extern stbi_uc *stbi_jpeg_load_from_file  (FILE *f, int *x, int *y, int *comp, int req_comp);
+
+extern int      stbi_jpeg_info            (char const *filename, int *x, int *y, int *comp);
+extern int      stbi_jpeg_info_from_file  (FILE *f, int *x, int *y, int *comp);
+#endif
+
+// is it a png?
+extern int stbi_png_test_memory (stbi_uc const *buffer, int len); +extern stbi_uc *stbi_png_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); +extern int stbi_png_info_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp); + +#ifndef STBI_NO_STDIO +extern stbi_uc *stbi_png_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern int stbi_png_info (char const *filename, int *x, int *y, int *comp); +extern int stbi_png_test_file (FILE *f); +extern stbi_uc *stbi_png_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); +extern int stbi_png_info_from_file (FILE *f, int *x, int *y, int *comp); +#endif + +// is it a bmp? +extern int stbi_bmp_test_memory (stbi_uc const *buffer, int len); + +extern stbi_uc *stbi_bmp_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern stbi_uc *stbi_bmp_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); +#ifndef STBI_NO_STDIO +extern int stbi_bmp_test_file (FILE *f); +extern stbi_uc *stbi_bmp_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); +#endif + +// is it a tga? +extern int stbi_tga_test_memory (stbi_uc const *buffer, int len); + +extern stbi_uc *stbi_tga_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern stbi_uc *stbi_tga_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); +#ifndef STBI_NO_STDIO +extern int stbi_tga_test_file (FILE *f); +extern stbi_uc *stbi_tga_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); +#endif + +// is it a psd? +extern int stbi_psd_test_memory (stbi_uc const *buffer, int len); + +extern stbi_uc *stbi_psd_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern stbi_uc *stbi_psd_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); +#ifndef STBI_NO_STDIO +extern int stbi_psd_test_file (FILE *f); +extern stbi_uc *stbi_psd_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); +#endif + +// is it an hdr? +extern int stbi_hdr_test_memory (stbi_uc const *buffer, int len); + +extern float * stbi_hdr_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern float * stbi_hdr_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); +#ifndef STBI_NO_STDIO +extern int stbi_hdr_test_file (FILE *f); +extern float * stbi_hdr_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); +#endif + +// is it a pic? +extern int stbi_pic_test_memory (stbi_uc const *buffer, int len); + +extern stbi_uc *stbi_pic_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern stbi_uc *stbi_pic_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); +#ifndef STBI_NO_STDIO +extern int stbi_pic_test_file (FILE *f); +extern stbi_uc *stbi_pic_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); +#endif + +// is it a gif? 
+extern int stbi_gif_test_memory (stbi_uc const *buffer, int len); + +extern stbi_uc *stbi_gif_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern stbi_uc *stbi_gif_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); +extern int stbi_gif_info_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp); + +#ifndef STBI_NO_STDIO +extern int stbi_gif_test_file (FILE *f); +extern stbi_uc *stbi_gif_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); +extern int stbi_gif_info (char const *filename, int *x, int *y, int *comp); +extern int stbi_gif_info_from_file (FILE *f, int *x, int *y, int *comp); +#endif + + +// this is not threadsafe +static const char *failure_reason; + +const char *stbi_failure_reason(void) +{ + return failure_reason; +} + +static int e(const char *str) +{ + failure_reason = str; + return 0; +} + +#ifdef STBI_NO_FAILURE_STRINGS + #define e(x,y) 0 +#elif defined(STBI_FAILURE_USERMSG) + #define e(x,y) e(y) +#else + #define e(x,y) e(x) +#endif + +#define epf(x,y) ((float *) (e(x,y)?NULL:NULL)) +#define epuc(x,y) ((unsigned char *) (e(x,y)?NULL:NULL)) + +void stbi_image_free(void *retval_from_stbi_load) +{ + free(retval_from_stbi_load); +} + +#define MAX_LOADERS 32 +stbi_loader *loaders[MAX_LOADERS]; +static int max_loaders = 0; + +int stbi_register_loader(stbi_loader *loader) +{ + int i; + for (i=0; i < MAX_LOADERS; ++i) { + // already present? + if (loaders[i] == loader) + return 1; + // end of the list? + if (loaders[i] == NULL) { + loaders[i] = loader; + max_loaders = i+1; + return 1; + } + } + // no room for it + return 0; +} + +#ifndef STBI_NO_HDR +static float *ldr_to_hdr(stbi_uc *data, int x, int y, int comp); +static stbi_uc *hdr_to_ldr(float *data, int x, int y, int comp); +#endif + +#ifndef STBI_NO_STDIO +unsigned char *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp) +{ + FILE *f = fopen(filename, "rb"); + unsigned char *result; + if (!f) return epuc("can't fopen", "Unable to open file"); + result = stbi_load_from_file(f,x,y,comp,req_comp); + fclose(f); + return result; +} + +unsigned char *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + int i; + if (stbi_jpeg_test_file(f)) return stbi_jpeg_load_from_file(f,x,y,comp,req_comp); + if (stbi_png_test_file(f)) return stbi_png_load_from_file(f,x,y,comp,req_comp); + if (stbi_bmp_test_file(f)) return stbi_bmp_load_from_file(f,x,y,comp,req_comp); + if (stbi_gif_test_file(f)) return stbi_gif_load_from_file(f,x,y,comp,req_comp); + if (stbi_psd_test_file(f)) return stbi_psd_load_from_file(f,x,y,comp,req_comp); + if (stbi_pic_test_file(f)) return stbi_pic_load_from_file(f,x,y,comp,req_comp); + + #ifndef STBI_NO_HDR + if (stbi_hdr_test_file(f)) { + float *hdr = stbi_hdr_load_from_file(f, x,y,comp,req_comp); + return hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp); + } + #endif + + for (i=0; i < max_loaders; ++i) + if (loaders[i]->test_file(f)) + return loaders[i]->load_from_file(f,x,y,comp,req_comp); + // test tga last because it's a crappy test! 
+ if (stbi_tga_test_file(f)) + return stbi_tga_load_from_file(f,x,y,comp,req_comp); + return epuc("unknown image type", "Image not of any known type, or corrupt"); +} +#endif + +unsigned char *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + int i; + if (stbi_jpeg_test_memory(buffer,len)) return stbi_jpeg_load_from_memory(buffer,len,x,y,comp,req_comp); + if (stbi_png_test_memory(buffer,len)) return stbi_png_load_from_memory(buffer,len,x,y,comp,req_comp); + if (stbi_bmp_test_memory(buffer,len)) return stbi_bmp_load_from_memory(buffer,len,x,y,comp,req_comp); + if (stbi_gif_test_memory(buffer,len)) return stbi_gif_load_from_memory(buffer,len,x,y,comp,req_comp); + if (stbi_psd_test_memory(buffer,len)) return stbi_psd_load_from_memory(buffer,len,x,y,comp,req_comp); + if (stbi_pic_test_memory(buffer,len)) return stbi_pic_load_from_memory(buffer,len,x,y,comp,req_comp); + + #ifndef STBI_NO_HDR + if (stbi_hdr_test_memory(buffer, len)) { + float *hdr = stbi_hdr_load_from_memory(buffer, len,x,y,comp,req_comp); + return hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp); + } + #endif + + for (i=0; i < max_loaders; ++i) + if (loaders[i]->test_memory(buffer,len)) + return loaders[i]->load_from_memory(buffer,len,x,y,comp,req_comp); + // test tga last because it's a crappy test! + if (stbi_tga_test_memory(buffer,len)) + return stbi_tga_load_from_memory(buffer,len,x,y,comp,req_comp); + return epuc("unknown image type", "Image not of any known type, or corrupt"); +} + +#ifndef STBI_NO_HDR + +#ifndef STBI_NO_STDIO +float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp) +{ + FILE *f = fopen(filename, "rb"); + float *result; + if (!f) return epf("can't fopen", "Unable to open file"); + result = stbi_loadf_from_file(f,x,y,comp,req_comp); + fclose(f); + return result; +} + +float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + unsigned char *data; + #ifndef STBI_NO_HDR + if (stbi_hdr_test_file(f)) + return stbi_hdr_load_from_file(f,x,y,comp,req_comp); + #endif + data = stbi_load_from_file(f, x, y, comp, req_comp); + if (data) + return ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp); + return epf("unknown image type", "Image not of any known type, or corrupt"); +} +#endif + +float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + stbi_uc *data; + #ifndef STBI_NO_HDR + if (stbi_hdr_test_memory(buffer, len)) + return stbi_hdr_load_from_memory(buffer, len,x,y,comp,req_comp); + #endif + data = stbi_load_from_memory(buffer, len, x, y, comp, req_comp); + if (data) + return ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp); + return epf("unknown image type", "Image not of any known type, or corrupt"); +} +#endif + +// these is-hdr-or-not is defined independent of whether STBI_NO_HDR is +// defined, for API simplicity; if STBI_NO_HDR is defined, it always +// reports false! 
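// Illustrative sketch (not from the upstream source): choosing between the
// byte and float interfaces with the query described above; buffer and len
// stand for image data already read into memory.
//
//    if (stbi_is_hdr_from_memory(buffer, len)) {
//       float *hdr = stbi_loadf_from_memory(buffer, len, &x, &y, &comp, 0);
//       // linear float data straight from the Radiance decode
//    } else {
//       stbi_uc *ldr = stbi_load_from_memory(buffer, len, &x, &y, &comp, 0);
//       // 8-bit data; loading the same file via stbi_loadf_from_memory would
//       // instead promote it as pow(p/255, l2h_gamma) * l2h_scale
//    }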
+ +int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len) +{ + #ifndef STBI_NO_HDR + return stbi_hdr_test_memory(buffer, len); + #else + STBI_NOTUSED(buffer); + STBI_NOTUSED(len); + return 0; + #endif +} + +#ifndef STBI_NO_STDIO +extern int stbi_is_hdr (char const *filename) +{ + FILE *f = fopen(filename, "rb"); + int result=0; + if (f) { + result = stbi_is_hdr_from_file(f); + fclose(f); + } + return result; +} + +extern int stbi_is_hdr_from_file(FILE *f) +{ + #ifndef STBI_NO_HDR + return stbi_hdr_test_file(f); + #else + return 0; + #endif +} + +#endif + +#ifndef STBI_NO_HDR +static float h2l_gamma_i=1.0f/2.2f, h2l_scale_i=1.0f; +static float l2h_gamma=2.2f, l2h_scale=1.0f; + +void stbi_hdr_to_ldr_gamma(float gamma) { h2l_gamma_i = 1/gamma; } +void stbi_hdr_to_ldr_scale(float scale) { h2l_scale_i = 1/scale; } + +void stbi_ldr_to_hdr_gamma(float gamma) { l2h_gamma = gamma; } +void stbi_ldr_to_hdr_scale(float scale) { l2h_scale = scale; } +#endif + + +////////////////////////////////////////////////////////////////////////////// +// +// Common code used by all image loaders +// + +enum +{ + SCAN_load=0, + SCAN_type, + SCAN_header +}; + +typedef struct +{ + uint32 img_x, img_y; + int img_n, img_out_n; + + #ifndef STBI_NO_STDIO + FILE *img_file; + int buflen; + uint8 buffer_start[128]; + int from_file; + #endif + uint8 *img_buffer, *img_buffer_end; +} stbi; + +#ifndef STBI_NO_STDIO +static void start_file(stbi *s, FILE *f) +{ + s->img_file = f; + s->buflen = sizeof(s->buffer_start); + s->img_buffer_end = s->buffer_start + s->buflen; + s->img_buffer = s->img_buffer_end; + s->from_file = 1; +} +#endif + +static void start_mem(stbi *s, uint8 const *buffer, int len) +{ +#ifndef STBI_NO_STDIO + s->img_file = NULL; + s->from_file = 0; +#endif + s->img_buffer = (uint8 *) buffer; + s->img_buffer_end = (uint8 *) buffer+len; +} + +#ifndef STBI_NO_STDIO +static void refill_buffer(stbi *s) +{ + int n = fread(s->buffer_start, 1, s->buflen, s->img_file); + if (n == 0) { + s->from_file = 0; + s->img_buffer = s->img_buffer_end-1; + *s->img_buffer = 0; + } else { + s->img_buffer = s->buffer_start; + s->img_buffer_end = s->buffer_start + n; + } +} +#endif + +__forceinline static int get8(stbi *s) +{ + if (s->img_buffer < s->img_buffer_end) + return *s->img_buffer++; +#ifndef STBI_NO_STDIO + if (s->from_file) { + refill_buffer(s); + return *s->img_buffer++; + } +#endif + return 0; +} + +__forceinline static int at_eof(stbi *s) +{ +#ifndef STBI_NO_STDIO + if (s->img_file) { + if (!feof(s->img_file)) return 0; + // if feof() is true, check if buffer = end + // special case: we've only got the special 0 character at the end + if (s->from_file == 0) return 1; + } +#endif + return s->img_buffer >= s->img_buffer_end; +} + +__forceinline static uint8 get8u(stbi *s) +{ + return (uint8) get8(s); +} + +static void skip(stbi *s, int n) +{ +#ifndef STBI_NO_STDIO + if (s->img_file) { + int blen = s->img_buffer_end - s->img_buffer; + if (blen < n) { + s->img_buffer = s->img_buffer_end; + fseek(s->img_file, n - blen, SEEK_CUR); + return; + } + } +#endif + s->img_buffer += n; +} + +static int getn(stbi *s, stbi_uc *buffer, int n) +{ +#ifndef STBI_NO_STDIO + if (s->img_file) { + int blen = s->img_buffer_end - s->img_buffer; + if (blen < n) { + int res; + memcpy(buffer, s->img_buffer, blen); + res = ((int) fread(buffer + blen, 1, n - blen, s->img_file) == (n-blen)); + s->img_buffer = s->img_buffer_end; + return res; + } + } +#endif + if (s->img_buffer+n <= s->img_buffer_end) { + memcpy(buffer, s->img_buffer, n); + 
s->img_buffer += n; + return 1; + } else + return 0; +} + +static int get16(stbi *s) +{ + int z = get8(s); + return (z << 8) + get8(s); +} + +static uint32 get32(stbi *s) +{ + uint32 z = get16(s); + return (z << 16) + get16(s); +} + +static int get16le(stbi *s) +{ + int z = get8(s); + return z + (get8(s) << 8); +} + +static uint32 get32le(stbi *s) +{ + uint32 z = get16le(s); + return z + (get16le(s) << 16); +} + +////////////////////////////////////////////////////////////////////////////// +// +// generic converter from built-in img_n to req_comp +// individual types do this automatically as much as possible (e.g. jpeg +// does all cases internally since it needs to colorspace convert anyway, +// and it never has alpha, so very few cases ). png can automatically +// interleave an alpha=255 channel, but falls back to this for other cases +// +// assume data buffer is malloced, so malloc a new one and free that one +// only failure mode is malloc failing + +static uint8 compute_y(int r, int g, int b) +{ + return (uint8) (((r*77) + (g*150) + (29*b)) >> 8); +} + +static unsigned char *convert_format(unsigned char *data, int img_n, int req_comp, uint x, uint y) +{ + int i,j; + unsigned char *good; + + if (req_comp == img_n) return data; + assert(req_comp >= 1 && req_comp <= 4); + + good = (unsigned char *) malloc(req_comp * x * y); + if (good == NULL) { + free(data); + return epuc("outofmem", "Out of memory"); + } + + for (j=0; j < (int) y; ++j) { + unsigned char *src = data + j * x * img_n ; + unsigned char *dest = good + j * x * req_comp; + + #define COMBO(a,b) ((a)*8+(b)) + #define CASE(a,b) case COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b) + // convert source image with img_n components to one with req_comp components; + // avoid switch per pixel, so use switch per scanline and massive macros + switch (COMBO(img_n, req_comp)) { + CASE(1,2) dest[0]=src[0], dest[1]=255; break; + CASE(1,3) dest[0]=dest[1]=dest[2]=src[0]; break; + CASE(1,4) dest[0]=dest[1]=dest[2]=src[0], dest[3]=255; break; + CASE(2,1) dest[0]=src[0]; break; + CASE(2,3) dest[0]=dest[1]=dest[2]=src[0]; break; + CASE(2,4) dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1]; break; + CASE(3,4) dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=255; break; + CASE(3,1) dest[0]=compute_y(src[0],src[1],src[2]); break; + CASE(3,2) dest[0]=compute_y(src[0],src[1],src[2]), dest[1] = 255; break; + CASE(4,1) dest[0]=compute_y(src[0],src[1],src[2]); break; + CASE(4,2) dest[0]=compute_y(src[0],src[1],src[2]), dest[1] = src[3]; break; + CASE(4,3) dest[0]=src[0],dest[1]=src[1],dest[2]=src[2]; break; + default: assert(0); + } + #undef CASE + } + + free(data); + return good; +} + +#ifndef STBI_NO_HDR +static float *ldr_to_hdr(stbi_uc *data, int x, int y, int comp) +{ + int i,k,n; + float *output = (float *) malloc(x * y * comp * sizeof(float)); + if (output == NULL) { free(data); return epf("outofmem", "Out of memory"); } + // compute number of non-alpha components + if (comp & 1) n = comp; else n = comp-1; + for (i=0; i < x*y; ++i) { + for (k=0; k < n; ++k) { + output[i*comp + k] = (float) pow(data[i*comp+k]/255.0f, l2h_gamma) * l2h_scale; + } + if (k < comp) output[i*comp + k] = data[i*comp+k]/255.0f; + } + free(data); + return output; +} + +#define float2int(x) ((int) (x)) +static stbi_uc *hdr_to_ldr(float *data, int x, int y, int comp) +{ + int i,k,n; + stbi_uc *output = (stbi_uc *) malloc(x * y * comp); + if (output == NULL) { free(data); return epuc("outofmem", "Out of memory"); } + // compute number of non-alpha components + if 
(comp & 1) n = comp; else n = comp-1; + for (i=0; i < x*y; ++i) { + for (k=0; k < n; ++k) { + float z = (float) pow(data[i*comp+k]*h2l_scale_i, h2l_gamma_i) * 255 + 0.5f; + if (z < 0) z = 0; + if (z > 255) z = 255; + output[i*comp + k] = (uint8) float2int(z); + } + if (k < comp) { + float z = data[i*comp+k] * 255 + 0.5f; + if (z < 0) z = 0; + if (z > 255) z = 255; + output[i*comp + k] = (uint8) float2int(z); + } + } + free(data); + return output; +} +#endif + +////////////////////////////////////////////////////////////////////////////// +// +// "baseline" JPEG/JFIF decoder (not actually fully baseline implementation) +// +// simple implementation +// - channel subsampling of at most 2 in each dimension +// - doesn't support delayed output of y-dimension +// - simple interface (only one output format: 8-bit interleaved RGB) +// - doesn't try to recover corrupt jpegs +// - doesn't allow partial loading, loading multiple at once +// - still fast on x86 (copying globals into locals doesn't help x86) +// - allocates lots of intermediate memory (full size of all components) +// - non-interleaved case requires this anyway +// - allows good upsampling (see next) +// high-quality +// - upsampled channels are bilinearly interpolated, even across blocks +// - quality integer IDCT derived from IJG's 'slow' +// performance +// - fast huffman; reasonable integer IDCT +// - uses a lot of intermediate memory, could cache poorly +// - load http://nothings.org/remote/anemones.jpg 3 times on 2.8Ghz P4 +// stb_jpeg: 1.34 seconds (MSVC6, default release build) +// stb_jpeg: 1.06 seconds (MSVC6, processor = Pentium Pro) +// IJL11.dll: 1.08 seconds (compiled by intel) +// IJG 1998: 0.98 seconds (MSVC6, makefile provided by IJG) +// IJG 1998: 0.95 seconds (MSVC6, makefile + proc=PPro) + +// huffman decoding acceleration +#define FAST_BITS 9 // larger handles more cases; smaller stomps less cache + +typedef struct +{ + uint8 fast[1 << FAST_BITS]; + // weirdly, repacking this into AoS is a 10% speed loss, instead of a win + uint16 code[256]; + uint8 values[256]; + uint8 size[257]; + unsigned int maxcode[18]; + int delta[17]; // old 'firstsymbol' - old 'firstcode' +} huffman; + +typedef struct +{ + #ifdef STBI_SIMD + unsigned short dequant2[4][64]; + #endif + stbi s; + huffman huff_dc[4]; + huffman huff_ac[4]; + uint8 dequant[4][64]; + +// sizes for components, interleaved MCUs + int img_h_max, img_v_max; + int img_mcu_x, img_mcu_y; + int img_mcu_w, img_mcu_h; + +// definition of jpeg image component + struct + { + int id; + int h,v; + int tq; + int hd,ha; + int dc_pred; + + int x,y,w2,h2; + uint8 *data; + void *raw_data; + uint8 *linebuf; + } img_comp[4]; + + uint32 code_buffer; // jpeg entropy-coded buffer + int code_bits; // number of valid bits + unsigned char marker; // marker seen while filling entropy buffer + int nomore; // flag if we saw a marker so must stop + + int scan_n, order[4]; + int restart_interval, todo; +} jpeg; + +static int build_huffman(huffman *h, int *count) +{ + int i,j,k=0,code; + // build size list for each symbol (from JPEG spec) + for (i=0; i < 16; ++i) + for (j=0; j < count[i]; ++j) + h->size[k++] = (uint8) (i+1); + h->size[k] = 0; + + // compute actual symbols (from jpeg spec) + code = 0; + k = 0; + for(j=1; j <= 16; ++j) { + // compute delta to add to code to compute symbol id + h->delta[j] = k - code; + if (h->size[k] == j) { + while (h->size[k] == j) + h->code[k++] = (uint16) (code++); + if (code-1 >= (1 << j)) return e("bad code lengths","Corrupt JPEG"); + } + // compute largest 
code + 1 for this size, preshifted as needed later + h->maxcode[j] = code << (16-j); + code <<= 1; + } + h->maxcode[j] = 0xffffffff; + + // build non-spec acceleration table; 255 is flag for not-accelerated + memset(h->fast, 255, 1 << FAST_BITS); + for (i=0; i < k; ++i) { + int s = h->size[i]; + if (s <= FAST_BITS) { + int c = h->code[i] << (FAST_BITS-s); + int m = 1 << (FAST_BITS-s); + for (j=0; j < m; ++j) { + h->fast[c+j] = (uint8) i; + } + } + } + return 1; +} + +static void grow_buffer_unsafe(jpeg *j) +{ + do { + int b = j->nomore ? 0 : get8(&j->s); + if (b == 0xff) { + int c = get8(&j->s); + if (c != 0) { + j->marker = (unsigned char) c; + j->nomore = 1; + return; + } + } + j->code_buffer |= b << (24 - j->code_bits); + j->code_bits += 8; + } while (j->code_bits <= 24); +} + +// (1 << n) - 1 +static uint32 bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535}; + +// decode a jpeg huffman value from the bitstream +__forceinline static int decode(jpeg *j, huffman *h) +{ + unsigned int temp; + int c,k; + + if (j->code_bits < 16) grow_buffer_unsafe(j); + + // look at the top FAST_BITS and determine what symbol ID it is, + // if the code is <= FAST_BITS + c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); + k = h->fast[c]; + if (k < 255) { + int s = h->size[k]; + if (s > j->code_bits) + return -1; + j->code_buffer <<= s; + j->code_bits -= s; + return h->values[k]; + } + + // naive test is to shift the code_buffer down so k bits are + // valid, then test against maxcode. To speed this up, we've + // preshifted maxcode left so that it has (16-k) 0s at the + // end; in other words, regardless of the number of bits, it + // wants to be compared against something shifted to have 16; + // that way we don't need to shift inside the loop. + temp = j->code_buffer >> 16; + for (k=FAST_BITS+1 ; ; ++k) + if (temp < h->maxcode[k]) + break; + if (k == 17) { + // error! code not found + j->code_bits -= 16; + return -1; + } + + if (k > j->code_bits) + return -1; + + // convert the huffman code to the symbol id + c = ((j->code_buffer >> (32 - k)) & bmask[k]) + h->delta[k]; + assert((((j->code_buffer) >> (32 - h->size[c])) & bmask[h->size[c]]) == h->code[c]); + + // convert the id to a symbol + j->code_bits -= k; + j->code_buffer <<= k; + return h->values[c]; +} + +// combined JPEG 'receive' and JPEG 'extend', since baseline +// always extends everything it receives. +__forceinline static int extend_receive(jpeg *j, int n) +{ + unsigned int m = 1 << (n-1); + unsigned int k; + if (j->code_bits < n) grow_buffer_unsafe(j); + + #if 1 + k = stbi_lrot(j->code_buffer, n); + j->code_buffer = k & ~bmask[n]; + k &= bmask[n]; + j->code_bits -= n; + #else + k = (j->code_buffer >> (32 - n)) & bmask[n]; + j->code_bits -= n; + j->code_buffer <<= n; + #endif + // the following test is probably a random branch that won't + // predict well. I tried to table accelerate it but failed. + // maybe it's compiling as a conditional move? + if (k < m) + return (-1 << n) + k + 1; + else + return k; +} + +// given a value that's at position X in the zigzag stream, +// where does it appear in the 8x8 matrix coded as row-major? 
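// Worked example (added for clarity, not upstream): the table below answers
// that question; e.g. zigzag stream position 2 maps to entry 8, i.e. row 1,
// column 0 of the 8x8 block, and the 15 padding entries at the end keep a
// corrupt stream's out-of-range index pointing at coefficient 63.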
+static uint8 dezigzag[64+15] = +{ + 0, 1, 8, 16, 9, 2, 3, 10, + 17, 24, 32, 25, 18, 11, 4, 5, + 12, 19, 26, 33, 40, 48, 41, 34, + 27, 20, 13, 6, 7, 14, 21, 28, + 35, 42, 49, 56, 57, 50, 43, 36, + 29, 22, 15, 23, 30, 37, 44, 51, + 58, 59, 52, 45, 38, 31, 39, 46, + 53, 60, 61, 54, 47, 55, 62, 63, + // let corrupt input sample past end + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63 +}; + +// decode one 64-entry block-- +static int decode_block(jpeg *j, short data[64], huffman *hdc, huffman *hac, int b) +{ + int diff,dc,k; + int t = decode(j, hdc); + if (t < 0) return e("bad huffman code","Corrupt JPEG"); + + // 0 all the ac values now so we can do it 32-bits at a time + memset(data,0,64*sizeof(data[0])); + + diff = t ? extend_receive(j, t) : 0; + dc = j->img_comp[b].dc_pred + diff; + j->img_comp[b].dc_pred = dc; + data[0] = (short) dc; + + // decode AC components, see JPEG spec + k = 1; + do { + int r,s; + int rs = decode(j, hac); + if (rs < 0) return e("bad huffman code","Corrupt JPEG"); + s = rs & 15; + r = rs >> 4; + if (s == 0) { + if (rs != 0xf0) break; // end block + k += 16; + } else { + k += r; + // decode into unzigzag'd location + data[dezigzag[k++]] = (short) extend_receive(j,s); + } + } while (k < 64); + return 1; +} + +// take a -128..127 value and clamp it and convert to 0..255 +__forceinline static uint8 clamp(int x) +{ + // trick to use a single test to catch both cases + if ((unsigned int) x > 255) { + if (x < 0) return 0; + if (x > 255) return 255; + } + return (uint8) x; +} + +#define f2f(x) (int) (((x) * 4096 + 0.5)) +#define fsh(x) ((x) << 12) + +// derived from jidctint -- DCT_ISLOW +#define IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \ + int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \ + p2 = s2; \ + p3 = s6; \ + p1 = (p2+p3) * f2f(0.5411961f); \ + t2 = p1 + p3*f2f(-1.847759065f); \ + t3 = p1 + p2*f2f( 0.765366865f); \ + p2 = s0; \ + p3 = s4; \ + t0 = fsh(p2+p3); \ + t1 = fsh(p2-p3); \ + x0 = t0+t3; \ + x3 = t0-t3; \ + x1 = t1+t2; \ + x2 = t1-t2; \ + t0 = s7; \ + t1 = s5; \ + t2 = s3; \ + t3 = s1; \ + p3 = t0+t2; \ + p4 = t1+t3; \ + p1 = t0+t3; \ + p2 = t1+t2; \ + p5 = (p3+p4)*f2f( 1.175875602f); \ + t0 = t0*f2f( 0.298631336f); \ + t1 = t1*f2f( 2.053119869f); \ + t2 = t2*f2f( 3.072711026f); \ + t3 = t3*f2f( 1.501321110f); \ + p1 = p5 + p1*f2f(-0.899976223f); \ + p2 = p5 + p2*f2f(-2.562915447f); \ + p3 = p3*f2f(-1.961570560f); \ + p4 = p4*f2f(-0.390180644f); \ + t3 += p1+p4; \ + t2 += p2+p3; \ + t1 += p2+p4; \ + t0 += p1+p3; + +#ifdef STBI_SIMD +typedef unsigned short stbi_dequantize_t; +#else +typedef uint8 stbi_dequantize_t; +#endif + +// .344 seconds on 3*anemones.jpg +static void idct_block(uint8 *out, int out_stride, short data[64], stbi_dequantize_t *dequantize) +{ + int i,val[64],*v=val; + stbi_dequantize_t *dq = dequantize; + uint8 *o; + short *d = data; + + // columns + for (i=0; i < 8; ++i,++d,++dq, ++v) { + // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing + if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0 + && d[40]==0 && d[48]==0 && d[56]==0) { + // no shortcut 0 seconds + // (1|2|3|4|5|6|7)==0 0 seconds + // all separate -0.047 seconds + // 1 && 2|3 && 4|5 && 6|7: -0.047 seconds + int dcterm = d[0] * dq[0] << 2; + v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm; + } else { + IDCT_1D(d[ 0]*dq[ 0],d[ 8]*dq[ 8],d[16]*dq[16],d[24]*dq[24], + d[32]*dq[32],d[40]*dq[40],d[48]*dq[48],d[56]*dq[56]) + // constants scaled things up by 1<<12; let's bring them back + // down, but keep 2 extra bits of precision + x0 += 512; x1 
+= 512; x2 += 512; x3 += 512; + v[ 0] = (x0+t3) >> 10; + v[56] = (x0-t3) >> 10; + v[ 8] = (x1+t2) >> 10; + v[48] = (x1-t2) >> 10; + v[16] = (x2+t1) >> 10; + v[40] = (x2-t1) >> 10; + v[24] = (x3+t0) >> 10; + v[32] = (x3-t0) >> 10; + } + } + + for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) { + // no fast case since the first 1D IDCT spread components out + IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7]) + // constants scaled things up by 1<<12, plus we had 1<<2 from first + // loop, plus horizontal and vertical each scale by sqrt(8) so together + // we've got an extra 1<<3, so 1<<17 total we need to remove. + // so we want to round that, which means adding 0.5 * 1<<17, + // aka 65536. Also, we'll end up with -128 to 127 that we want + // to encode as 0..255 by adding 128, so we'll add that before the shift + x0 += 65536 + (128<<17); + x1 += 65536 + (128<<17); + x2 += 65536 + (128<<17); + x3 += 65536 + (128<<17); + // tried computing the shifts into temps, or'ing the temps to see + // if any were out of range, but that was slower + o[0] = clamp((x0+t3) >> 17); + o[7] = clamp((x0-t3) >> 17); + o[1] = clamp((x1+t2) >> 17); + o[6] = clamp((x1-t2) >> 17); + o[2] = clamp((x2+t1) >> 17); + o[5] = clamp((x2-t1) >> 17); + o[3] = clamp((x3+t0) >> 17); + o[4] = clamp((x3-t0) >> 17); + } +} + +#ifdef STBI_SIMD +static stbi_idct_8x8 stbi_idct_installed = idct_block; + +extern void stbi_install_idct(stbi_idct_8x8 func) +{ + stbi_idct_installed = func; +} +#endif + +#define MARKER_none 0xff +// if there's a pending marker from the entropy stream, return that +// otherwise, fetch from the stream and get a marker. if there's no +// marker, return 0xff, which is never a valid marker value +static uint8 get_marker(jpeg *j) +{ + uint8 x; + if (j->marker != MARKER_none) { x = j->marker; j->marker = MARKER_none; return x; } + x = get8u(&j->s); + if (x != 0xff) return MARKER_none; + while (x == 0xff) + x = get8u(&j->s); + return x; +} + +// in each scan, we'll have scan_n components, and the order +// of the components is specified by order[] +#define RESTART(x) ((x) >= 0xd0 && (x) <= 0xd7) + +// after a restart interval, reset the entropy decoder and +// the dc prediction +static void reset(jpeg *j) +{ + j->code_bits = 0; + j->code_buffer = 0; + j->nomore = 0; + j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = 0; + j->marker = MARKER_none; + j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff; + // no more than 1<<31 MCUs if no restart_interal? 
that's plenty safe, + // since we don't even allow 1<<30 pixels +} + +static int parse_entropy_coded_data(jpeg *z) +{ + reset(z); + if (z->scan_n == 1) { + int i,j; + #ifdef STBI_SIMD + __declspec(align(16)) + #endif + short data[64]; + int n = z->order[0]; + // non-interleaved data, we just need to process one block at a time, + // in trivial scanline order + // number of blocks to do just depends on how many actual "pixels" this + // component has, independent of interleaved MCU blocking and such + int w = (z->img_comp[n].x+7) >> 3; + int h = (z->img_comp[n].y+7) >> 3; + for (j=0; j < h; ++j) { + for (i=0; i < w; ++i) { + if (!decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+z->img_comp[n].ha, n)) return 0; + #ifdef STBI_SIMD + stbi_idct_installed(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data, z->dequant2[z->img_comp[n].tq]); + #else + idct_block(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data, z->dequant[z->img_comp[n].tq]); + #endif + // every data block is an MCU, so countdown the restart interval + if (--z->todo <= 0) { + if (z->code_bits < 24) grow_buffer_unsafe(z); + // if it's NOT a restart, then just bail, so we get corrupt data + // rather than no data + if (!RESTART(z->marker)) return 1; + reset(z); + } + } + } + } else { // interleaved! + int i,j,k,x,y; + short data[64]; + for (j=0; j < z->img_mcu_y; ++j) { + for (i=0; i < z->img_mcu_x; ++i) { + // scan an interleaved mcu... process scan_n components in order + for (k=0; k < z->scan_n; ++k) { + int n = z->order[k]; + // scan out an mcu's worth of this component; that's just determined + // by the basic H and V specified for the component + for (y=0; y < z->img_comp[n].v; ++y) { + for (x=0; x < z->img_comp[n].h; ++x) { + int x2 = (i*z->img_comp[n].h + x)*8; + int y2 = (j*z->img_comp[n].v + y)*8; + if (!decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+z->img_comp[n].ha, n)) return 0; + #ifdef STBI_SIMD + stbi_idct_installed(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data, z->dequant2[z->img_comp[n].tq]); + #else + idct_block(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data, z->dequant[z->img_comp[n].tq]); + #endif + } + } + } + // after all interleaved components, that's an interleaved MCU, + // so now count down the restart interval + if (--z->todo <= 0) { + if (z->code_bits < 24) grow_buffer_unsafe(z); + // if it's NOT a restart, then just bail, so we get corrupt data + // rather than no data + if (!RESTART(z->marker)) return 1; + reset(z); + } + } + } + } + return 1; +} + +static int process_marker(jpeg *z, int m) +{ + int L; + switch (m) { + case MARKER_none: // no marker found + return e("expected marker","Corrupt JPEG"); + + case 0xC2: // SOF - progressive + return e("progressive jpeg","JPEG format not supported (progressive)"); + + case 0xDD: // DRI - specify restart interval + if (get16(&z->s) != 4) return e("bad DRI len","Corrupt JPEG"); + z->restart_interval = get16(&z->s); + return 1; + + case 0xDB: // DQT - define quantization table + L = get16(&z->s)-2; + while (L > 0) { + int q = get8(&z->s); + int p = q >> 4; + int t = q & 15,i; + if (p != 0) return e("bad DQT type","Corrupt JPEG"); + if (t > 3) return e("bad DQT table","Corrupt JPEG"); + for (i=0; i < 64; ++i) + z->dequant[t][dezigzag[i]] = get8u(&z->s); + #ifdef STBI_SIMD + for (i=0; i < 64; ++i) + z->dequant2[t][i] = z->dequant[t][i]; + #endif + L -= 65; + } + return L==0; + + case 0xC4: // DHT - define huffman table + L = get16(&z->s)-2; + while 
(L > 0) { + uint8 *v; + int sizes[16],i,m=0; + int q = get8(&z->s); + int tc = q >> 4; + int th = q & 15; + if (tc > 1 || th > 3) return e("bad DHT header","Corrupt JPEG"); + for (i=0; i < 16; ++i) { + sizes[i] = get8(&z->s); + m += sizes[i]; + } + L -= 17; + if (tc == 0) { + if (!build_huffman(z->huff_dc+th, sizes)) return 0; + v = z->huff_dc[th].values; + } else { + if (!build_huffman(z->huff_ac+th, sizes)) return 0; + v = z->huff_ac[th].values; + } + for (i=0; i < m; ++i) + v[i] = get8u(&z->s); + L -= m; + } + return L==0; + } + // check for comment block or APP blocks + if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) { + skip(&z->s, get16(&z->s)-2); + return 1; + } + return 0; +} + +// after we see SOS +static int process_scan_header(jpeg *z) +{ + int i; + int Ls = get16(&z->s); + z->scan_n = get8(&z->s); + if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s.img_n) return e("bad SOS component count","Corrupt JPEG"); + if (Ls != 6+2*z->scan_n) return e("bad SOS len","Corrupt JPEG"); + for (i=0; i < z->scan_n; ++i) { + int id = get8(&z->s), which; + int q = get8(&z->s); + for (which = 0; which < z->s.img_n; ++which) + if (z->img_comp[which].id == id) + break; + if (which == z->s.img_n) return 0; + z->img_comp[which].hd = q >> 4; if (z->img_comp[which].hd > 3) return e("bad DC huff","Corrupt JPEG"); + z->img_comp[which].ha = q & 15; if (z->img_comp[which].ha > 3) return e("bad AC huff","Corrupt JPEG"); + z->order[i] = which; + } + if (get8(&z->s) != 0) return e("bad SOS","Corrupt JPEG"); + get8(&z->s); // should be 63, but might be 0 + if (get8(&z->s) != 0) return e("bad SOS","Corrupt JPEG"); + + return 1; +} + +static int process_frame_header(jpeg *z, int scan) +{ + stbi *s = &z->s; + int Lf,p,i,q, h_max=1,v_max=1,c; + Lf = get16(s); if (Lf < 11) return e("bad SOF len","Corrupt JPEG"); // JPEG + p = get8(s); if (p != 8) return e("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline + s->img_y = get16(s); if (s->img_y == 0) return e("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG + s->img_x = get16(s); if (s->img_x == 0) return e("0 width","Corrupt JPEG"); // JPEG requires + c = get8(s); + if (c != 3 && c != 1) return e("bad component count","Corrupt JPEG"); // JFIF requires + s->img_n = c; + for (i=0; i < c; ++i) { + z->img_comp[i].data = NULL; + z->img_comp[i].linebuf = NULL; + } + + if (Lf != 8+3*s->img_n) return e("bad SOF len","Corrupt JPEG"); + + for (i=0; i < s->img_n; ++i) { + z->img_comp[i].id = get8(s); + if (z->img_comp[i].id != i+1) // JFIF requires + if (z->img_comp[i].id != i) // some version of jpegtran outputs non-JFIF-compliant files! 
+ return e("bad component ID","Corrupt JPEG"); + q = get8(s); + z->img_comp[i].h = (q >> 4); if (!z->img_comp[i].h || z->img_comp[i].h > 4) return e("bad H","Corrupt JPEG"); + z->img_comp[i].v = q & 15; if (!z->img_comp[i].v || z->img_comp[i].v > 4) return e("bad V","Corrupt JPEG"); + z->img_comp[i].tq = get8(s); if (z->img_comp[i].tq > 3) return e("bad TQ","Corrupt JPEG"); + } + + if (scan != SCAN_load) return 1; + + if ((1 << 30) / s->img_x / s->img_n < s->img_y) return e("too large", "Image too large to decode"); + + for (i=0; i < s->img_n; ++i) { + if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h; + if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v; + } + + // compute interleaved mcu info + z->img_h_max = h_max; + z->img_v_max = v_max; + z->img_mcu_w = h_max * 8; + z->img_mcu_h = v_max * 8; + z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w; + z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h; + + for (i=0; i < s->img_n; ++i) { + // number of effective pixels (e.g. for non-interleaved MCU) + z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max; + z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max; + // to simplify generation, we'll allocate enough memory to decode + // the bogus oversized data from using interleaved MCUs and their + // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't + // discard the extra data until colorspace conversion + z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8; + z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8; + z->img_comp[i].raw_data = malloc(z->img_comp[i].w2 * z->img_comp[i].h2+15); + if (z->img_comp[i].raw_data == NULL) { + for(--i; i >= 0; --i) { + free(z->img_comp[i].raw_data); + z->img_comp[i].data = NULL; + } + return e("outofmem", "Out of memory"); + } + // align blocks for installable-idct using mmx/sse + z->img_comp[i].data = (uint8*) (((size_t) z->img_comp[i].raw_data + 15) & ~15); + z->img_comp[i].linebuf = NULL; + } + + return 1; +} + +// use comparisons since in some cases we handle more than one case (e.g. 
SOF) +#define DNL(x) ((x) == 0xdc) +#define SOI(x) ((x) == 0xd8) +#define EOI(x) ((x) == 0xd9) +#define SOF(x) ((x) == 0xc0 || (x) == 0xc1) +#define SOS(x) ((x) == 0xda) + +static int decode_jpeg_header(jpeg *z, int scan) +{ + int m; + z->marker = MARKER_none; // initialize cached marker to empty + m = get_marker(z); + if (!SOI(m)) return e("no SOI","Corrupt JPEG"); + if (scan == SCAN_type) return 1; + m = get_marker(z); + while (!SOF(m)) { + if (!process_marker(z,m)) return 0; + m = get_marker(z); + while (m == MARKER_none) { + // some files have extra padding after their blocks, so ok, we'll scan + if (at_eof(&z->s)) return e("no SOF", "Corrupt JPEG"); + m = get_marker(z); + } + } + if (!process_frame_header(z, scan)) return 0; + return 1; +} + +static int decode_jpeg_image(jpeg *j) +{ + int m; + j->restart_interval = 0; + if (!decode_jpeg_header(j, SCAN_load)) return 0; + m = get_marker(j); + while (!EOI(m)) { + if (SOS(m)) { + if (!process_scan_header(j)) return 0; + if (!parse_entropy_coded_data(j)) return 0; + if (j->marker == MARKER_none ) { + // handle 0s at the end of image data from IP Kamera 9060 + while (!at_eof(&j->s)) { + int x = get8(&j->s); + if (x == 255) { + j->marker = get8u(&j->s); + break; + } else if (x != 0) { + return 0; + } + } + // if we reach eof without hitting a marker, get_marker() below will fail and we'll eventually return 0 + } + } else { + if (!process_marker(j, m)) return 0; + } + m = get_marker(j); + } + return 1; +} + +// static jfif-centered resampling (across block boundaries) + +typedef uint8 *(*resample_row_func)(uint8 *out, uint8 *in0, uint8 *in1, + int w, int hs); + +#define div4(x) ((uint8) ((x) >> 2)) + +static uint8 *resample_row_1(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs) +{ + STBI_NOTUSED(out); + STBI_NOTUSED(in_far); + STBI_NOTUSED(w); + STBI_NOTUSED(hs); + return in_near; +} + +static uint8* resample_row_v_2(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs) +{ + // need to generate two samples vertically for every one in input + int i; + STBI_NOTUSED(hs); + for (i=0; i < w; ++i) + out[i] = div4(3*in_near[i] + in_far[i] + 2); + return out; +} + +static uint8* resample_row_h_2(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs) +{ + // need to generate two samples horizontally for every one in input + int i; + uint8 *input = in_near; + + if (w == 1) { + // if only one sample, can't do any interpolation + out[0] = out[1] = input[0]; + return out; + } + + out[0] = input[0]; + out[1] = div4(input[0]*3 + input[1] + 2); + for (i=1; i < w-1; ++i) { + int n = 3*input[i]+2; + out[i*2+0] = div4(n+input[i-1]); + out[i*2+1] = div4(n+input[i+1]); + } + out[i*2+0] = div4(input[w-2]*3 + input[w-1] + 2); + out[i*2+1] = input[w-1]; + + STBI_NOTUSED(in_far); + STBI_NOTUSED(hs); + + return out; +} + +#define div16(x) ((uint8) ((x) >> 4)) + +static uint8 *resample_row_hv_2(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs) +{ + // need to generate 2x2 samples for every one in input + int i,t0,t1; + if (w == 1) { + out[0] = out[1] = div4(3*in_near[0] + in_far[0] + 2); + return out; + } + + t1 = 3*in_near[0] + in_far[0]; + out[0] = div4(t1+2); + for (i=1; i < w; ++i) { + t0 = t1; + t1 = 3*in_near[i]+in_far[i]; + out[i*2-1] = div16(3*t0 + t1 + 8); + out[i*2 ] = div16(3*t1 + t0 + 8); + } + out[w*2-1] = div4(t1+2); + + STBI_NOTUSED(hs); + + return out; +} + +static uint8 *resample_row_generic(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs) +{ + // resample with nearest-neighbor + int i,j; + in_far = in_far; + for (i=0; 
i < w; ++i) + for (j=0; j < hs; ++j) + out[i*hs+j] = in_near[i]; + return out; +} + +#define float2fixed(x) ((int) ((x) * 65536 + 0.5)) + +// 0.38 seconds on 3*anemones.jpg (0.25 with processor = Pro) +// VC6 without processor=Pro is generating multiple LEAs per multiply! +static void YCbCr_to_RGB_row(uint8 *out, const uint8 *y, const uint8 *pcb, const uint8 *pcr, int count, int step) +{ + int i; + for (i=0; i < count; ++i) { + int y_fixed = (y[i] << 16) + 32768; // rounding + int r,g,b; + int cr = pcr[i] - 128; + int cb = pcb[i] - 128; + r = y_fixed + cr*float2fixed(1.40200f); + g = y_fixed - cr*float2fixed(0.71414f) - cb*float2fixed(0.34414f); + b = y_fixed + cb*float2fixed(1.77200f); + r >>= 16; + g >>= 16; + b >>= 16; + if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; } + if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; } + if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; } + out[0] = (uint8)r; + out[1] = (uint8)g; + out[2] = (uint8)b; + out[3] = 255; + out += step; + } +} + +#ifdef STBI_SIMD +static stbi_YCbCr_to_RGB_run stbi_YCbCr_installed = YCbCr_to_RGB_row; + +void stbi_install_YCbCr_to_RGB(stbi_YCbCr_to_RGB_run func) +{ + stbi_YCbCr_installed = func; +} +#endif + + +// clean up the temporary component buffers +static void cleanup_jpeg(jpeg *j) +{ + int i; + for (i=0; i < j->s.img_n; ++i) { + if (j->img_comp[i].data) { + free(j->img_comp[i].raw_data); + j->img_comp[i].data = NULL; + } + if (j->img_comp[i].linebuf) { + free(j->img_comp[i].linebuf); + j->img_comp[i].linebuf = NULL; + } + } +} + +typedef struct +{ + resample_row_func resample; + uint8 *line0,*line1; + int hs,vs; // expansion factor in each axis + int w_lores; // horizontal pixels pre-expansion + int ystep; // how far through vertical expansion we are + int ypos; // which pre-expansion row we're on +} stbi_resample; + +static uint8 *load_jpeg_image(jpeg *z, int *out_x, int *out_y, int *comp, int req_comp) +{ + int n, decode_n; + // validate req_comp + if (req_comp < 0 || req_comp > 4) return epuc("bad req_comp", "Internal error"); + z->s.img_n = 0; + + // load a jpeg image from whichever source + if (!decode_jpeg_image(z)) { cleanup_jpeg(z); return NULL; } + + // determine actual number of components to generate + n = req_comp ? 
req_comp : z->s.img_n; + + if (z->s.img_n == 3 && n < 3) + decode_n = 1; + else + decode_n = z->s.img_n; + + // resample and color-convert + { + int k; + uint i,j; + uint8 *output; + uint8 *coutput[4]; + + stbi_resample res_comp[4]; + + for (k=0; k < decode_n; ++k) { + stbi_resample *r = &res_comp[k]; + + // allocate line buffer big enough for upsampling off the edges + // with upsample factor of 4 + z->img_comp[k].linebuf = (uint8 *) malloc(z->s.img_x + 3); + if (!z->img_comp[k].linebuf) { cleanup_jpeg(z); return epuc("outofmem", "Out of memory"); } + + r->hs = z->img_h_max / z->img_comp[k].h; + r->vs = z->img_v_max / z->img_comp[k].v; + r->ystep = r->vs >> 1; + r->w_lores = (z->s.img_x + r->hs-1) / r->hs; + r->ypos = 0; + r->line0 = r->line1 = z->img_comp[k].data; + + if (r->hs == 1 && r->vs == 1) r->resample = resample_row_1; + else if (r->hs == 1 && r->vs == 2) r->resample = resample_row_v_2; + else if (r->hs == 2 && r->vs == 1) r->resample = resample_row_h_2; + else if (r->hs == 2 && r->vs == 2) r->resample = resample_row_hv_2; + else r->resample = resample_row_generic; + } + + // can't error after this so, this is safe + output = (uint8 *) malloc(n * z->s.img_x * z->s.img_y + 1); + if (!output) { cleanup_jpeg(z); return epuc("outofmem", "Out of memory"); } + + // now go ahead and resample + for (j=0; j < z->s.img_y; ++j) { + uint8 *out = output + n * z->s.img_x * j; + for (k=0; k < decode_n; ++k) { + stbi_resample *r = &res_comp[k]; + int y_bot = r->ystep >= (r->vs >> 1); + coutput[k] = r->resample(z->img_comp[k].linebuf, + y_bot ? r->line1 : r->line0, + y_bot ? r->line0 : r->line1, + r->w_lores, r->hs); + if (++r->ystep >= r->vs) { + r->ystep = 0; + r->line0 = r->line1; + if (++r->ypos < z->img_comp[k].y) + r->line1 += z->img_comp[k].w2; + } + } + if (n >= 3) { + uint8 *y = coutput[0]; + if (z->s.img_n == 3) { + #ifdef STBI_SIMD + stbi_YCbCr_installed(out, y, coutput[1], coutput[2], z->s.img_x, n); + #else + YCbCr_to_RGB_row(out, y, coutput[1], coutput[2], z->s.img_x, n); + #endif + } else + for (i=0; i < z->s.img_x; ++i) { + out[0] = out[1] = out[2] = y[i]; + out[3] = 255; // not used if n==3 + out += n; + } + } else { + uint8 *y = coutput[0]; + if (n == 1) + for (i=0; i < z->s.img_x; ++i) out[i] = y[i]; + else + for (i=0; i < z->s.img_x; ++i) *out++ = y[i], *out++ = 255; + } + } + cleanup_jpeg(z); + *out_x = z->s.img_x; + *out_y = z->s.img_y; + if (comp) *comp = z->s.img_n; // report original components, not output + return output; + } +} + +#ifndef STBI_NO_STDIO +unsigned char *stbi_jpeg_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + jpeg j; + start_file(&j.s, f); + return load_jpeg_image(&j, x,y,comp,req_comp); +} + +unsigned char *stbi_jpeg_load(char const *filename, int *x, int *y, int *comp, int req_comp) +{ + unsigned char *data; + FILE *f = fopen(filename, "rb"); + if (!f) return NULL; + data = stbi_jpeg_load_from_file(f,x,y,comp,req_comp); + fclose(f); + return data; +} +#endif + +unsigned char *stbi_jpeg_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + #ifdef STBI_SMALL_STACK + unsigned char *result; + jpeg *j = (jpeg *) malloc(sizeof(*j)); + start_mem(&j->s, buffer, len); + result = load_jpeg_image(j,x,y,comp,req_comp); + free(j); + return result; + #else + jpeg j; + start_mem(&j.s, buffer,len); + return load_jpeg_image(&j, x,y,comp,req_comp); + #endif +} + +static int stbi_jpeg_info_raw(jpeg *j, int *x, int *y, int *comp) +{ + if (!decode_jpeg_header(j, SCAN_header)) + return 0; + if (x) *x = 
j->s.img_x; + if (y) *y = j->s.img_y; + if (comp) *comp = j->s.img_n; + return 1; +} + +#ifndef STBI_NO_STDIO +int stbi_jpeg_test_file(FILE *f) +{ + int n,r; + jpeg j; + n = ftell(f); + start_file(&j.s, f); + r = decode_jpeg_header(&j, SCAN_type); + fseek(f,n,SEEK_SET); + return r; +} + +int stbi_jpeg_info_from_file(FILE *f, int *x, int *y, int *comp) +{ + jpeg j; + long n = ftell(f); + int res; + start_file(&j.s, f); + res = stbi_jpeg_info_raw(&j, x, y, comp); + fseek(f, n, SEEK_SET); + return res; +} + +int stbi_jpeg_info(char const *filename, int *x, int *y, int *comp) +{ + FILE *f = fopen(filename, "rb"); + int result; + if (!f) return e("can't fopen", "Unable to open file"); + result = stbi_jpeg_info_from_file(f, x, y, comp); + fclose(f); + return result; +} +#endif + +int stbi_jpeg_test_memory(stbi_uc const *buffer, int len) +{ + jpeg j; + start_mem(&j.s, buffer,len); + return decode_jpeg_header(&j, SCAN_type); +} + +int stbi_jpeg_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp) +{ + jpeg j; + start_mem(&j.s, buffer, len); + return stbi_jpeg_info_raw(&j, x, y, comp); +} + +#ifndef STBI_NO_STDIO +extern int stbi_jpeg_info (char const *filename, int *x, int *y, int *comp); +extern int stbi_jpeg_info_from_file (FILE *f, int *x, int *y, int *comp); +#endif +extern int stbi_jpeg_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp); + +// public domain zlib decode v0.2 Sean Barrett 2006-11-18 +// simple implementation +// - all input must be provided in an upfront buffer +// - all output is written to a single output buffer (can malloc/realloc) +// performance +// - fast huffman + +// fast-way is faster to check than jpeg huffman, but slow way is slower +#define ZFAST_BITS 9 // accelerate all cases in default tables +#define ZFAST_MASK ((1 << ZFAST_BITS) - 1) + +// zlib-style huffman encoding +// (jpegs packs from left, zlib from right, so can't share code) +typedef struct +{ + uint16 fast[1 << ZFAST_BITS]; + uint16 firstcode[16]; + int maxcode[17]; + uint16 firstsymbol[16]; + uint8 size[288]; + uint16 value[288]; +} zhuffman; + +__forceinline static int bitreverse16(int n) +{ + n = ((n & 0xAAAA) >> 1) | ((n & 0x5555) << 1); + n = ((n & 0xCCCC) >> 2) | ((n & 0x3333) << 2); + n = ((n & 0xF0F0) >> 4) | ((n & 0x0F0F) << 4); + n = ((n & 0xFF00) >> 8) | ((n & 0x00FF) << 8); + return n; +} + +__forceinline static int bit_reverse(int v, int bits) +{ + assert(bits <= 16); + // to bit reverse n bits, reverse 16 and shift + // e.g. 
11 bits, bit reverse and shift away 5 + return bitreverse16(v) >> (16-bits); +} + +static int zbuild_huffman(zhuffman *z, uint8 *sizelist, int num) +{ + int i,k=0; + int code, next_code[16], sizes[17]; + + // DEFLATE spec for generating codes + memset(sizes, 0, sizeof(sizes)); + memset(z->fast, 255, sizeof(z->fast)); + for (i=0; i < num; ++i) + ++sizes[sizelist[i]]; + sizes[0] = 0; + for (i=1; i < 16; ++i) + assert(sizes[i] <= (1 << i)); + code = 0; + for (i=1; i < 16; ++i) { + next_code[i] = code; + z->firstcode[i] = (uint16) code; + z->firstsymbol[i] = (uint16) k; + code = (code + sizes[i]); + if (sizes[i]) + if (code-1 >= (1 << i)) return e("bad codelengths","Corrupt JPEG"); + z->maxcode[i] = code << (16-i); // preshift for inner loop + code <<= 1; + k += sizes[i]; + } + z->maxcode[16] = 0x10000; // sentinel + for (i=0; i < num; ++i) { + int s = sizelist[i]; + if (s) { + int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s]; + z->size[c] = (uint8)s; + z->value[c] = (uint16)i; + if (s <= ZFAST_BITS) { + int k = bit_reverse(next_code[s],s); + while (k < (1 << ZFAST_BITS)) { + z->fast[k] = (uint16) c; + k += (1 << s); + } + } + ++next_code[s]; + } + } + return 1; +} + +// zlib-from-memory implementation for PNG reading +// because PNG allows splitting the zlib stream arbitrarily, +// and it's annoying structurally to have PNG call ZLIB call PNG, +// we require PNG read all the IDATs and combine them into a single +// memory buffer + +typedef struct +{ + uint8 *zbuffer, *zbuffer_end; + int num_bits; + uint32 code_buffer; + + char *zout; + char *zout_start; + char *zout_end; + int z_expandable; + + zhuffman z_length, z_distance; +} zbuf; + +__forceinline static int zget8(zbuf *z) +{ + if (z->zbuffer >= z->zbuffer_end) return 0; + return *z->zbuffer++; +} + +static void fill_bits(zbuf *z) +{ + do { + assert(z->code_buffer < (1U << z->num_bits)); + z->code_buffer |= zget8(z) << z->num_bits; + z->num_bits += 8; + } while (z->num_bits <= 24); +} + +__forceinline static unsigned int zreceive(zbuf *z, int n) +{ + unsigned int k; + if (z->num_bits < n) fill_bits(z); + k = z->code_buffer & ((1 << n) - 1); + z->code_buffer >>= n; + z->num_bits -= n; + return k; +} + +__forceinline static int zhuffman_decode(zbuf *a, zhuffman *z) +{ + int b,s,k; + if (a->num_bits < 16) fill_bits(a); + b = z->fast[a->code_buffer & ZFAST_MASK]; + if (b < 0xffff) { + s = z->size[b]; + a->code_buffer >>= s; + a->num_bits -= s; + return z->value[b]; + } + + // not resolved by fast table, so compute it the slow way + // use jpeg approach, which requires MSbits at top + k = bit_reverse(a->code_buffer, 16); + for (s=ZFAST_BITS+1; ; ++s) + if (k < z->maxcode[s]) + break; + if (s == 16) return -1; // invalid code! 
+ // code size is s, so: + b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s]; + assert(z->size[b] == s); + a->code_buffer >>= s; + a->num_bits -= s; + return z->value[b]; +} + +static int expand(zbuf *z, int n) // need to make room for n bytes +{ + char *q; + int cur, limit; + if (!z->z_expandable) return e("output buffer limit","Corrupt PNG"); + cur = (int) (z->zout - z->zout_start); + limit = (int) (z->zout_end - z->zout_start); + while (cur + n > limit) + limit *= 2; + q = (char *) realloc(z->zout_start, limit); + if (q == NULL) return e("outofmem", "Out of memory"); + z->zout_start = q; + z->zout = q + cur; + z->zout_end = q + limit; + return 1; +} + +static int length_base[31] = { + 3,4,5,6,7,8,9,10,11,13, + 15,17,19,23,27,31,35,43,51,59, + 67,83,99,115,131,163,195,227,258,0,0 }; + +static int length_extra[31]= +{ 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 }; + +static int dist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193, +257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0}; + +static int dist_extra[32] = +{ 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13}; + +static int parse_huffman_block(zbuf *a) +{ + for(;;) { + int z = zhuffman_decode(a, &a->z_length); + if (z < 256) { + if (z < 0) return e("bad huffman code","Corrupt PNG"); // error in huffman codes + if (a->zout >= a->zout_end) if (!expand(a, 1)) return 0; + *a->zout++ = (char) z; + } else { + uint8 *p; + int len,dist; + if (z == 256) return 1; + z -= 257; + len = length_base[z]; + if (length_extra[z]) len += zreceive(a, length_extra[z]); + z = zhuffman_decode(a, &a->z_distance); + if (z < 0) return e("bad huffman code","Corrupt PNG"); + dist = dist_base[z]; + if (dist_extra[z]) dist += zreceive(a, dist_extra[z]); + if (a->zout - a->zout_start < dist) return e("bad dist","Corrupt PNG"); + if (a->zout + len > a->zout_end) if (!expand(a, len)) return 0; + p = (uint8 *) (a->zout - dist); + while (len--) + *a->zout++ = *p++; + } + } +} + +static int compute_huffman_codes(zbuf *a) +{ + static uint8 length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 }; + zhuffman z_codelength; + uint8 lencodes[286+32+137];//padding for maximum single op + uint8 codelength_sizes[19]; + int i,n; + + int hlit = zreceive(a,5) + 257; + int hdist = zreceive(a,5) + 1; + int hclen = zreceive(a,4) + 4; + + memset(codelength_sizes, 0, sizeof(codelength_sizes)); + for (i=0; i < hclen; ++i) { + int s = zreceive(a,3); + codelength_sizes[length_dezigzag[i]] = (uint8) s; + } + if (!zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0; + + n = 0; + while (n < hlit + hdist) { + int c = zhuffman_decode(a, &z_codelength); + assert(c >= 0 && c < 19); + if (c < 16) + lencodes[n++] = (uint8) c; + else if (c == 16) { + c = zreceive(a,2)+3; + memset(lencodes+n, lencodes[n-1], c); + n += c; + } else if (c == 17) { + c = zreceive(a,3)+3; + memset(lencodes+n, 0, c); + n += c; + } else { + assert(c == 18); + c = zreceive(a,7)+11; + memset(lencodes+n, 0, c); + n += c; + } + } + if (n != hlit+hdist) return e("bad codelengths","Corrupt PNG"); + if (!zbuild_huffman(&a->z_length, lencodes, hlit)) return 0; + if (!zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0; + return 1; +} + +static int parse_uncompressed_block(zbuf *a) +{ + uint8 header[4]; + int len,nlen,k; + if (a->num_bits & 7) + zreceive(a, a->num_bits & 7); // discard + // drain the bit-packed data into header + k = 0; + while (a->num_bits > 0) { + header[k++] = (uint8) (a->code_buffer & 255); // wtf 
this warns? + a->code_buffer >>= 8; + a->num_bits -= 8; + } + assert(a->num_bits == 0); + // now fill header the normal way + while (k < 4) + header[k++] = (uint8) zget8(a); + len = header[1] * 256 + header[0]; + nlen = header[3] * 256 + header[2]; + if (nlen != (len ^ 0xffff)) return e("zlib corrupt","Corrupt PNG"); + if (a->zbuffer + len > a->zbuffer_end) return e("read past buffer","Corrupt PNG"); + if (a->zout + len > a->zout_end) + if (!expand(a, len)) return 0; + memcpy(a->zout, a->zbuffer, len); + a->zbuffer += len; + a->zout += len; + return 1; +} + +static int parse_zlib_header(zbuf *a) +{ + int cmf = zget8(a); + int cm = cmf & 15; + /* int cinfo = cmf >> 4; */ + int flg = zget8(a); + if ((cmf*256+flg) % 31 != 0) return e("bad zlib header","Corrupt PNG"); // zlib spec + if (flg & 32) return e("no preset dict","Corrupt PNG"); // preset dictionary not allowed in png + if (cm != 8) return e("bad compression","Corrupt PNG"); // DEFLATE required for png + // window = 1 << (8 + cinfo)... but who cares, we fully buffer output + return 1; +} + +// @TODO: should statically initialize these for optimal thread safety +static uint8 default_length[288], default_distance[32]; +static void init_defaults(void) +{ + int i; // use <= to match clearly with spec + for (i=0; i <= 143; ++i) default_length[i] = 8; + for ( ; i <= 255; ++i) default_length[i] = 9; + for ( ; i <= 279; ++i) default_length[i] = 7; + for ( ; i <= 287; ++i) default_length[i] = 8; + + for (i=0; i <= 31; ++i) default_distance[i] = 5; +} + +int stbi_png_partial; // a quick hack to only allow decoding some of a PNG... I should implement real streaming support instead +static int parse_zlib(zbuf *a, int parse_header) +{ + int final, type; + if (parse_header) + if (!parse_zlib_header(a)) return 0; + a->num_bits = 0; + a->code_buffer = 0; + do { + final = zreceive(a,1); + type = zreceive(a,2); + if (type == 0) { + if (!parse_uncompressed_block(a)) return 0; + } else if (type == 3) { + return 0; + } else { + if (type == 1) { + // use fixed code lengths + if (!default_distance[31]) init_defaults(); + if (!zbuild_huffman(&a->z_length , default_length , 288)) return 0; + if (!zbuild_huffman(&a->z_distance, default_distance, 32)) return 0; + } else { + if (!compute_huffman_codes(a)) return 0; + } + if (!parse_huffman_block(a)) return 0; + } + if (stbi_png_partial && a->zout - a->zout_start > 65536) + break; + } while (!final); + return 1; +} + +static int do_zlib(zbuf *a, char *obuf, int olen, int exp, int parse_header) +{ + a->zout_start = obuf; + a->zout = obuf; + a->zout_end = obuf + olen; + a->z_expandable = exp; + + return parse_zlib(a, parse_header); +} + +char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen) +{ + zbuf a; + char *p = (char *) malloc(initial_size); + if (p == NULL) return NULL; + a.zbuffer = (uint8 *) buffer; + a.zbuffer_end = (uint8 *) buffer + len; + if (do_zlib(&a, p, initial_size, 1, 1)) { + if (outlen) *outlen = (int) (a.zout - a.zout_start); + return a.zout_start; + } else { + free(a.zout_start); + return NULL; + } +} + +char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen) +{ + return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen); +} + +char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header) +{ + zbuf a; + char *p = (char *) malloc(initial_size); + if (p == NULL) return NULL; + a.zbuffer = (uint8 *) buffer; + a.zbuffer_end = (uint8 *) buffer + len; + if 
(do_zlib(&a, p, initial_size, 1, parse_header)) { + if (outlen) *outlen = (int) (a.zout - a.zout_start); + return a.zout_start; + } else { + free(a.zout_start); + return NULL; + } +} + +int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen) +{ + zbuf a; + a.zbuffer = (uint8 *) ibuffer; + a.zbuffer_end = (uint8 *) ibuffer + ilen; + if (do_zlib(&a, obuffer, olen, 0, 1)) + return (int) (a.zout - a.zout_start); + else + return -1; +} + +char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen) +{ + zbuf a; + char *p = (char *) malloc(16384); + if (p == NULL) return NULL; + a.zbuffer = (uint8 *) buffer; + a.zbuffer_end = (uint8 *) buffer+len; + if (do_zlib(&a, p, 16384, 1, 0)) { + if (outlen) *outlen = (int) (a.zout - a.zout_start); + return a.zout_start; + } else { + free(a.zout_start); + return NULL; + } +} + +int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen) +{ + zbuf a; + a.zbuffer = (uint8 *) ibuffer; + a.zbuffer_end = (uint8 *) ibuffer + ilen; + if (do_zlib(&a, obuffer, olen, 0, 0)) + return (int) (a.zout - a.zout_start); + else + return -1; +} + +// public domain "baseline" PNG decoder v0.10 Sean Barrett 2006-11-18 +// simple implementation +// - only 8-bit samples +// - no CRC checking +// - allocates lots of intermediate memory +// - avoids problem of streaming data between subsystems +// - avoids explicit window management +// performance +// - uses stb_zlib, a PD zlib implementation with fast huffman decoding + + +typedef struct +{ + uint32 length; + uint32 type; +} chunk; + +#define PNG_TYPE(a,b,c,d) (((a) << 24) + ((b) << 16) + ((c) << 8) + (d)) + +static chunk get_chunk_header(stbi *s) +{ + chunk c; + c.length = get32(s); + c.type = get32(s); + return c; +} + +static int check_png_header(stbi *s) +{ + static uint8 png_sig[8] = { 137,80,78,71,13,10,26,10 }; + int i; + for (i=0; i < 8; ++i) + if (get8(s) != png_sig[i]) return e("bad png sig","Not a PNG"); + return 1; +} + +typedef struct +{ + stbi s; + uint8 *idata, *expanded, *out; +} png; + + +enum { + F_none=0, F_sub=1, F_up=2, F_avg=3, F_paeth=4, + F_avg_first, F_paeth_first +}; + +static uint8 first_row_filter[5] = +{ + F_none, F_sub, F_none, F_avg_first, F_paeth_first +}; + +static int paeth(int a, int b, int c) +{ + int p = a + b - c; + int pa = abs(p-a); + int pb = abs(p-b); + int pc = abs(p-c); + if (pa <= pb && pa <= pc) return a; + if (pb <= pc) return b; + return c; +} + +// create the png data from post-deflated data +static int create_png_image_raw(png *a, uint8 *raw, uint32 raw_len, int out_n, uint32 x, uint32 y) +{ + stbi *s = &a->s; + uint32 i,j,stride = x*out_n; + int k; + int img_n = s->img_n; // copy it into a local for later + assert(out_n == s->img_n || out_n == s->img_n+1); + if (stbi_png_partial) y = 1; + a->out = (uint8 *) malloc(x * y * out_n); + if (!a->out) return e("outofmem", "Out of memory"); + if (!stbi_png_partial) { + if (s->img_x == x && s->img_y == y) { + if (raw_len != (img_n * x + 1) * y) return e("not enough pixels","Corrupt PNG"); + } else { // interlaced: + if (raw_len < (img_n * x + 1) * y) return e("not enough pixels","Corrupt PNG"); + } + } + for (j=0; j < y; ++j) { + uint8 *cur = a->out + stride*j; + uint8 *prior = cur - stride; + int filter = *raw++; + if (filter > 4) return e("invalid filter","Corrupt PNG"); + // if first row, use special filter that doesn't sample previous row + if (j == 0) filter = first_row_filter[filter]; + // handle first pixel explicitly + for (k=0; k < img_n; ++k) { + 
switch (filter) { + case F_none : cur[k] = raw[k]; break; + case F_sub : cur[k] = raw[k]; break; + case F_up : cur[k] = raw[k] + prior[k]; break; + case F_avg : cur[k] = raw[k] + (prior[k]>>1); break; + case F_paeth : cur[k] = (uint8) (raw[k] + paeth(0,prior[k],0)); break; + case F_avg_first : cur[k] = raw[k]; break; + case F_paeth_first: cur[k] = raw[k]; break; + } + } + if (img_n != out_n) cur[img_n] = 255; + raw += img_n; + cur += out_n; + prior += out_n; + // this is a little gross, so that we don't switch per-pixel or per-component + if (img_n == out_n) { + #define CASE(f) \ + case f: \ + for (i=x-1; i >= 1; --i, raw+=img_n,cur+=img_n,prior+=img_n) \ + for (k=0; k < img_n; ++k) + switch (filter) { + CASE(F_none) cur[k] = raw[k]; break; + CASE(F_sub) cur[k] = raw[k] + cur[k-img_n]; break; + CASE(F_up) cur[k] = raw[k] + prior[k]; break; + CASE(F_avg) cur[k] = raw[k] + ((prior[k] + cur[k-img_n])>>1); break; + CASE(F_paeth) cur[k] = (uint8) (raw[k] + paeth(cur[k-img_n],prior[k],prior[k-img_n])); break; + CASE(F_avg_first) cur[k] = raw[k] + (cur[k-img_n] >> 1); break; + CASE(F_paeth_first) cur[k] = (uint8) (raw[k] + paeth(cur[k-img_n],0,0)); break; + } + #undef CASE + } else { + assert(img_n+1 == out_n); + #define CASE(f) \ + case f: \ + for (i=x-1; i >= 1; --i, cur[img_n]=255,raw+=img_n,cur+=out_n,prior+=out_n) \ + for (k=0; k < img_n; ++k) + switch (filter) { + CASE(F_none) cur[k] = raw[k]; break; + CASE(F_sub) cur[k] = raw[k] + cur[k-out_n]; break; + CASE(F_up) cur[k] = raw[k] + prior[k]; break; + CASE(F_avg) cur[k] = raw[k] + ((prior[k] + cur[k-out_n])>>1); break; + CASE(F_paeth) cur[k] = (uint8) (raw[k] + paeth(cur[k-out_n],prior[k],prior[k-out_n])); break; + CASE(F_avg_first) cur[k] = raw[k] + (cur[k-out_n] >> 1); break; + CASE(F_paeth_first) cur[k] = (uint8) (raw[k] + paeth(cur[k-out_n],0,0)); break; + } + #undef CASE + } + } + return 1; +} + +static int create_png_image(png *a, uint8 *raw, uint32 raw_len, int out_n, int interlaced) +{ + uint8 *final; + int p; + int save; + if (!interlaced) + return create_png_image_raw(a, raw, raw_len, out_n, a->s.img_x, a->s.img_y); + save = stbi_png_partial; + stbi_png_partial = 0; + + // de-interlacing + final = (uint8 *) malloc(a->s.img_x * a->s.img_y * out_n); + for (p=0; p < 7; ++p) { + int xorig[] = { 0,4,0,2,0,1,0 }; + int yorig[] = { 0,0,4,0,2,0,1 }; + int xspc[] = { 8,8,4,4,2,2,1 }; + int yspc[] = { 8,8,8,4,4,2,2 }; + int i,j,x,y; + // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1 + x = (a->s.img_x - xorig[p] + xspc[p]-1) / xspc[p]; + y = (a->s.img_y - yorig[p] + yspc[p]-1) / yspc[p]; + if (x && y) { + if (!create_png_image_raw(a, raw, raw_len, out_n, x, y)) { + free(final); + return 0; + } + for (j=0; j < y; ++j) + for (i=0; i < x; ++i) + memcpy(final + (j*yspc[p]+yorig[p])*a->s.img_x*out_n + (i*xspc[p]+xorig[p])*out_n, + a->out + (j*x+i)*out_n, out_n); + free(a->out); + raw += (x*out_n+1)*y; + raw_len -= (x*out_n+1)*y; + } + } + a->out = final; + + stbi_png_partial = save; + return 1; +} + +static int compute_transparency(png *z, uint8 tc[3], int out_n) +{ + stbi *s = &z->s; + uint32 i, pixel_count = s->img_x * s->img_y; + uint8 *p = z->out; + + // compute color-based transparency, assuming we've + // already got 255 as the alpha value in the output + assert(out_n == 2 || out_n == 4); + + if (out_n == 2) { + for (i=0; i < pixel_count; ++i) { + p[1] = (p[0] == tc[0] ? 
0 : 255); + p += 2; + } + } else { + for (i=0; i < pixel_count; ++i) { + if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2]) + p[3] = 0; + p += 4; + } + } + return 1; +} + +static int expand_palette(png *a, uint8 *palette, int len, int pal_img_n) +{ + uint32 i, pixel_count = a->s.img_x * a->s.img_y; + uint8 *p, *temp_out, *orig = a->out; + + p = (uint8 *) malloc(pixel_count * pal_img_n); + if (p == NULL) return e("outofmem", "Out of memory"); + + // between here and free(out) below, exitting would leak + temp_out = p; + + if (pal_img_n == 3) { + for (i=0; i < pixel_count; ++i) { + int n = orig[i]*4; + p[0] = palette[n ]; + p[1] = palette[n+1]; + p[2] = palette[n+2]; + p += 3; + } + } else { + for (i=0; i < pixel_count; ++i) { + int n = orig[i]*4; + p[0] = palette[n ]; + p[1] = palette[n+1]; + p[2] = palette[n+2]; + p[3] = palette[n+3]; + p += 4; + } + } + free(a->out); + a->out = temp_out; + + STBI_NOTUSED(len); + + return 1; +} + +static int stbi_unpremultiply_on_load = 0; +static int stbi_de_iphone_flag = 0; + +void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply) +{ + stbi_unpremultiply_on_load = flag_true_if_should_unpremultiply; +} +void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert) +{ + stbi_de_iphone_flag = flag_true_if_should_convert; +} + +static void stbi_de_iphone(png *z) +{ + stbi *s = &z->s; + uint32 i, pixel_count = s->img_x * s->img_y; + uint8 *p = z->out; + + if (s->img_out_n == 3) { // convert bgr to rgb + for (i=0; i < pixel_count; ++i) { + uint8 t = p[0]; + p[0] = p[2]; + p[2] = t; + p += 3; + } + } else { + assert(s->img_out_n == 4); + if (stbi_unpremultiply_on_load) { + // convert bgr to rgb and unpremultiply + for (i=0; i < pixel_count; ++i) { + uint8 a = p[3]; + uint8 t = p[0]; + if (a) { + p[0] = p[2] * 255 / a; + p[1] = p[1] * 255 / a; + p[2] = t * 255 / a; + } else { + p[0] = p[2]; + p[2] = t; + } + p += 4; + } + } else { + // convert bgr to rgb + for (i=0; i < pixel_count; ++i) { + uint8 t = p[0]; + p[0] = p[2]; + p[2] = t; + p += 4; + } + } + } +} + +static int parse_png_file(png *z, int scan, int req_comp) +{ + uint8 palette[1024], pal_img_n=0; + uint8 has_trans=0, tc[3]; + uint32 ioff=0, idata_limit=0, i, pal_len=0; + int first=1,k,interlace=0, iphone=0; + stbi *s = &z->s; + + if (!check_png_header(s)) return 0; + + if (scan == SCAN_type) return 1; + + for (;;) { + chunk c = get_chunk_header(s); + switch (c.type) { + case PNG_TYPE('C','g','B','I'): + iphone = stbi_de_iphone_flag; + skip(s, c.length); + break; + case PNG_TYPE('I','H','D','R'): { + int depth,color,comp,filter; + if (!first) return e("multiple IHDR","Corrupt PNG"); + first = 0; + if (c.length != 13) return e("bad IHDR len","Corrupt PNG"); + s->img_x = get32(s); if (s->img_x > (1 << 24)) return e("too large","Very large image (corrupt?)"); + s->img_y = get32(s); if (s->img_y > (1 << 24)) return e("too large","Very large image (corrupt?)"); + depth = get8(s); if (depth != 8) return e("8bit only","PNG not supported: 8-bit only"); + color = get8(s); if (color > 6) return e("bad ctype","Corrupt PNG"); + if (color == 3) pal_img_n = 3; else if (color & 1) return e("bad ctype","Corrupt PNG"); + comp = get8(s); if (comp) return e("bad comp method","Corrupt PNG"); + filter= get8(s); if (filter) return e("bad filter method","Corrupt PNG"); + interlace = get8(s); if (interlace>1) return e("bad interlace method","Corrupt PNG"); + if (!s->img_x || !s->img_y) return e("0-pixel image","Corrupt PNG"); + if (!pal_img_n) { + s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 
1 : 0); + if ((1 << 30) / s->img_x / s->img_n < s->img_y) return e("too large", "Image too large to decode"); + if (scan == SCAN_header) return 1; + } else { + // if paletted, then pal_n is our final components, and + // img_n is # components to decompress/filter. + s->img_n = 1; + if ((1 << 30) / s->img_x / 4 < s->img_y) return e("too large","Corrupt PNG"); + // if SCAN_header, have to scan to see if we have a tRNS + } + break; + } + + case PNG_TYPE('P','L','T','E'): { + if (first) return e("first not IHDR", "Corrupt PNG"); + if (c.length > 256*3) return e("invalid PLTE","Corrupt PNG"); + pal_len = c.length / 3; + if (pal_len * 3 != c.length) return e("invalid PLTE","Corrupt PNG"); + for (i=0; i < pal_len; ++i) { + palette[i*4+0] = get8u(s); + palette[i*4+1] = get8u(s); + palette[i*4+2] = get8u(s); + palette[i*4+3] = 255; + } + break; + } + + case PNG_TYPE('t','R','N','S'): { + if (first) return e("first not IHDR", "Corrupt PNG"); + if (z->idata) return e("tRNS after IDAT","Corrupt PNG"); + if (pal_img_n) { + if (scan == SCAN_header) { s->img_n = 4; return 1; } + if (pal_len == 0) return e("tRNS before PLTE","Corrupt PNG"); + if (c.length > pal_len) return e("bad tRNS len","Corrupt PNG"); + pal_img_n = 4; + for (i=0; i < c.length; ++i) + palette[i*4+3] = get8u(s); + } else { + if (!(s->img_n & 1)) return e("tRNS with alpha","Corrupt PNG"); + if (c.length != (uint32) s->img_n*2) return e("bad tRNS len","Corrupt PNG"); + has_trans = 1; + for (k=0; k < s->img_n; ++k) + tc[k] = (uint8) get16(s); // non 8-bit images will be larger + } + break; + } + + case PNG_TYPE('I','D','A','T'): { + if (first) return e("first not IHDR", "Corrupt PNG"); + if (pal_img_n && !pal_len) return e("no PLTE","Corrupt PNG"); + if (scan == SCAN_header) { s->img_n = pal_img_n; return 1; } + if (ioff + c.length > idata_limit) { + uint8 *p; + if (idata_limit == 0) idata_limit = c.length > 4096 ? 
c.length : 4096; + while (ioff + c.length > idata_limit) + idata_limit *= 2; + p = (uint8 *) realloc(z->idata, idata_limit); if (p == NULL) return e("outofmem", "Out of memory"); + z->idata = p; + } + if (!getn(s, z->idata+ioff,c.length)) return e("outofdata","Corrupt PNG"); + ioff += c.length; + break; + } + + case PNG_TYPE('I','E','N','D'): { + uint32 raw_len; + if (first) return e("first not IHDR", "Corrupt PNG"); + if (scan != SCAN_load) return 1; + if (z->idata == NULL) return e("no IDAT","Corrupt PNG"); + z->expanded = (uint8 *) stbi_zlib_decode_malloc_guesssize_headerflag((char *) z->idata, ioff, 16384, (int *) &raw_len, !iphone); + if (z->expanded == NULL) return 0; // zlib should set error + free(z->idata); z->idata = NULL; + if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans) + s->img_out_n = s->img_n+1; + else + s->img_out_n = s->img_n; + if (!create_png_image(z, z->expanded, raw_len, s->img_out_n, interlace)) return 0; + if (has_trans) + if (!compute_transparency(z, tc, s->img_out_n)) return 0; + if (iphone && s->img_out_n > 2) + stbi_de_iphone(z); + if (pal_img_n) { + // pal_img_n == 3 or 4 + s->img_n = pal_img_n; // record the actual colors we had + s->img_out_n = pal_img_n; + if (req_comp >= 3) s->img_out_n = req_comp; + if (!expand_palette(z, palette, pal_len, s->img_out_n)) + return 0; + } + free(z->expanded); z->expanded = NULL; + return 1; + } + + default: + // if critical, fail + if (first) return e("first not IHDR", "Corrupt PNG"); + if ((c.type & (1 << 29)) == 0) { + #ifndef STBI_NO_FAILURE_STRINGS + // not threadsafe + static char invalid_chunk[] = "XXXX chunk not known"; + invalid_chunk[0] = (uint8) (c.type >> 24); + invalid_chunk[1] = (uint8) (c.type >> 16); + invalid_chunk[2] = (uint8) (c.type >> 8); + invalid_chunk[3] = (uint8) (c.type >> 0); + #endif + return e(invalid_chunk, "PNG not supported: unknown chunk type"); + } + skip(s, c.length); + break; + } + // end of chunk, read and skip CRC + get32(s); + } +} + +static unsigned char *do_png(png *p, int *x, int *y, int *n, int req_comp) +{ + unsigned char *result=NULL; + p->expanded = NULL; + p->idata = NULL; + p->out = NULL; + if (req_comp < 0 || req_comp > 4) return epuc("bad req_comp", "Internal error"); + if (parse_png_file(p, SCAN_load, req_comp)) { + result = p->out; + p->out = NULL; + if (req_comp && req_comp != p->s.img_out_n) { + result = convert_format(result, p->s.img_out_n, req_comp, p->s.img_x, p->s.img_y); + p->s.img_out_n = req_comp; + if (result == NULL) return result; + } + *x = p->s.img_x; + *y = p->s.img_y; + if (n) *n = p->s.img_n; + } + free(p->out); p->out = NULL; + free(p->expanded); p->expanded = NULL; + free(p->idata); p->idata = NULL; + + return result; +} + +#ifndef STBI_NO_STDIO +unsigned char *stbi_png_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + png p; + start_file(&p.s, f); + return do_png(&p, x,y,comp,req_comp); +} + +unsigned char *stbi_png_load(char const *filename, int *x, int *y, int *comp, int req_comp) +{ + unsigned char *data; + FILE *f = fopen(filename, "rb"); + if (!f) return NULL; + data = stbi_png_load_from_file(f,x,y,comp,req_comp); + fclose(f); + return data; +} +#endif + +unsigned char *stbi_png_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + png p; + start_mem(&p.s, buffer,len); + return do_png(&p, x,y,comp,req_comp); +} + +#ifndef STBI_NO_STDIO +int stbi_png_test_file(FILE *f) +{ + png p; + int n,r; + n = ftell(f); + start_file(&p.s, f); + r = parse_png_file(&p, 
SCAN_type,STBI_default); + fseek(f,n,SEEK_SET); + return r; +} +#endif + +int stbi_png_test_memory(stbi_uc const *buffer, int len) +{ + png p; + start_mem(&p.s, buffer, len); + return parse_png_file(&p, SCAN_type,STBI_default); +} + +static int stbi_png_info_raw(png *p, int *x, int *y, int *comp) +{ + if (!parse_png_file(p, SCAN_header, 0)) + return 0; + if (x) *x = p->s.img_x; + if (y) *y = p->s.img_y; + if (comp) *comp = p->s.img_n; + return 1; +} + +#ifndef STBI_NO_STDIO +int stbi_png_info (char const *filename, int *x, int *y, int *comp) +{ + int res; + FILE *f = fopen(filename, "rb"); + if (!f) return 0; + res = stbi_png_info_from_file(f, x, y, comp); + fclose(f); + return res; +} + +int stbi_png_info_from_file(FILE *f, int *x, int *y, int *comp) +{ + png p; + int res; + long n = ftell(f); + start_file(&p.s, f); + res = stbi_png_info_raw(&p, x, y, comp); + fseek(f, n, SEEK_SET); + return res; +} +#endif // !STBI_NO_STDIO + +int stbi_png_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp) +{ + png p; + start_mem(&p.s, buffer, len); + return stbi_png_info_raw(&p, x, y, comp); +} + +// Microsoft/Windows BMP image + +static int bmp_test(stbi *s) +{ + int sz; + if (get8(s) != 'B') return 0; + if (get8(s) != 'M') return 0; + get32le(s); // discard filesize + get16le(s); // discard reserved + get16le(s); // discard reserved + get32le(s); // discard data offset + sz = get32le(s); + if (sz == 12 || sz == 40 || sz == 56 || sz == 108) return 1; + return 0; +} + +#ifndef STBI_NO_STDIO +int stbi_bmp_test_file (FILE *f) +{ + stbi s; + int r,n = ftell(f); + start_file(&s,f); + r = bmp_test(&s); + fseek(f,n,SEEK_SET); + return r; +} +#endif + +int stbi_bmp_test_memory (stbi_uc const *buffer, int len) +{ + stbi s; + start_mem(&s, buffer, len); + return bmp_test(&s); +} + +// returns 0..31 for the highest set bit +static int high_bit(unsigned int z) +{ + int n=0; + if (z == 0) return -1; + if (z >= 0x10000) n += 16, z >>= 16; + if (z >= 0x00100) n += 8, z >>= 8; + if (z >= 0x00010) n += 4, z >>= 4; + if (z >= 0x00004) n += 2, z >>= 2; + if (z >= 0x00002) n += 1, z >>= 1; + return n; +} + +static int bitcount(unsigned int a) +{ + a = (a & 0x55555555) + ((a >> 1) & 0x55555555); // max 2 + a = (a & 0x33333333) + ((a >> 2) & 0x33333333); // max 4 + a = (a + (a >> 4)) & 0x0f0f0f0f; // max 8 per 4, now 8 bits + a = (a + (a >> 8)); // max 16 per 8 bits + a = (a + (a >> 16)); // max 32 per 8 bits + return a & 0xff; +} + +static int shiftsigned(int v, int shift, int bits) +{ + int result; + int z=0; + + if (shift < 0) v <<= -shift; + else v >>= shift; + result = v; + + z = bits; + while (z < 8) { + result += v >> z; + z += bits; + } + return result; +} + +static stbi_uc *bmp_load(stbi *s, int *x, int *y, int *comp, int req_comp) +{ + uint8 *out; + unsigned int mr=0,mg=0,mb=0,ma=0, fake_a=0; + stbi_uc pal[256][4]; + int psize=0,i,j,compress=0,width; + int bpp, flip_vertically, pad, target, offset, hsz; + if (get8(s) != 'B' || get8(s) != 'M') return epuc("not BMP", "Corrupt BMP"); + get32le(s); // discard filesize + get16le(s); // discard reserved + get16le(s); // discard reserved + offset = get32le(s); + hsz = get32le(s); + if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108) return epuc("unknown BMP", "BMP type not supported: unknown"); + if (hsz == 12) { + s->img_x = get16le(s); + s->img_y = get16le(s); + } else { + s->img_x = get32le(s); + s->img_y = get32le(s); + } + if (get16le(s) != 1) return epuc("bad BMP", "bad BMP"); + bpp = get16le(s); + if (bpp == 1) return epuc("monochrome", 
"BMP type not supported: 1-bit"); + flip_vertically = ((int) s->img_y) > 0; + s->img_y = abs((int) s->img_y); + if (hsz == 12) { + if (bpp < 24) + psize = (offset - 14 - 24) / 3; + } else { + compress = get32le(s); + if (compress == 1 || compress == 2) return epuc("BMP RLE", "BMP type not supported: RLE"); + get32le(s); // discard sizeof + get32le(s); // discard hres + get32le(s); // discard vres + get32le(s); // discard colorsused + get32le(s); // discard max important + if (hsz == 40 || hsz == 56) { + if (hsz == 56) { + get32le(s); + get32le(s); + get32le(s); + get32le(s); + } + if (bpp == 16 || bpp == 32) { + mr = mg = mb = 0; + if (compress == 0) { + if (bpp == 32) { + mr = 0xffu << 16; + mg = 0xffu << 8; + mb = 0xffu << 0; + ma = 0xffu << 24; + fake_a = 1; // @TODO: check for cases like alpha value is all 0 and switch it to 255 + } else { + mr = 31u << 10; + mg = 31u << 5; + mb = 31u << 0; + } + } else if (compress == 3) { + mr = get32le(s); + mg = get32le(s); + mb = get32le(s); + // not documented, but generated by photoshop and handled by mspaint + if (mr == mg && mg == mb) { + // ?!?!? + return epuc("bad BMP", "bad BMP"); + } + } else + return epuc("bad BMP", "bad BMP"); + } + } else { + assert(hsz == 108); + mr = get32le(s); + mg = get32le(s); + mb = get32le(s); + ma = get32le(s); + get32le(s); // discard color space + for (i=0; i < 12; ++i) + get32le(s); // discard color space parameters + } + if (bpp < 16) + psize = (offset - 14 - hsz) >> 2; + } + s->img_n = ma ? 4 : 3; + if (req_comp && req_comp >= 3) // we can directly decode 3 or 4 + target = req_comp; + else + target = s->img_n; // if they want monochrome, we'll post-convert + out = (stbi_uc *) malloc(target * s->img_x * s->img_y); + if (!out) return epuc("outofmem", "Out of memory"); + if (bpp < 16) { + int z=0; + if (psize == 0 || psize > 256) { free(out); return epuc("invalid", "Corrupt BMP"); } + for (i=0; i < psize; ++i) { + pal[i][2] = get8u(s); + pal[i][1] = get8u(s); + pal[i][0] = get8u(s); + if (hsz != 12) get8(s); + pal[i][3] = 255; + } + skip(s, offset - 14 - hsz - psize * (hsz == 12 ? 3 : 4)); + if (bpp == 4) width = (s->img_x + 1) >> 1; + else if (bpp == 8) width = s->img_x; + else { free(out); return epuc("bad bpp", "Corrupt BMP"); } + pad = (-width)&3; + for (j=0; j < (int) s->img_y; ++j) { + for (i=0; i < (int) s->img_x; i += 2) { + int v=get8(s),v2=0; + if (bpp == 4) { + v2 = v & 15; + v >>= 4; + } + out[z++] = pal[v][0]; + out[z++] = pal[v][1]; + out[z++] = pal[v][2]; + if (target == 4) out[z++] = 255; + if (i+1 == (int) s->img_x) break; + v = (bpp == 8) ? 
get8(s) : v2; + out[z++] = pal[v][0]; + out[z++] = pal[v][1]; + out[z++] = pal[v][2]; + if (target == 4) out[z++] = 255; + } + skip(s, pad); + } + } else { + int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0; + int z = 0; + int easy=0; + skip(s, offset - 14 - hsz); + if (bpp == 24) width = 3 * s->img_x; + else if (bpp == 16) width = 2*s->img_x; + else /* bpp = 32 and pad = 0 */ width=0; + pad = (-width) & 3; + if (bpp == 24) { + easy = 1; + } else if (bpp == 32) { + if (mb == 0xff && mg == 0xff00 && mr == 0xff000000 && ma == 0xff000000) + easy = 2; + } + if (!easy) { + if (!mr || !mg || !mb) return epuc("bad masks", "Corrupt BMP"); + // right shift amt to put high bit in position #7 + rshift = high_bit(mr)-7; rcount = bitcount(mr); + gshift = high_bit(mg)-7; gcount = bitcount(mr); + bshift = high_bit(mb)-7; bcount = bitcount(mr); + ashift = high_bit(ma)-7; acount = bitcount(mr); + } + for (j=0; j < (int) s->img_y; ++j) { + if (easy) { + for (i=0; i < (int) s->img_x; ++i) { + int a; + out[z+2] = get8u(s); + out[z+1] = get8u(s); + out[z+0] = get8u(s); + z += 3; + a = (easy == 2 ? get8(s) : 255); + if (target == 4) out[z++] = (uint8) a; + } + } else { + for (i=0; i < (int) s->img_x; ++i) { + uint32 v = (bpp == 16 ? get16le(s) : get32le(s)); + int a; + out[z++] = (uint8) shiftsigned(v & mr, rshift, rcount); + out[z++] = (uint8) shiftsigned(v & mg, gshift, gcount); + out[z++] = (uint8) shiftsigned(v & mb, bshift, bcount); + a = (ma ? shiftsigned(v & ma, ashift, acount) : 255); + if (target == 4) out[z++] = (uint8) a; + } + } + skip(s, pad); + } + } + if (flip_vertically) { + stbi_uc t; + for (j=0; j < (int) s->img_y>>1; ++j) { + stbi_uc *p1 = out + j *s->img_x*target; + stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target; + for (i=0; i < (int) s->img_x*target; ++i) { + t = p1[i], p1[i] = p2[i], p2[i] = t; + } + } + } + + if (req_comp && req_comp != target) { + out = convert_format(out, target, req_comp, s->img_x, s->img_y); + if (out == NULL) return out; // convert_format frees input on failure + } + + *x = s->img_x; + *y = s->img_y; + if (comp) *comp = target; + return out; +} + +#ifndef STBI_NO_STDIO +stbi_uc *stbi_bmp_load (char const *filename, int *x, int *y, int *comp, int req_comp) +{ + stbi_uc *data; + FILE *f = fopen(filename, "rb"); + if (!f) return NULL; + data = stbi_bmp_load_from_file(f, x,y,comp,req_comp); + fclose(f); + return data; +} + +stbi_uc *stbi_bmp_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp) +{ + stbi s; + start_file(&s, f); + return bmp_load(&s, x,y,comp,req_comp); +} +#endif + +stbi_uc *stbi_bmp_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + stbi s; + start_mem(&s, buffer, len); + return bmp_load(&s, x,y,comp,req_comp); +} + +// Targa Truevision - TGA +// by Jonathan Dummer + +static int tga_info(stbi *s, int *x, int *y, int *comp) +{ + int tga_w, tga_h, tga_comp; + int sz; + get8u(s); // discard Offset + sz = get8u(s); // color type + if( sz > 1 ) return 0; // only RGB or indexed allowed + sz = get8u(s); // image type + // only RGB or grey allowed, +/- RLE + if ((sz != 1) && (sz != 2) && (sz != 3) && (sz != 9) && (sz != 10) && (sz != 11)) return 0; + get16le(s); // discard palette start + get16le(s); // discard palette length + get8(s); // discard bits per palette color entry + get16le(s); // discard x origin + get16le(s); // discard y origin + tga_w = get16le(s); + if( tga_w < 1 ) return 0; // test width + tga_h = get16le(s); + if( tga_h < 1 ) return 0; // test height + sz 
= get8(s); // bits per pixel + // only RGB or RGBA or grey allowed + if ((sz != 8) && (sz != 16) && (sz != 24) && (sz != 32)) return 0; + tga_comp = sz; + if (x) *x = tga_w; + if (y) *y = tga_h; + if (comp) *comp = tga_comp / 8; + return 1; // seems to have passed everything +} + +#ifndef STBI_NO_STDIO +int stbi_tga_info_from_file(FILE *f, int *x, int *y, int *comp) +{ + stbi s; + int r; + long n = ftell(f); + start_file(&s, f); + r = tga_info(&s, x, y, comp); + fseek(f, n, SEEK_SET); + return r; +} +#endif + +int stbi_tga_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp) +{ + stbi s; + start_mem(&s, buffer, len); + return tga_info(&s, x, y, comp); +} + +static int tga_test(stbi *s) +{ + int sz; + get8u(s); // discard Offset + sz = get8u(s); // color type + if ( sz > 1 ) return 0; // only RGB or indexed allowed + sz = get8u(s); // image type + if ( (sz != 1) && (sz != 2) && (sz != 3) && (sz != 9) && (sz != 10) && (sz != 11) ) return 0; // only RGB or grey allowed, +/- RLE + get16(s); // discard palette start + get16(s); // discard palette length + get8(s); // discard bits per palette color entry + get16(s); // discard x origin + get16(s); // discard y origin + if ( get16(s) < 1 ) return 0; // test width + if ( get16(s) < 1 ) return 0; // test height + sz = get8(s); // bits per pixel + if ( (sz != 8) && (sz != 16) && (sz != 24) && (sz != 32) ) return 0; // only RGB or RGBA or grey allowed + return 1; // seems to have passed everything +} + +#ifndef STBI_NO_STDIO +int stbi_tga_test_file (FILE *f) +{ + stbi s; + int r,n = ftell(f); + start_file(&s, f); + r = tga_test(&s); + fseek(f,n,SEEK_SET); + return r; +} +#endif + +int stbi_tga_test_memory (stbi_uc const *buffer, int len) +{ + stbi s; + start_mem(&s, buffer, len); + return tga_test(&s); +} + +static stbi_uc *tga_load(stbi *s, int *x, int *y, int *comp, int req_comp) +{ + // read in the TGA header stuff + int tga_offset = get8u(s); + int tga_indexed = get8u(s); + int tga_image_type = get8u(s); + int tga_is_RLE = 0; + int tga_palette_start = get16le(s); + int tga_palette_len = get16le(s); + int tga_palette_bits = get8u(s); + int tga_x_origin = get16le(s); + int tga_y_origin = get16le(s); + int tga_width = get16le(s); + int tga_height = get16le(s); + int tga_bits_per_pixel = get8u(s); + int tga_inverted = get8u(s); + // image data + unsigned char *tga_data; + unsigned char *tga_palette = NULL; + int i, j; + unsigned char raw_data[4]; + unsigned char trans_data[4]; + int RLE_count = 0; + int RLE_repeating = 0; + int read_next_pixel = 1; + + // do a tiny bit of precessing + if ( tga_image_type >= 8 ) + { + tga_image_type -= 8; + tga_is_RLE = 1; + } + /* int tga_alpha_bits = tga_inverted & 15; */ + tga_inverted = 1 - ((tga_inverted >> 5) & 1); + + // error check + if ( //(tga_indexed) || + (tga_width < 1) || (tga_height < 1) || + (tga_image_type < 1) || (tga_image_type > 3) || + ((tga_bits_per_pixel != 8) && (tga_bits_per_pixel != 16) && + (tga_bits_per_pixel != 24) && (tga_bits_per_pixel != 32)) + ) + { + return NULL; + } + + // If I'm paletted, then I'll use the number of bits from the palette + if ( tga_indexed ) + { + tga_bits_per_pixel = tga_palette_bits; + } + + // tga info + *x = tga_width; + *y = tga_height; + if ( (req_comp < 1) || (req_comp > 4) ) + { + // just use whatever the file was + req_comp = tga_bits_per_pixel / 8; + *comp = req_comp; + } else + { + // force a new number of components + *comp = tga_bits_per_pixel/8; + } + tga_data = (unsigned char*)malloc( tga_width * tga_height * req_comp ); + + // 
skip to the data's starting position (offset usually = 0) + skip(s, tga_offset ); + // do I need to load a palette? + if ( tga_indexed ) + { + // any data to skip? (offset usually = 0) + skip(s, tga_palette_start ); + // load the palette + tga_palette = (unsigned char*)malloc( tga_palette_len * tga_palette_bits / 8 ); + if (!getn(s, tga_palette, tga_palette_len * tga_palette_bits / 8 )) + return NULL; + } + // load the data + trans_data[0] = trans_data[1] = trans_data[2] = trans_data[3] = 0; + for (i=0; i < tga_width * tga_height; ++i) + { + // if I'm in RLE mode, do I need to get a RLE chunk? + if ( tga_is_RLE ) + { + if ( RLE_count == 0 ) + { + // yep, get the next byte as a RLE command + int RLE_cmd = get8u(s); + RLE_count = 1 + (RLE_cmd & 127); + RLE_repeating = RLE_cmd >> 7; + read_next_pixel = 1; + } else if ( !RLE_repeating ) + { + read_next_pixel = 1; + } + } else + { + read_next_pixel = 1; + } + // OK, if I need to read a pixel, do it now + if ( read_next_pixel ) + { + // load however much data we did have + if ( tga_indexed ) + { + // read in 1 byte, then perform the lookup + int pal_idx = get8u(s); + if ( pal_idx >= tga_palette_len ) + { + // invalid index + pal_idx = 0; + } + pal_idx *= tga_bits_per_pixel / 8; + for (j = 0; j*8 < tga_bits_per_pixel; ++j) + { + raw_data[j] = tga_palette[pal_idx+j]; + } + } else + { + // read in the data raw + for (j = 0; j*8 < tga_bits_per_pixel; ++j) + { + raw_data[j] = get8u(s); + } + } + // convert raw to the intermediate format + switch (tga_bits_per_pixel) + { + case 8: + // Luminous => RGBA + trans_data[0] = raw_data[0]; + trans_data[1] = raw_data[0]; + trans_data[2] = raw_data[0]; + trans_data[3] = 255; + break; + case 16: + // Luminous,Alpha => RGBA + trans_data[0] = raw_data[0]; + trans_data[1] = raw_data[0]; + trans_data[2] = raw_data[0]; + trans_data[3] = raw_data[1]; + break; + case 24: + // BGR => RGBA + trans_data[0] = raw_data[2]; + trans_data[1] = raw_data[1]; + trans_data[2] = raw_data[0]; + trans_data[3] = 255; + break; + case 32: + // BGRA => RGBA + trans_data[0] = raw_data[2]; + trans_data[1] = raw_data[1]; + trans_data[2] = raw_data[0]; + trans_data[3] = raw_data[3]; + break; + } + // clear the reading flag for the next pixel + read_next_pixel = 0; + } // end of reading a pixel + // convert to final format + switch (req_comp) + { + case 1: + // RGBA => Luminance + tga_data[i*req_comp+0] = compute_y(trans_data[0],trans_data[1],trans_data[2]); + break; + case 2: + // RGBA => Luminance,Alpha + tga_data[i*req_comp+0] = compute_y(trans_data[0],trans_data[1],trans_data[2]); + tga_data[i*req_comp+1] = trans_data[3]; + break; + case 3: + // RGBA => RGB + tga_data[i*req_comp+0] = trans_data[0]; + tga_data[i*req_comp+1] = trans_data[1]; + tga_data[i*req_comp+2] = trans_data[2]; + break; + case 4: + // RGBA => RGBA + tga_data[i*req_comp+0] = trans_data[0]; + tga_data[i*req_comp+1] = trans_data[1]; + tga_data[i*req_comp+2] = trans_data[2]; + tga_data[i*req_comp+3] = trans_data[3]; + break; + } + // in case we're in RLE mode, keep counting down + --RLE_count; + } + // do I need to invert the image? 
+ if ( tga_inverted ) + { + for (j = 0; j*2 < tga_height; ++j) + { + int index1 = j * tga_width * req_comp; + int index2 = (tga_height - 1 - j) * tga_width * req_comp; + for (i = tga_width * req_comp; i > 0; --i) + { + unsigned char temp = tga_data[index1]; + tga_data[index1] = tga_data[index2]; + tga_data[index2] = temp; + ++index1; + ++index2; + } + } + } + // clear my palette, if I had one + if ( tga_palette != NULL ) + { + free( tga_palette ); + } + // the things I do to get rid of an error message, and yet keep + // Microsoft's C compilers happy... [8^( + tga_palette_start = tga_palette_len = tga_palette_bits = + tga_x_origin = tga_y_origin = 0; + // OK, done + return tga_data; +} + +#ifndef STBI_NO_STDIO +stbi_uc *stbi_tga_load (char const *filename, int *x, int *y, int *comp, int req_comp) +{ + stbi_uc *data; + FILE *f = fopen(filename, "rb"); + if (!f) return NULL; + data = stbi_tga_load_from_file(f, x,y,comp,req_comp); + fclose(f); + return data; +} + +stbi_uc *stbi_tga_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp) +{ + stbi s; + start_file(&s, f); + return tga_load(&s, x,y,comp,req_comp); +} +#endif + +stbi_uc *stbi_tga_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + stbi s; + start_mem(&s, buffer, len); + return tga_load(&s, x,y,comp,req_comp); +} + + +// ************************************************************************************************* +// Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz, tweaked by STB + +static int psd_test(stbi *s) +{ + if (get32(s) != 0x38425053) return 0; // "8BPS" + else return 1; +} + +#ifndef STBI_NO_STDIO +int stbi_psd_test_file(FILE *f) +{ + stbi s; + int r,n = ftell(f); + start_file(&s, f); + r = psd_test(&s); + fseek(f,n,SEEK_SET); + return r; +} +#endif + +int stbi_psd_test_memory(stbi_uc const *buffer, int len) +{ + stbi s; + start_mem(&s, buffer, len); + return psd_test(&s); +} + +static stbi_uc *psd_load(stbi *s, int *x, int *y, int *comp, int req_comp) +{ + int pixelCount; + int channelCount, compression; + int channel, i, count, len; + int w,h; + uint8 *out; + + // Check identifier + if (get32(s) != 0x38425053) // "8BPS" + return epuc("not PSD", "Corrupt PSD image"); + + // Check file type version. + if (get16(s) != 1) + return epuc("wrong version", "Unsupported version of PSD image"); + + // Skip 6 reserved bytes. + skip(s, 6 ); + + // Read the number of channels (R, G, B, A, etc). + channelCount = get16(s); + if (channelCount < 0 || channelCount > 16) + return epuc("wrong channel count", "Unsupported number of channels in PSD image"); + + // Read the rows and columns of the image. + h = get32(s); + w = get32(s); + + // Make sure the depth is 8 bits. + if (get16(s) != 8) + return epuc("unsupported bit depth", "PSD bit depth is not 8 bit"); + + // Make sure the color mode is RGB. + // Valid options are: + // 0: Bitmap + // 1: Grayscale + // 2: Indexed color + // 3: RGB color + // 4: CMYK color + // 7: Multichannel + // 8: Duotone + // 9: Lab color + if (get16(s) != 3) + return epuc("wrong color format", "PSD is not in RGB color format"); + + // Skip the Mode Data. (It's the palette for indexed color; other info for other modes.) + skip(s,get32(s) ); + + // Skip the image resources. (resolution, pen tool paths, etc) + skip(s, get32(s) ); + + // Skip the reserved data. + skip(s, get32(s) ); + + // Find out if the data is compressed. 
+ // Known values: + // 0: no compression + // 1: RLE compressed + compression = get16(s); + if (compression > 1) + return epuc("bad compression", "PSD has an unknown compression format"); + + // Create the destination image. + out = (stbi_uc *) malloc(4 * w*h); + if (!out) return epuc("outofmem", "Out of memory"); + pixelCount = w*h; + + // Initialize the data to zero. + //memset( out, 0, pixelCount * 4 ); + + // Finally, the image data. + if (compression) { + // RLE as used by .PSD and .TIFF + // Loop until you get the number of unpacked bytes you are expecting: + // Read the next source byte into n. + // If n is between 0 and 127 inclusive, copy the next n+1 bytes literally. + // Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times. + // Else if n is 128, noop. + // Endloop + + // The RLE-compressed data is preceeded by a 2-byte data count for each row in the data, + // which we're going to just skip. + skip(s, h * channelCount * 2 ); + + // Read the RLE data by channel. + for (channel = 0; channel < 4; channel++) { + uint8 *p; + + p = out+channel; + if (channel >= channelCount) { + // Fill this channel with default data. + for (i = 0; i < pixelCount; i++) *p = (channel == 3 ? 255 : 0), p += 4; + } else { + // Read the RLE data. + count = 0; + while (count < pixelCount) { + len = get8(s); + if (len == 128) { + // No-op. + } else if (len < 128) { + // Copy next len+1 bytes literally. + len++; + count += len; + while (len) { + *p = get8u(s); + p += 4; + len--; + } + } else if (len > 128) { + uint8 val; + // Next -len+1 bytes in the dest are replicated from next source byte. + // (Interpret len as a negative 8-bit int.) + len ^= 0x0FF; + len += 2; + val = get8u(s); + count += len; + while (len) { + *p = val; + p += 4; + len--; + } + } + } + } + } + + } else { + // We're at the raw image data. It's each channel in order (Red, Green, Blue, Alpha, ...) + // where each channel consists of an 8-bit value for each pixel in the image. + + // Read the data by channel. + for (channel = 0; channel < 4; channel++) { + uint8 *p; + + p = out + channel; + if (channel > channelCount) { + // Fill this channel with default data. + for (i = 0; i < pixelCount; i++) *p = channel == 3 ? 255 : 0, p += 4; + } else { + // Read the data. 
+ for (i = 0; i < pixelCount; i++) + *p = get8u(s), p += 4; + } + } + } + + if (req_comp && req_comp != 4) { + out = convert_format(out, 4, req_comp, w, h); + if (out == NULL) return out; // convert_format frees input on failure + } + + if (comp) *comp = channelCount; + *y = h; + *x = w; + + return out; +} + +#ifndef STBI_NO_STDIO +stbi_uc *stbi_psd_load(char const *filename, int *x, int *y, int *comp, int req_comp) +{ + stbi_uc *data; + FILE *f = fopen(filename, "rb"); + if (!f) return NULL; + data = stbi_psd_load_from_file(f, x,y,comp,req_comp); + fclose(f); + return data; +} + +stbi_uc *stbi_psd_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + stbi s; + start_file(&s, f); + return psd_load(&s, x,y,comp,req_comp); +} +#endif + +stbi_uc *stbi_psd_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + stbi s; + start_mem(&s, buffer, len); + return psd_load(&s, x,y,comp,req_comp); +} + +// ************************************************************************************************* +// Softimage PIC loader +// by Tom Seddon +// +// See http://softimage.wiki.softimage.com/index.php/INFO:_PIC_file_format +// See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/ + +static int pic_is4(stbi *s,const char *str) +{ + int i; + for (i=0; i<4; ++i) + if (get8(s) != (stbi_uc)str[i]) + return 0; + + return 1; +} + +static int pic_test(stbi *s) +{ + int i; + + if (!pic_is4(s,"\x53\x80\xF6\x34")) + return 0; + + for(i=0;i<84;++i) + get8(s); + + if (!pic_is4(s,"PICT")) + return 0; + + return 1; +} + +typedef struct +{ + stbi_uc size,type,channel; +} pic_packet_t; + +static stbi_uc *pic_readval(stbi *s, int channel, stbi_uc *dest) +{ + int mask=0x80, i; + + for (i=0; i<4; ++i, mask>>=1) { + if (channel & mask) { + if (at_eof(s)) return epuc("bad file","PIC file too short"); + dest[i]=get8u(s); + } + } + + return dest; +} + +static void pic_copyval(int channel,stbi_uc *dest,const stbi_uc *src) +{ + int mask=0x80,i; + + for (i=0;i<4; ++i, mask>>=1) + if (channel&mask) + dest[i]=src[i]; +} + +static stbi_uc *pic_load2(stbi *s,int width,int height,int *comp, stbi_uc *result) +{ + int act_comp=0,num_packets=0,y,chained; + pic_packet_t packets[10]; + + // this will (should...) cater for even some bizarre stuff like having data + // for the same channel in multiple packets. + do { + pic_packet_t *packet; + + if (num_packets==sizeof(packets)/sizeof(packets[0])) + return epuc("bad format","too many packets"); + + packet = &packets[num_packets++]; + + chained = get8(s); + packet->size = get8u(s); + packet->type = get8u(s); + packet->channel = get8u(s); + + act_comp |= packet->channel; + + if (at_eof(s)) return epuc("bad file","file too short (reading packets)"); + if (packet->size != 8) return epuc("bad format","packet isn't 8bpp"); + } while (chained); + + *comp = (act_comp & 0x10 ? 4 : 3); // has alpha channel? 
+ + for(y=0; y<height; ++y) { + int packet_idx; + + for(packet_idx=0; packet_idx < num_packets; ++packet_idx) { + pic_packet_t *packet = &packets[packet_idx]; + stbi_uc *dest = result+y*width*4; + + switch (packet->type) { + default: + return epuc("bad format","packet has bad compression type"); + + case 0: {//uncompressed + int x; + + for(x=0;x<width;++x, dest+=4) + if (!pic_readval(s,packet->channel,dest)) + return 0; + break; + } + + case 1://Pure RLE + { + int left=width, i; + + while (left>0) { + stbi_uc count,value[4]; + + count=get8u(s); + if (at_eof(s)) return epuc("bad file","file too short (pure read count)"); + + if (count > left) + count = (uint8) left; + + if (!pic_readval(s,packet->channel,value)) return 0; + + for(i=0; i<count; ++i,dest+=4) + pic_copyval(packet->channel,dest,value); + left -= count; + } + } + break; + + case 2: {//Mixed RLE + int left=width; + while (left>0) { + int count = get8(s), i; + if (at_eof(s)) return epuc("bad file","file too short (mixed read count)"); + + if (count >= 128) { // Repeated + stbi_uc value[4]; + int i; + + if (count==128) + count = get16(s); + else + count -= 127; + if (count > left) + return epuc("bad file","scanline overrun"); + + if (!pic_readval(s,packet->channel,value)) + return 0; + + for(i=0;i<count;++i, dest+=4) + pic_copyval(packet->channel,dest,value); + } else { // Raw + ++count; + if (count>left) return epuc("bad file","scanline overrun"); + + for(i=0;i<count;++i, dest+=4) + if (!pic_readval(s,packet->channel,dest)) + return 0; + } + left-=count; + } + break; + } + } + } + + return result; +} + +static stbi_uc *pic_load(stbi *s,int *px,int *py,int *comp,int req_comp) +{ + stbi_uc *result; + int i, x,y; + + for (i=0; i<92; ++i) + get8(s); + + x = get16(s); + y = get16(s); + if (at_eof(s)) return epuc("bad file","file too short (pic header)"); + if ((1 << 28) / x < y) return epuc("too large", "Image too large to decode"); + + get32(s); //skip `ratio' + get16(s); //skip `fields' + get16(s); //skip `pad' + + // intermediate buffer is RGBA + result = (stbi_uc *) malloc(x*y*4); + memset(result, 0xff, x*y*4); + + if (!pic_load2(s,x,y,comp, result)) { + free(result); + result=0; + } + *px = x; + *py = y; + if (req_comp == 0) req_comp = *comp; + result=convert_format(result,4,req_comp,x,y); + + return result; +} + +int stbi_pic_test_memory(stbi_uc const *buffer, int len) +{ + stbi s; + start_mem(&s,buffer,len); + return pic_test(&s); +} + +stbi_uc *stbi_pic_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + stbi s; + start_mem(&s,buffer,len); + return pic_load(&s,x,y,comp,req_comp); +} + +#ifndef STBI_NO_STDIO +int stbi_pic_test_file(FILE *f) +{ + int result; + long l = ftell(f); + stbi s; + start_file(&s,f); + result = pic_test(&s); + fseek(f,l,SEEK_SET); + return result; +} + +stbi_uc *stbi_pic_load(char const *filename,int *x, int *y, int *comp, int req_comp) +{ + stbi_uc *result; + FILE *f=fopen(filename,"rb"); + if (!f) return 0; + result = stbi_pic_load_from_file(f,x,y,comp,req_comp); + fclose(f); + return result; +} + +stbi_uc *stbi_pic_load_from_file(FILE *f,int *x, int *y, int *comp, int req_comp) +{ + stbi s; + start_file(&s,f); + return pic_load(&s,x,y,comp,req_comp); +} +#endif + +// ************************************************************************************************* +// GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb +typedef struct stbi_gif_lzw_struct { + int16 prefix; + uint8 first; + uint8 suffix; +} stbi_gif_lzw; + +typedef struct stbi_gif_struct +{ + int w,h; + stbi_uc *out; // output buffer (always 4 components) + int flags, bgindex, ratio, transparent, eflags; + uint8 pal[256][4]; + uint8 lpal[256][4]; + stbi_gif_lzw codes[4096]; + uint8 *color_table; + int parse, step; + int lflags; + int start_x, start_y; + int max_x, max_y; + int cur_x, cur_y; + int line_size; +} stbi_gif; + +static int 
gif_test(stbi *s) +{ + int sz; + if (get8(s) != 'G' || get8(s) != 'I' || get8(s) != 'F' || get8(s) != '8') return 0; + sz = get8(s); + if (sz != '9' && sz != '7') return 0; + if (get8(s) != 'a') return 0; + return 1; +} + +#ifndef STBI_NO_STDIO +int stbi_gif_test_file (FILE *f) +{ + stbi s; + int r,n = ftell(f); + start_file(&s,f); + r = gif_test(&s); + fseek(f,n,SEEK_SET); + return r; +} +#endif + +int stbi_gif_test_memory (stbi_uc const *buffer, int len) +{ + stbi s; + start_mem(&s, buffer, len); + return gif_test(&s); +} + +static void stbi_gif_parse_colortable(stbi *s, uint8 pal[256][4], int num_entries, int transp) +{ + int i; + for (i=0; i < num_entries; ++i) { + pal[i][2] = get8u(s); + pal[i][1] = get8u(s); + pal[i][0] = get8u(s); + pal[i][3] = transp ? 0 : 255; + } +} + +static int stbi_gif_header(stbi *s, stbi_gif *g, int *comp, int is_info) +{ + uint8 version; + if (get8(s) != 'G' || get8(s) != 'I' || get8(s) != 'F' || get8(s) != '8') + return e("not GIF", "Corrupt GIF"); + + version = get8u(s); + if (version != '7' && version != '9') return e("not GIF", "Corrupt GIF"); + if (get8(s) != 'a') return e("not GIF", "Corrupt GIF"); + + failure_reason = ""; + g->w = get16le(s); + g->h = get16le(s); + g->flags = get8(s); + g->bgindex = get8(s); + g->ratio = get8(s); + g->transparent = -1; + + if (comp != 0) *comp = 4; // can't actually tell whether it's 3 or 4 until we parse the comments + + if (is_info) return 1; + + if (g->flags & 0x80) + stbi_gif_parse_colortable(s,g->pal, 2 << (g->flags & 7), -1); + + return 1; +} + +static int stbi_gif_info_raw(stbi *s, int *x, int *y, int *comp) +{ + stbi_gif g; + if (!stbi_gif_header(s, &g, comp, 1)) return 0; + if (x) *x = g.w; + if (y) *y = g.h; + return 1; +} + +static void stbi_out_gif_code(stbi_gif *g, uint16 code) +{ + uint8 *p, *c; + + // recurse to decode the prefixes, since the linked-list is backwards, + // and working backwards through an interleaved image would be nasty + if (g->codes[code].prefix >= 0) + stbi_out_gif_code(g, g->codes[code].prefix); + + if (g->cur_y >= g->max_y) return; + + p = &g->out[g->cur_x + g->cur_y]; + c = &g->color_table[g->codes[code].suffix * 4]; + + if (c[3] >= 128) { + p[0] = c[2]; + p[1] = c[1]; + p[2] = c[0]; + p[3] = c[3]; + } + g->cur_x += 4; + + if (g->cur_x >= g->max_x) { + g->cur_x = g->start_x; + g->cur_y += g->step; + + while (g->cur_y >= g->max_y && g->parse > 0) { + g->step = (1 << g->parse) * g->line_size; + g->cur_y = g->start_y + (g->step >> 1); + --g->parse; + } + } +} + +static uint8 *stbi_process_gif_raster(stbi *s, stbi_gif *g) +{ + uint8 lzw_cs; + int32 len, code; + uint32 first; + int32 codesize, codemask, avail, oldcode, bits, valid_bits, clear; + stbi_gif_lzw *p; + + lzw_cs = get8u(s); + clear = 1 << lzw_cs; + first = 1; + codesize = lzw_cs + 1; + codemask = (1 << codesize) - 1; + bits = 0; + valid_bits = 0; + for (code = 0; code < clear; code++) { + g->codes[code].prefix = -1; + g->codes[code].first = (uint8) code; + g->codes[code].suffix = (uint8) code; + } + + // support no starting clear code + avail = clear+2; + oldcode = -1; + + len = 0; + for(;;) { + if (valid_bits < codesize) { + if (len == 0) { + len = get8(s); // start new block + if (len == 0) + return g->out; + } + --len; + bits |= (int32) get8(s) << valid_bits; + valid_bits += 8; + } else { + int32 code = bits & codemask; + bits >>= codesize; + valid_bits -= codesize; + // @OPTIMIZE: is there some way we can accelerate the non-clear path? 
+ if (code == clear) { // clear code + codesize = lzw_cs + 1; + codemask = (1 << codesize) - 1; + avail = clear + 2; + oldcode = -1; + first = 0; + } else if (code == clear + 1) { // end of stream code + skip(s, len); + while ((len = get8(s)) > 0) + skip(s,len); + return g->out; + } else if (code <= avail) { + if (first) return epuc("no clear code", "Corrupt GIF"); + + if (oldcode >= 0) { + p = &g->codes[avail++]; + if (avail > 4096) return epuc("too many codes", "Corrupt GIF"); + p->prefix = (int16) oldcode; + p->first = g->codes[oldcode].first; + p->suffix = (code == avail) ? p->first : g->codes[code].first; + } else if (code == avail) + return epuc("illegal code in raster", "Corrupt GIF"); + + stbi_out_gif_code(g, (uint16) code); + + if ((avail & codemask) == 0 && avail <= 0x0FFF) { + codesize++; + codemask = (1 << codesize) - 1; + } + + oldcode = code; + } else { + return epuc("illegal code in raster", "Corrupt GIF"); + } + } + } +} + +static void stbi_fill_gif_background(stbi_gif *g) +{ + int i; + uint8 *c = g->pal[g->bgindex]; + // @OPTIMIZE: write a dword at a time + for (i = 0; i < g->w * g->h * 4; i += 4) { + uint8 *p = &g->out[i]; + p[0] = c[2]; + p[1] = c[1]; + p[2] = c[0]; + p[3] = c[3]; + } +} + +// this function is designed to support animated gifs, although stb_image doesn't support it +static uint8 *stbi_gif_load_next(stbi *s, stbi_gif *g, int *comp, int req_comp) +{ + int i; + uint8 *old_out = 0; + + if (g->out == 0) { + if (!stbi_gif_header(s, g, comp,0)) return 0; // failure_reason set by stbi_gif_header + g->out = (uint8 *) malloc(4 * g->w * g->h); + if (g->out == 0) return epuc("outofmem", "Out of memory"); + stbi_fill_gif_background(g); + } else { + // animated-gif-only path + if (((g->eflags & 0x1C) >> 2) == 3) { + old_out = g->out; + g->out = (uint8 *) malloc(4 * g->w * g->h); + if (g->out == 0) return epuc("outofmem", "Out of memory"); + memcpy(g->out, old_out, g->w*g->h*4); + } + } + + for (;;) { + switch (get8(s)) { + case 0x2C: /* Image Descriptor */ + { + int32 x, y, w, h; + uint8 *o; + + x = get16le(s); + y = get16le(s); + w = get16le(s); + h = get16le(s); + if (((x + w) > (g->w)) || ((y + h) > (g->h))) + return epuc("bad Image Descriptor", "Corrupt GIF"); + + g->line_size = g->w * 4; + g->start_x = x * 4; + g->start_y = y * g->line_size; + g->max_x = g->start_x + w * 4; + g->max_y = g->start_y + h * g->line_size; + g->cur_x = g->start_x; + g->cur_y = g->start_y; + + g->lflags = get8(s); + + if (g->lflags & 0x40) { + g->step = 8 * g->line_size; // first interlaced spacing + g->parse = 3; + } else { + g->step = g->line_size; + g->parse = 0; + } + + if (g->lflags & 0x80) { + stbi_gif_parse_colortable(s,g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? g->transparent : -1); + g->color_table = (uint8 *) g->lpal; + } else if (g->flags & 0x80) { + for (i=0; i < 256; ++i) // @OPTIMIZE: reset only the previous transparent + g->pal[i][3] = 255; + if (g->transparent >= 0 && (g->eflags & 0x01)) + g->pal[g->transparent][3] = 0; + g->color_table = (uint8 *) g->pal; + } else + return epuc("missing color table", "Corrupt GIF"); + + o = stbi_process_gif_raster(s, g); + if (o == NULL) return NULL; + + if (req_comp && req_comp != 4) + o = convert_format(o, 4, req_comp, g->w, g->h); + return o; + } + + case 0x21: // Comment Extension. + { + int len; + if (get8(s) == 0xF9) { // Graphic Control Extension. 
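// (Reference layout of the sub-block handled here, taken from the GIF89a spec rather than from this
//  patch:
//    0x21 0xF9 0x04 <packed> <delay lo> <delay hi> <transparent index> 0x00
//  <packed> is what gets stored in g->eflags: bit 0 is the transparency flag (tested as eflags & 0x01
//  elsewhere in this loader) and bits 2..4 are the disposal method ((eflags & 0x1C) >> 2 above).)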
+ len = get8(s); + if (len == 4) { + g->eflags = get8(s); + get16le(s); // delay + g->transparent = get8(s); + } else { + skip(s, len); + break; + } + } + while ((len = get8(s)) != 0) + skip(s, len); + break; + } + + case 0x3B: // gif stream termination code + return (uint8 *) 1; + + default: + return epuc("unknown code", "Corrupt GIF"); + } + } +} + +#ifndef STBI_NO_STDIO +stbi_uc *stbi_gif_load (char const *filename, int *x, int *y, int *comp, int req_comp) +{ + uint8 *data; + FILE *f = fopen(filename, "rb"); + if (!f) return NULL; + data = stbi_gif_load_from_file(f, x,y,comp,req_comp); + fclose(f); + return data; +} + +stbi_uc *stbi_gif_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp) +{ + uint8 *u = 0; + stbi s; + stbi_gif g={0}; + start_file(&s, f); + + u = stbi_gif_load_next(&s, &g, comp, req_comp); + if (u == (void *) 1) u = 0; // end of animated gif marker + if (u) { + *x = g.w; + *y = g.h; + } + + return u; +} +#endif + +stbi_uc *stbi_gif_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + uint8 *u = 0; + stbi s; + stbi_gif g={0}; + start_mem(&s, buffer, len); + u = stbi_gif_load_next(&s, &g, comp, req_comp); + if (u == (void *) 1) u = 0; // end of animated gif marker + if (u) { + *x = g.w; + *y = g.h; + } + return u; +} + +#ifndef STBI_NO_STDIO +int stbi_gif_info (char const *filename, int *x, int *y, int *comp) +{ + int res; + FILE *f = fopen(filename, "rb"); + if (!f) return 0; + res = stbi_gif_info_from_file(f, x, y, comp); + fclose(f); + return res; +} + +int stbi_gif_info_from_file(FILE *f, int *x, int *y, int *comp) +{ + stbi s; + int res; + long n = ftell(f); + start_file(&s, f); + res = stbi_gif_info_raw(&s, x, y, comp); + fseek(f, n, SEEK_SET); + return res; +} +#endif // !STBI_NO_STDIO + +int stbi_gif_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp) +{ + stbi s; + start_mem(&s, buffer, len); + return stbi_gif_info_raw(&s, x, y, comp); +} + + + + +// ************************************************************************************************* +// Radiance RGBE HDR loader +// originally by Nicolas Schulz +#ifndef STBI_NO_HDR +static int hdr_test(stbi *s) +{ + const char *signature = "#?RADIANCE\n"; + int i; + for (i=0; signature[i]; ++i) + if (get8(s) != signature[i]) + return 0; + return 1; +} + +int stbi_hdr_test_memory(stbi_uc const *buffer, int len) +{ + stbi s; + start_mem(&s, buffer, len); + return hdr_test(&s); +} + +#ifndef STBI_NO_STDIO +int stbi_hdr_test_file(FILE *f) +{ + stbi s; + int r,n = ftell(f); + start_file(&s, f); + r = hdr_test(&s); + fseek(f,n,SEEK_SET); + return r; +} +#endif + +#define HDR_BUFLEN 1024 +static char *hdr_gettoken(stbi *z, char *buffer) +{ + int len=0; + char c = '\0'; + + c = (char) get8(z); + + while (!at_eof(z) && c != '\n') { + buffer[len++] = c; + if (len == HDR_BUFLEN-1) { + // flush to end of line + while (!at_eof(z) && get8(z) != '\n') + ; + break; + } + c = (char) get8(z); + } + + buffer[len] = 0; + return buffer; +} + +static void hdr_convert(float *output, stbi_uc *input, int req_comp) +{ + if ( input[3] != 0 ) { + float f1; + // Exponent + f1 = (float) ldexp(1.0f, input[3] - (int)(128 + 8)); + if (req_comp <= 2) + output[0] = (input[0] + input[1] + input[2]) * f1 / 3; + else { + output[0] = input[0] * f1; + output[1] = input[1] * f1; + output[2] = input[2] * f1; + } + if (req_comp == 2) output[1] = 1; + if (req_comp == 4) output[3] = 1; + } else { + switch (req_comp) { + case 4: output[3] = 1; /* fallthrough */ + case 3: output[0] = 
output[1] = output[2] = 0; + break; + case 2: output[1] = 1; /* fallthrough */ + case 1: output[0] = 0; + break; + } + } +} + + +static float *hdr_load(stbi *s, int *x, int *y, int *comp, int req_comp) +{ + char buffer[HDR_BUFLEN]; + char *token; + int valid = 0; + int width, height; + stbi_uc *scanline; + float *hdr_data; + int len; + unsigned char count, value; + int i, j, k, c1,c2, z; + + + // Check identifier + if (strcmp(hdr_gettoken(s,buffer), "#?RADIANCE") != 0) + return epf("not HDR", "Corrupt HDR image"); + + // Parse header + for(;;) { + token = hdr_gettoken(s,buffer); + if (token[0] == 0) break; + if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1; + } + + if (!valid) return epf("unsupported format", "Unsupported HDR format"); + + // Parse width and height + // can't use sscanf() if we're not using stdio! + token = hdr_gettoken(s,buffer); + if (strncmp(token, "-Y ", 3)) return epf("unsupported data layout", "Unsupported HDR format"); + token += 3; + height = strtol(token, &token, 10); + while (*token == ' ') ++token; + if (strncmp(token, "+X ", 3)) return epf("unsupported data layout", "Unsupported HDR format"); + token += 3; + width = strtol(token, NULL, 10); + + *x = width; + *y = height; + + *comp = 3; + if (req_comp == 0) req_comp = 3; + + // Read data + hdr_data = (float *) malloc(height * width * req_comp * sizeof(float)); + + // Load image data + // image data is stored as some number of sca + if ( width < 8 || width >= 32768) { + // Read flat data + for (j=0; j < height; ++j) { + for (i=0; i < width; ++i) { + stbi_uc rgbe[4]; + main_decode_loop: + getn(s, rgbe, 4); + hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp); + } + } + } else { + // Read RLE-encoded data + scanline = NULL; + + for (j = 0; j < height; ++j) { + c1 = get8(s); + c2 = get8(s); + len = get8(s); + if (c1 != 2 || c2 != 2 || (len & 0x80)) { + // not run-length encoded, so we have to actually use THIS data as a decoded + // pixel (note this can't be a valid pixel--one of RGB must be >= 128) + uint8 rgbe[4]; + rgbe[0] = (uint8) c1; + rgbe[1] = (uint8) c2; + rgbe[2] = (uint8) len; + rgbe[3] = (uint8) get8u(s); + hdr_convert(hdr_data, rgbe, req_comp); + i = 1; + j = 0; + free(scanline); + goto main_decode_loop; // yes, this makes no sense + } + len <<= 8; + len |= get8(s); + if (len != width) { free(hdr_data); free(scanline); return epf("invalid decoded scanline length", "corrupt HDR"); } + if (scanline == NULL) scanline = (stbi_uc *) malloc(width * 4); + + for (k = 0; k < 4; ++k) { + i = 0; + while (i < width) { + count = get8u(s); + if (count > 128) { + // Run + value = get8u(s); + count -= 128; + for (z = 0; z < count; ++z) + scanline[i++ * 4 + k] = value; + } else { + // Dump + for (z = 0; z < count; ++z) + scanline[i++ * 4 + k] = get8u(s); + } + } + } + for (i=0; i < width; ++i) + hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp); + } + free(scanline); + } + + return hdr_data; +} + +#ifndef STBI_NO_STDIO +float *stbi_hdr_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + stbi s; + start_file(&s,f); + return hdr_load(&s,x,y,comp,req_comp); +} +#endif + +float *stbi_hdr_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + stbi s; + start_mem(&s,buffer, len); + return hdr_load(&s,x,y,comp,req_comp); +} + +#endif // STBI_NO_HDR + + +#ifndef STBI_NO_STDIO +int stbi_info(char const *filename, int *x, int *y, int *comp) +{ + FILE *f = fopen(filename, "rb"); + int result; + if (!f) return e("can't 
fopen", "Unable to open file"); + result = stbi_info_from_file(f, x, y, comp); + fclose(f); + return result; +} + +int stbi_info_from_file(FILE *f, int *x, int *y, int *comp) +{ + if (stbi_jpeg_info_from_file(f, x, y, comp)) + return 1; + if (stbi_png_info_from_file(f, x, y, comp)) + return 1; + if (stbi_gif_info_from_file(f, x, y, comp)) + return 1; + // @TODO: stbi_bmp_info_from_file + // @TODO: stbi_psd_info_from_file + #ifndef STBI_NO_HDR + // @TODO: stbi_hdr_info_from_file + #endif + // test tga last because it's a crappy test! + if (stbi_tga_info_from_file(f, x, y, comp)) + return 1; + return e("unknown image type", "Image not of any known type, or corrupt"); +} +#endif // !STBI_NO_STDIO + +int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp) +{ + if (stbi_jpeg_info_from_memory(buffer, len, x, y, comp)) + return 1; + if (stbi_png_info_from_memory(buffer, len, x, y, comp)) + return 1; + if (stbi_gif_info_from_memory(buffer, len, x, y, comp)) + return 1; + // @TODO: stbi_bmp_info_from_memory + // @TODO: stbi_psd_info_from_memory + #ifndef STBI_NO_HDR + // @TODO: stbi_hdr_info_from_memory + #endif + // test tga last because it's a crappy test! + if (stbi_tga_info_from_memory(buffer, len, x, y, comp)) + return 1; + return e("unknown image type", "Image not of any known type, or corrupt"); +} + +#endif // STBI_HEADER_FILE_ONLY + +/* + revision history: + 1.29 (2010-08-16) various warning fixes from Aurelien Pocheville + 1.28 (2010-08-01) fix bug in GIF palette transparency (SpartanJ) + 1.27 (2010-08-01) + cast-to-uint8 to fix warnings + 1.26 (2010-07-24) + fix bug in file buffering for PNG reported by SpartanJ + 1.25 (2010-07-17) + refix trans_data warning (Won Chun) + 1.24 (2010-07-12) + perf improvements reading from files on platforms with lock-heavy fgetc() + minor perf improvements for jpeg + deprecated type-specific functions so we'll get feedback if they're needed + attempt to fix trans_data warning (Won Chun) + 1.23 fixed bug in iPhone support + 1.22 (2010-07-10) + removed image *writing* support + removed image *writing* support + stbi_info support from Jetro Lauha + GIF support from Jean-Marc Lienher + iPhone PNG-extensions from James Brown + warning-fixes from Nicolas Schulz and Janez Zemva (i.e. Janez (U+017D)emva) + 1.21 fix use of 'uint8' in header (reported by jon blow) + 1.20 added support for Softimage PIC, by Tom Seddon + 1.19 bug in interlaced PNG corruption check (found by ryg) + 1.18 2008-08-02 + fix a threading bug (local mutable static) + 1.17 support interlaced PNG + 1.16 major bugfix - convert_format converted one too many pixels + 1.15 initialize some fields for thread safety + 1.14 fix threadsafe conversion bug + header-file-only version (#define STBI_HEADER_FILE_ONLY before including) + 1.13 threadsafe + 1.12 const qualifiers in the API + 1.11 Support installable IDCT, colorspace conversion routines + 1.10 Fixes for 64-bit (don't use "unsigned long") + optimized upsampling by Fabian "ryg" Giesen + 1.09 Fix format-conversion for PSD code (bad global variables!) 
+ 1.08 Thatcher Ulrich's PSD code integrated by Nicolas Schulz + 1.07 attempt to fix C++ warning/errors again + 1.06 attempt to fix C++ warning/errors again + 1.05 fix TGA loading to return correct *comp and use good luminance calc + 1.04 default float alpha is 1, not 255; use 'void *' for stbi_image_free + 1.03 bugfixes to STBI_NO_STDIO, STBI_NO_HDR + 1.02 support for (subset of) HDR files, float interface for preferred access to them + 1.01 fix bug: possible bug in handling right-side up bmps... not sure + fix bug: the stbi_bmp_load() and stbi_tga_load() functions didn't work at all + 1.00 interface to zlib that skips zlib header + 0.99 correct handling of alpha in palette + 0.98 TGA loader by lonesock; dynamically add loaders (untested) + 0.97 jpeg errors on too large a file; also catch another malloc failure + 0.96 fix detection of invalid v value - particleman@mollyrocket forum + 0.95 during header scan, seek to markers in case of padding + 0.94 STBI_NO_STDIO to disable stdio usage; rename all #defines the same + 0.93 handle jpegtran output; verbose errors + 0.92 read 4,8,16,24,32-bit BMP files of several formats + 0.91 output 24-bit Windows 3.0 BMP files + 0.90 fix a few more warnings; bump version number to approach 1.0 + 0.61 bugfixes due to Marc LeBlanc, Christopher Lloyd + 0.60 fix compiling as c++ + 0.59 fix warnings: merge Dave Moore's -Wall fixes + 0.58 fix bug: zlib uncompressed mode len/nlen was wrong endian + 0.57 fix bug: jpg last huffman symbol before marker was >9 bits but less + than 16 available + 0.56 fix bug: zlib uncompressed mode len vs. nlen + 0.55 fix bug: restart_interval not initialized to 0 + 0.54 allow NULL for 'int *comp' + 0.53 fix bug in png 3->4; speedup png decoding + 0.52 png handles req_comp=3,4 directly; minor cleanup; jpeg comments + 0.51 obey req_comp requests, 1-component jpegs return as 1-component, + on 'test' only check type, not whether we support this variant +*/ Index: ps/trunk/libraries/source/nvtt/src/src/CMakeLists.txt =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/CMakeLists.txt +++ ps/trunk/libraries/source/nvtt/src/src/CMakeLists.txt @@ -1,23 +1,21 @@ +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}) +INCLUDE_DIRECTORIES(${NV_SOURCE_DIR}/extern/poshlib) +INCLUDE_DIRECTORIES(${NV_SOURCE_DIR}/extern/stb) + SUBDIRS(nvcore) SUBDIRS(nvmath) SUBDIRS(nvimage) +SUBDIRS(nvthread) SUBDIRS(nvtt) +SUBDIRS(bc6h) +SUBDIRS(bc7) -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}) +# Make PNG optional (we disable it on macOS) +SET(PNG TRUE CACHE BOOL "") -# initial variables -SET(GLUT TRUE CACHE BOOL "") -SET(GLEW TRUE CACHE BOOL "") -SET(CG TRUE CACHE BOOL "") -SET(CUDA TRUE CACHE BOOL "") -SET(OPENEXR TRUE CACHE BOOL "") -SET(JPEG TRUE CACHE BOOL "") -SET(PNG TRUE CACHE BOOL "") -SET(TIFF TRUE CACHE BOOL "") - -# OpenGL -INCLUDE(FindOpenGL) +# OpenGL +INCLUDE(FindOpenGL) IF(OPENGL_FOUND) MESSAGE(STATUS "Looking for OpenGL - found") ELSE(OPENGL_FOUND) @@ -25,15 +23,12 @@ ENDIF(OPENGL_FOUND) # GLUT -IF(GLUT) - INCLUDE(${NV_CMAKE_DIR}/FindGLUT.cmake) - #INCLUDE(FindGLUT) - IF(GLUT_FOUND) - MESSAGE(STATUS "Looking for GLUT - found") - ELSE(GLUT_FOUND) - MESSAGE(STATUS "Looking for GLUT - not found") - ENDIF(GLUT_FOUND) -ENDIF(GLUT) +#INCLUDE(FindGLUT) +#IF(GLUT_FOUND) +# MESSAGE(STATUS "Looking for GLUT - found") +#ELSE(GLUT_FOUND) +# MESSAGE(STATUS "Looking for GLUT - not found") +#ENDIF(GLUT_FOUND) # DirectX 
INCLUDE(${NV_CMAKE_DIR}/FindDirectX.cmake) @@ -44,105 +39,118 @@ ENDIF(DX10_FOUND) # GLEW -IF(GLEW) - INCLUDE(${NV_CMAKE_DIR}/FindGLEW.cmake) - IF(GLEW_FOUND) - MESSAGE(STATUS "Looking for GLEW - found") - ELSE(GLEW_FOUND) - MESSAGE(STATUS "Looking for GLEW - not found") - ENDIF(GLEW_FOUND) -ENDIF(GLEW) +#INCLUDE(${NV_CMAKE_DIR}/FindGLEW.cmake) +#IF(GLEW_FOUND) +# MESSAGE(STATUS "Looking for GLEW - found") +#ELSE(GLEW_FOUND) +# MESSAGE(STATUS "Looking for GLEW - not found") +#ENDIF(GLEW_FOUND) # Cg -IF(CG) - INCLUDE(${NV_CMAKE_DIR}/FindCg.cmake) - IF(CG_FOUND) - MESSAGE(STATUS "Looking for Cg - found") - ELSE(CG_FOUND) - MESSAGE(STATUS "Looking for Cg - not found") - ENDIF(CG_FOUND) -ENDIF(CG) +#INCLUDE(${NV_CMAKE_DIR}/FindCg.cmake) +#IF(CG_FOUND) +# MESSAGE(STATUS "Looking for Cg - found") +#ELSE(CG_FOUND) +# MESSAGE(STATUS "Looking for Cg - not found") +#ENDIF(CG_FOUND) # CUDA -IF(CUDA) - INCLUDE(${NV_CMAKE_DIR}/FindCUDA.cmake) - IF(CUDA_FOUND) - SET(HAVE_CUDA ${CUDA_FOUND} CACHE BOOL "Set to TRUE if CUDA is found, FALSE otherwise") - MESSAGE(STATUS "Looking for CUDA - found") - ELSE(CUDA_FOUND) - MESSAGE(STATUS "Looking for CUDA - not found") - ENDIF(CUDA_FOUND) -ENDIF(CUDA) +#FIND_PACKAGE(CUDA) +#IF(CUDA_FOUND) +# IF(MINGW) +# MESSAGE(STATUS "Looking for CUDA - not supported on MinGW") +# UNSET(CUDA_FOUND) +# ENDIF(MINGW) +# IF(CUDA_FOUND) +# SET(HAVE_CUDA ${CUDA_FOUND} CACHE BOOL "Set to TRUE if CUDA is found, FALSE otherwise") +# MESSAGE(STATUS "Looking for CUDA - found") +# ENDIF(CUDA_FOUND) +#ELSE(CUDA_FOUND) +# MESSAGE(STATUS "Looking for CUDA - not found") +#ENDIF(CUDA_FOUND) # Maya -INCLUDE(${NV_CMAKE_DIR}/FindMaya.cmake) -IF(MAYA_FOUND) - SET(HAVE_MAYA ${MAYA_FOUND} CACHE BOOL "Set to TRUE if Maya is found, FALSE otherwise") - MESSAGE(STATUS "Looking for Maya - found") -ELSE(MAYA_FOUND) - MESSAGE(STATUS "Looking for Maya - not found") -ENDIF(MAYA_FOUND) - -# JPEG -IF(JPEG) - INCLUDE(FindJPEG) - IF(JPEG_FOUND) - SET(HAVE_JPEG ${JPEG_FOUND} CACHE BOOL "Set to TRUE if JPEG is found, FALSE otherwise") - MESSAGE(STATUS "Looking for JPEG - found") - ELSE(JPEG_FOUND) - MESSAGE(STATUS "Looking for JPEG - not found") - ENDIF(JPEG_FOUND) -ENDIF(JPEG) - -# PNG -IF(PNG) - INCLUDE(FindPNG) - IF(PNG_FOUND) - SET(HAVE_PNG ${PNG_FOUND} CACHE BOOL "Set to TRUE if PNG is found, FALSE otherwise") - MESSAGE(STATUS "Looking for PNG - found") - ELSE(PNG_FOUND) - MESSAGE(STATUS "Looking for PNG - not found") - ENDIF(PNG_FOUND) -ENDIF(PNG) - -# TIFF -IF(TIFF) - INCLUDE(FindTIFF) - IF(TIFF_FOUND) - SET(HAVE_TIFF ${TIFF_FOUND} CACHE BOOL "Set to TRUE if TIFF is found, FALSE otherwise") - MESSAGE(STATUS "Looking for TIFF - found") - ELSE(TIFF_FOUND) - MESSAGE(STATUS "Looking for TIFF - not found") - ENDIF(TIFF_FOUND) -ENDIF(TIFF) - -# OpenEXR -IF(OPENEXR) - INCLUDE(${NV_CMAKE_DIR}/FindOpenEXR.cmake) - IF(OPENEXR_FOUND) - SET(HAVE_OPENEXR ${OPENEXR_FOUND} CACHE BOOL "Set to TRUE if OpenEXR is found, FALSE otherwise") - MESSAGE(STATUS "Looking for OpenEXR - found") - ELSE(OPENEXR_FOUND) - MESSAGE(STATUS "Looking for OpenEXR - not found") - ENDIF(OPENEXR_FOUND) -ENDIF(OPENEXR) - -# Qt -# We don't actually use this and it requires having Qt4 installed, so why is this in here? 
-#FIND_PACKAGE(Qt4) +#INCLUDE(${NV_CMAKE_DIR}/FindMaya.cmake) +#IF(MAYA_FOUND) +# SET(HAVE_MAYA ${MAYA_FOUND} CACHE BOOL "Set to TRUE if Maya is found, FALSE otherwise") +# MESSAGE(STATUS "Looking for Maya - found") +#ELSE(MAYA_FOUND) +# MESSAGE(STATUS "Looking for Maya - not found") +#ENDIF(MAYA_FOUND) + +# FreeImage +#INCLUDE(${NV_CMAKE_DIR}/FindFreeImage.cmake) +#IF(FREEIMAGE_FOUND) +# SET(HAVE_FREEIMAGE ${FREEIMAGE_FOUND} CACHE BOOL "Set to TRUE if FreeImage is found, FALSE otherwise") +# MESSAGE(STATUS "Looking for FreeImage - found") +#ELSE(FREEIMAGE_FOUND) +# MESSAGE(STATUS "Looking for FreeImage - not found") +#ENDIF(FREEIMAGE_FOUND) + +# JPEG +#INCLUDE(FindJPEG) +#IF(JPEG_FOUND) +# SET(HAVE_JPEG ${JPEG_FOUND} CACHE BOOL "Set to TRUE if JPEG is found, FALSE otherwise") +# MESSAGE(STATUS "Looking for JPEG - found") +#ELSE(JPEG_FOUND) +# MESSAGE(STATUS "Looking for JPEG - not found") +#ENDIF(JPEG_FOUND) + +# PNG +IF(PNG) + INCLUDE(FindPNG) + IF(PNG_FOUND) + SET(HAVE_PNG ${PNG_FOUND} CACHE BOOL "Set to TRUE if PNG is found, FALSE otherwise") + MESSAGE(STATUS "Looking for PNG - found") + ELSE(PNG_FOUND) + MESSAGE(STATUS "Looking for PNG - not found") + ENDIF(PNG_FOUND) +ENDIF(PNG) + +# TIFF +#SET(TIFF_NAMES libtiff) +#INCLUDE(FindTIFF) +#IF(TIFF_FOUND) +# SET(HAVE_TIFF ${TIFF_FOUND} CACHE BOOL "Set to TRUE if TIFF is found, FALSE otherwise") +# MESSAGE(STATUS "Looking for TIFF - found") +#ELSE(TIFF_FOUND) +# MESSAGE(STATUS "Looking for TIFF - not found") +#ENDIF(TIFF_FOUND) + +# OpenEXR +#INCLUDE(${NV_CMAKE_DIR}/FindOpenEXR.cmake) +#IF(OPENEXR_FOUND) +# SET(HAVE_OPENEXR ${OPENEXR_FOUND} CACHE BOOL "Set to TRUE if OpenEXR is found, FALSE otherwise") +# MESSAGE(STATUS "Looking for OpenEXR - found") +#ELSE(OPENEXR_FOUND) +# MESSAGE(STATUS "Looking for OpenEXR - not found") +#ENDIF(OPENEXR_FOUND) + +# OpenMP +#INCLUDE(FindOpenMP) +#IF(OPENMP_FOUND) +# SET(HAVE_OPENMP ${OPENMP_FOUND} CACHE BOOL "Set to TRUE if OpenMP is found, FALSE otherwise") +# MESSAGE(STATUS "Looking for OpenMP - found") +# SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") +# SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") +#ELSE(OPENMP_FOUND) +# MESSAGE(STATUS "Looking for OpenMP - not found") +#ENDIF(OPENMP_FOUND) # Threads FIND_PACKAGE(Threads REQUIRED) MESSAGE(STATUS "Use thread library: ${CMAKE_THREAD_LIBS_INIT}") +SET(LIBS ${LIBS} ${CMAKE_THREAD_LIBS_INIT}) # configuration file INCLUDE(CheckIncludeFiles) -CHECK_INCLUDE_FILES(unistd.h HAVE_UNISTD_H) -CHECK_INCLUDE_FILES(stdarg.h HAVE_STDARG_H) -CHECK_INCLUDE_FILES(signal.h HAVE_SIGNAL_H) -CHECK_INCLUDE_FILES(execinfo.h HAVE_EXECINFO_H) -CHECK_INCLUDE_FILES(malloc.h HAVE_MALLOC_H) +CHECK_INCLUDE_FILES("unistd.h" HAVE_UNISTD_H) +CHECK_INCLUDE_FILES("stdarg.h" HAVE_STDARG_H) +CHECK_INCLUDE_FILES("signal.h" HAVE_SIGNAL_H) +CHECK_INCLUDE_FILES("execinfo.h" HAVE_EXECINFO_H) +CHECK_INCLUDE_FILES("malloc.h" HAVE_MALLOC_H) +CHECK_INCLUDE_FILES("dispatch/dispatch.h" HAVE_DISPATCH_H) CONFIGURE_FILE(${CMAKE_CURRENT_SOURCE_DIR}/nvconfig.h.in ${CMAKE_CURRENT_BINARY_DIR}/nvconfig.h) Index: ps/trunk/libraries/source/nvtt/src/src/bc6h/CMakeLists.txt =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/bc6h/CMakeLists.txt +++ ps/trunk/libraries/source/nvtt/src/src/bc6h/CMakeLists.txt @@ -0,0 +1,22 @@ +PROJECT(bc6h) + +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +SET(BC6H_SRCS + bits.h + shapes_two.h + tile.h + zoh_utils.cpp + zoh_utils.h + zoh.cpp + zoh.h + zohone.cpp + zohtwo.cpp) + 
+ADD_LIBRARY(bc6h STATIC ${BC6H_SRCS}) + +IF(NOT WIN32) + IF(CMAKE_COMPILER_IS_GNUCXX) + SET_TARGET_PROPERTIES(bc6h PROPERTIES COMPILE_FLAGS -fPIC) + ENDIF(CMAKE_COMPILER_IS_GNUCXX) +ENDIF(NOT WIN32) Index: ps/trunk/libraries/source/nvtt/src/src/bc6h/bits.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/bc6h/bits.h +++ ps/trunk/libraries/source/nvtt/src/src/bc6h/bits.h @@ -0,0 +1,76 @@ +/* +Copyright 2007 nVidia, Inc. +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. + +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +See the License for the specific language governing permissions and limitations under the License. +*/ +#pragma once +#ifndef _ZOH_BITS_H +#define _ZOH_BITS_H + +// read/write a bitstream + +#include "nvcore/Debug.h" + +namespace ZOH { + +class Bits +{ +public: + + Bits(char *data, int maxdatabits) { nvAssert (data && maxdatabits > 0); bptr = bend = 0; bits = data; maxbits = maxdatabits; readonly = 0;} + Bits(const char *data, int availdatabits) { nvAssert (data && availdatabits > 0); bptr = 0; bend = availdatabits; cbits = data; maxbits = availdatabits; readonly = 1;} + + void write(int value, int nbits) { + nvAssert (nbits >= 0 && nbits < 32); + nvAssert (sizeof(int)>= 4); + for (int i=0; i<nbits; ++i) + writeone(value>>i); + } + int read(int nbits) { + nvAssert (nbits >= 0 && nbits < 32); + nvAssert (sizeof(int)>= 4); + int out = 0; + for (int i=0; i<nbits; ++i) + out |= readone()<<i; + return out; + } + int getptr() { return bptr; } + void setptr(int ptr) { nvAssert (ptr >= 0 && ptr < maxbits); bptr = ptr; } + int getsize() { return bend; } + +private: + int bptr; // next bit to read + int bend; // last written bit + 1 + char *bits; // ptr to user bit stream + const char *cbits; // ptr to const user bit stream + int maxbits; // max size of user bit stream + char readonly; // 1 if this is a read-only stream + + int readone() { + nvAssert (bptr < bend); + if (bptr >= bend) return 0; + int bit = (readonly ? cbits[bptr>>3] : bits[bptr>>3]) & (1 << (bptr & 7)); + ++bptr; + return bit != 0; + } + void writeone(int bit) { + nvAssert (!readonly); // "Writing a read-only bit stream" + nvAssert (bptr < maxbits); + if (bptr >= maxbits) return; + if (bit&1) + bits[bptr>>3] |= 1 << (bptr & 7); + else + bits[bptr>>3] &= ~(1 << (bptr & 7)); + if (bptr++ >= bend) bend = bptr; + } +}; + +} + +#endif Index: ps/trunk/libraries/source/nvtt/src/src/bc6h/shapes_two.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/bc6h/shapes_two.h +++ ps/trunk/libraries/source/nvtt/src/src/bc6h/shapes_two.h @@ -0,0 +1,133 @@ +/* +Copyright 2007 nVidia, Inc. +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. + +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +See the License for the specific language governing permissions and limitations under the License.
+*/ +#pragma once +#ifndef _ZOH_SHAPES_TWO_H +#define _ZOH_SHAPES_TWO_H + +// shapes for two regions + +#define NREGIONS 2 +#define NSHAPES 64 +#define SHAPEBITS 6 + +static const int shapes[NSHAPES*16] = +{ +0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, +0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, +0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, +0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, + +0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, +0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, +0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, +0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, + +0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, +0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, +0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, + +0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + +0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, +1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, +1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, +1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, + +0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, +0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, +0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, +0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, + +0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, +0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, +0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, +0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, + +0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, +0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, +1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, +1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, + +0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, +0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, +0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, +0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, + +0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, +1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, +0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, +1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, + +0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, +0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, +1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, +1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, + +0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, +1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, +1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, +0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, + +0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, +1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, +0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, + +0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, +1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, +1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, +0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, + +0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, +1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, +1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, +1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, + +0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, +1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, +0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, +0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, + +}; + +#define REGION(x,y,si) shapes[((si)&3)*4+((si)>>2)*64+(x)+(y)*16] + +static const int shapeindex_to_compressed_indices[NSHAPES*2] = +{ + 0,15, 0,15, 0,15, 0,15, + 0,15, 
0,15, 0,15, 0,15, + 0,15, 0,15, 0,15, 0,15, + 0,15, 0,15, 0,15, 0,15, + + 0,15, 0, 2, 0, 8, 0, 2, + 0, 2, 0, 8, 0, 8, 0,15, + 0, 2, 0, 8, 0, 2, 0, 2, + 0, 8, 0, 8, 0, 2, 0, 2, + + 0,15, 0,15, 0, 6, 0, 8, + 0, 2, 0, 8, 0,15, 0,15, + 0, 2, 0, 8, 0, 2, 0, 2, + 0, 2, 0,15, 0,15, 0, 6, + + 0, 6, 0, 2, 0, 6, 0, 8, + 0,15, 0,15, 0, 2, 0, 2, + 0,15, 0,15, 0,15, 0,15, + 0,15, 0, 2, 0, 2, 0,15 + +}; +#define SHAPEINDEX_TO_COMPRESSED_INDICES(si,region) shapeindex_to_compressed_indices[(si)*2+(region)] + +#endif Index: ps/trunk/libraries/source/nvtt/src/src/bc6h/tile.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/bc6h/tile.h +++ ps/trunk/libraries/source/nvtt/src/src/bc6h/tile.h @@ -0,0 +1,83 @@ +/* +Copyright 2007 nVidia, Inc. +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. + +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +See the License for the specific language governing permissions and limitations under the License. +*/ +#pragma once +#ifndef _ZOH_TILE_H +#define _ZOH_TILE_H + +#include "zoh_utils.h" +#include "nvmath/Vector.h" +#include + +namespace ZOH { + +//#define USE_IMPORTANCE_MAP 1 // define this if you want to increase importance of some pixels in tile +class Tile +{ +public: + // NOTE: this returns the appropriately-clamped BIT PATTERN of the half as an INTEGRAL float value + static float half2float(uint16 h) + { + return (float) Utils::ushort_to_format(h); + } + // NOTE: this is the inverse of the above operation + static uint16 float2half(float f) + { + return Utils::format_to_ushort((int)f); + } + + // look for adjacent pixels that are identical. if there are enough of them, increase their importance + void generate_importance_map() + { + // initialize + for (int y=0; y= size_x || yn < 0 || yn >= size_y) + return false; + return( (data[y][x].x == data[yn][xn].x) && + (data[y][x].y == data[yn][xn].y) && + (data[y][x].z == data[yn][xn].z) ); + } + +#ifdef USE_IMPORTANCE_MAP + bool match_4_neighbor(int x, int y) + { + return is_equal(x,y,x-1,y) || is_equal(x,y,x+1,y) || is_equal(x,y,x,y-1) || is_equal(x,y,x,y+1); + } +#else + bool match_4_neighbor(int x, int y) + { + return false; + } +#endif + + Tile() {}; + ~Tile(){}; + Tile(int xs, int ys) {size_x = xs; size_y = ys;} + + static const int TILE_H = 4; + static const int TILE_W = 4; + static const int TILE_TOTAL = TILE_H * TILE_W; + nv::Vector3 data[TILE_H][TILE_W]; + float importance_map[TILE_H][TILE_W]; + int size_x, size_y; // actual size of tile +}; + +} + +#endif // _ZOH_TILE_H Index: ps/trunk/libraries/source/nvtt/src/src/bc6h/zoh.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/bc6h/zoh.h +++ ps/trunk/libraries/source/nvtt/src/src/bc6h/zoh.h @@ -0,0 +1,65 @@ +/* +Copyright 2007 nVidia, Inc. +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
+ +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +See the License for the specific language governing permissions and limitations under the License. +*/ +#pragma once +#ifndef _ZOH_H +#define _ZOH_H + +#include "tile.h" + +namespace ZOH { + +// UNUSED ZOH MODES are 0x13, 0x17, 0x1b, 0x1f + +static const int NREGIONS_TWO = 2; +static const int NREGIONS_ONE = 1; +static const int NCHANNELS = 3; + +struct FltEndpts +{ + nv::Vector3 A; + nv::Vector3 B; +}; + +struct IntEndpts +{ + int A[NCHANNELS]; + int B[NCHANNELS]; +}; + +struct ComprEndpts +{ + uint A[NCHANNELS]; + uint B[NCHANNELS]; +}; + +static const int BLOCKSIZE=16; +static const int BITSIZE=128; + +void compress(const Tile &t, char *block); +void decompress(const char *block, Tile &t); + +float compressone(const Tile &t, char *block); +float compresstwo(const Tile &t, char *block); +void decompressone(const char *block, Tile &t); +void decompresstwo(const char *block, Tile &t); + +float refinetwo(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS_TWO], char *block); +float roughtwo(const Tile &tile, int shape, FltEndpts endpts[NREGIONS_TWO]); + +float refineone(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS_ONE], char *block); +float roughone(const Tile &tile, int shape, FltEndpts endpts[NREGIONS_ONE]); + +bool isone(const char *block); + +} + +#endif // _ZOH_H Index: ps/trunk/libraries/source/nvtt/src/src/bc6h/zoh.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/bc6h/zoh.cpp +++ ps/trunk/libraries/source/nvtt/src/src/bc6h/zoh.cpp @@ -0,0 +1,197 @@ +/* +Copyright 2007 nVidia, Inc. +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. + +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +See the License for the specific language governing permissions and limitations under the License. 
+*/ + +// the zoh compressor and decompressor + +#include "tile.h" +#include "zoh.h" + +#include // memcpy + +using namespace ZOH; + + +bool ZOH::isone(const char *block) +{ + char code = block[0] & 0x1F; + + return (code == 0x03 || code == 0x07 || code == 0x0b || code == 0x0f); +} + +void ZOH::compress(const Tile &t, char *block) +{ + char oneblock[ZOH::BLOCKSIZE], twoblock[ZOH::BLOCKSIZE]; + + float mseone = ZOH::compressone(t, oneblock); + float msetwo = ZOH::compresstwo(t, twoblock); + + if (mseone <= msetwo) + memcpy(block, oneblock, ZOH::BLOCKSIZE); + else + memcpy(block, twoblock, ZOH::BLOCKSIZE); +} + +void ZOH::decompress(const char *block, Tile &t) +{ + if (ZOH::isone(block)) + ZOH::decompressone(block, t); + else + ZOH::decompresstwo(block, t); +} + +/* +void ZOH::compress(string inf, string zohf) +{ + Array2D pixels; + int w, h; + char block[ZOH::BLOCKSIZE]; + + Exr::readRgba(inf, pixels, w, h); + FILE *zohfile = fopen(zohf.c_str(), "wb"); + if (zohfile == NULL) throw "Unable to open .zoh file for write"; + + // stuff for progress bar O.o + int ntiles = ((h+Tile::TILE_H-1)/Tile::TILE_H)*((w+Tile::TILE_W-1)/Tile::TILE_W); + int tilecnt = 0; + int ndots = 25; + int dotcnt = 0; + printf("Progress ["); + for (int i=0; i (ntiles * dotcnt)/ndots) { printf("."); fflush(stdout); ++dotcnt; } + } + } + + printf("]\n"); // advance to next line finally + + if (fclose(zohfile)) throw "Close failed on .zoh file"; +} + +static int str2int(std::string s) +{ + int thing; + std::stringstream str (stringstream::in | stringstream::out); + str << s; + str >> thing; + return thing; +} + +// zoh file name is ...-w-h.zoh, extract width and height +static void extract(string zohf, int &w, int &h) +{ + size_t n = zohf.rfind('.', zohf.length()-1); + size_t n1 = zohf.rfind('-', n-1); + size_t n2 = zohf.rfind('-', n1-1); + string width = zohf.substr(n2+1, n1-n2-1); + w = str2int(width); + string height = zohf.substr(n1+1, n-n1-1); + h = str2int(height); +} + +static int mode_to_prec[] = { + 10,7,11,10, + 10,7,11,11, + 10,7,11,12, + 10,7,9,16, + 10,7,8,-1, + 10,7,8,-1, + 10,7,8,-1, + 10,7,6,-1, +}; + +static int shapeindexhist[32], modehist[32], prechistone[16], prechisttwo[16], oneregion, tworegions; + +static void stats(char block[ZOH::BLOCKSIZE]) +{ + char mode = block[0] & 0x1F; if ((mode & 0x3) == 0) mode = 0; if ((mode & 0x3) == 1) mode = 1; modehist[mode]++; + int prec = mode_to_prec[mode]; + nvAssert (prec != -1); + if (!ZOH::isone(block)) + { + tworegions++; + prechisttwo[prec]++; + int shapeindex = ((block[0] & 0xe0) >> 5) | ((block[1] & 0x3) << 3); + shapeindexhist[shapeindex]++; + } + else + { + oneregion++; + prechistone[prec]++; + } +} + +static void printstats() +{ + printf("\nPrecision histogram 10b to 16b one region: "); for (int i=10; i<=16; ++i) printf("%d,", prechistone[i]); + printf("\nPrecision histogram 6b to 11b two regions: "); for (int i=6; i<=11; ++i) printf("%d,", prechisttwo[i]); + printf("\nMode histogram: "); for (int i=0; i<32; ++i) printf("%d,", modehist[i]); + printf("\nShape index histogram: "); for (int i=0; i<32; ++i) printf("%d,", shapeindexhist[i]); + printf("\nOne region %5.2f%% Two regions %5.2f%%", 100.0*oneregion/float(oneregion+tworegions), 100.0*tworegions/float(oneregion+tworegions)); + printf("\n"); +} + +void ZOH::decompress(string zohf, string outf) +{ + Array2D pixels; + int w, h; + char block[ZOH::BLOCKSIZE]; + + extract(zohf, w, h); + FILE *zohfile = fopen(zohf.c_str(), "rb"); + if (zohfile == NULL) throw "Unable to open .zoh file for read"; + 
pixels.resizeErase(h, w); + + // convert to tiles and decompress each tile + for (int y=0; y + +using namespace nv; +using namespace ZOH; + +static const int denom7_weights_64[] = {0, 9, 18, 27, 37, 46, 55, 64}; // divided by 64 +static const int denom15_weights_64[] = {0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64}; // divided by 64 + +/*static*/ Format Utils::FORMAT; + +int Utils::lerp(int a, int b, int i, int denom) +{ + nvDebugCheck (denom == 3 || denom == 7 || denom == 15); + nvDebugCheck (i >= 0 && i <= denom); + + int round = 32, shift = 6; + const int *weights; + + switch(denom) + { + case 3: denom *= 5; i *= 5; // fall through to case 15 + case 15: weights = denom15_weights_64; break; + case 7: weights = denom7_weights_64; break; + default: nvDebugCheck(0); + } + + return (a*weights[denom-i] +b*weights[i] + round) >> shift; +} + +Vector3 Utils::lerp(const Vector3& a, const Vector3 &b, int i, int denom) +{ + nvDebugCheck (denom == 3 || denom == 7 || denom == 15); + nvDebugCheck (i >= 0 && i <= denom); + + int shift = 6; + const int *weights; + + switch(denom) + { + case 3: denom *= 5; i *= 5; // fall through to case 15 + case 15: weights = denom15_weights_64; break; + case 7: weights = denom7_weights_64; break; + default: nvUnreachable(); + } + + // no need to round these as this is an exact division + return (a*float(weights[denom-i]) +b*float(weights[i])) / float(1 << shift); +} + + +/* + For unsigned f16, clamp the input to [0,F16MAX]. Thus u15. + For signed f16, clamp the input to [-F16MAX,F16MAX]. Thus s16. + + The conversions proceed as follows: + + unsigned f16: get bits. if high bit set, clamp to 0, else clamp to F16MAX. + signed f16: get bits. extract exp+mantissa and clamp to F16MAX. return -value if sign bit was set, else value + unsigned int: get bits. return as a positive value. + signed int. get bits. return as a value in -32768..32767. + + The inverse conversions are just the inverse of the above. +*/ + +// clamp the 3 channels of the input vector to the allowable range based on FORMAT +// note that each channel is a float storing the allowable range as a bit pattern converted to float +// that is, for unsigned f16 say, we would clamp each channel to the range [0, F16MAX] + +void Utils::clamp(Vector3 &v) +{ + for (int i=0; i<3; ++i) + { + switch(Utils::FORMAT) + { + case UNSIGNED_F16: + if (v.component[i] < 0.0) v.component[i] = 0; + else if (v.component[i] > F16MAX) v.component[i] = F16MAX; + break; + + case SIGNED_F16: + if (v.component[i] < -F16MAX) v.component[i] = -F16MAX; + else if (v.component[i] > F16MAX) v.component[i] = F16MAX; + break; + + default: + nvUnreachable(); + } + } +} + +// convert a u16 value to s17 (represented as an int) based on the format expected +int Utils::ushort_to_format(unsigned short input) +{ + int out, s; + + // clamp to the valid range we are expecting + switch (Utils::FORMAT) + { + case UNSIGNED_F16: + if (input & F16S_MASK) out = 0; + else if (input > F16MAX) out = F16MAX; + else out = input; + break; + + case SIGNED_F16: + s = input & F16S_MASK; + input &= F16EM_MASK; + if (input > F16MAX) out = F16MAX; + else out = input; + out = s ? 
-out : out; + break; + } + return out; +} + +// convert a s17 value to u16 based on the format expected +unsigned short Utils::format_to_ushort(int input) +{ + unsigned short out; + + // clamp to the valid range we are expecting + switch (Utils::FORMAT) + { + case UNSIGNED_F16: + nvDebugCheck (input >= 0 && input <= F16MAX); + out = input; + break; + + case SIGNED_F16: + nvDebugCheck (input >= -F16MAX && input <= F16MAX); + // convert to sign-magnitude + int s; + if (input < 0) { s = F16S_MASK; input = -input; } + else { s = 0; } + out = s | input; + break; + } + return out; +} + +// quantize the input range into equal-sized bins +int Utils::quantize(float value, int prec) +{ + int q, ivalue, s; + + nvDebugCheck (prec > 1); // didn't bother to make it work for 1 + + value = (float)floor(value + 0.5); + + int bias = (prec > 10) ? ((1<<(prec-1))-1) : 0; // bias precisions 11..16 to get a more accurate quantization + + switch (Utils::FORMAT) + { + case UNSIGNED_F16: + nvDebugCheck (value >= 0 && value <= F16MAX); + ivalue = (int)value; + q = ((ivalue << prec) + bias) / (F16MAX+1); + nvDebugCheck (q >= 0 && q < (1 << prec)); + break; + + case SIGNED_F16: + nvDebugCheck (value >= -F16MAX && value <= F16MAX); + // convert to sign-magnitude + ivalue = (int)value; + if (ivalue < 0) { s = 1; ivalue = -ivalue; } else s = 0; + + q = ((ivalue << (prec-1)) + bias) / (F16MAX+1); + if (s) + q = -q; + nvDebugCheck (q > -(1 << (prec-1)) && q < (1 << (prec-1))); + break; + } + + return q; +} + +int Utils::finish_unquantize(int q, int prec) +{ + if (Utils::FORMAT == UNSIGNED_F16) + return (q * 31) >> 6; // scale the magnitude by 31/64 + else if (Utils::FORMAT == SIGNED_F16) + return (q < 0) ? -(((-q) * 31) >> 5) : (q * 31) >> 5; // scale the magnitude by 31/32 + else + return q; +} + +// unquantize each bin to midpoint of original bin range, except +// for the end bins which we push to an endpoint of the bin range. +// we do this to ensure we can represent all possible original values. +// the asymmetric end bins do not affect PSNR for the test images. +// +// code this function assuming an arbitrary bit pattern as the encoded block +int Utils::unquantize(int q, int prec) +{ + int unq, s; + + nvDebugCheck (prec > 1); // not implemented for prec 1 + + switch (Utils::FORMAT) + { + // modify this case to move the multiplication by 31 after interpolation. + // Need to use finish_unquantize. + + // since we have 16 bits available, let's unquantize this to 16 bits unsigned + // thus the scale factor is [0-7c00)/[0-10000) = 31/64 + case UNSIGNED_F16: + if (prec >= 15) + unq = q; + else if (q == 0) + unq = 0; + else if (q == ((1<<prec)-1)) + unq = U16MAX; + else + unq = (q * (U16MAX+1) + (U16MAX+1)/2) >> prec; + break; + + // here, let's stick with S16 (no apparent quality benefit from going to S17) + // range is (-7c00..7c00)/(-8000..8000) = 31/32 + case SIGNED_F16: + // don't remove this test even though it appears equivalent to the code below + // as it isn't -- the code below can overflow for prec = 16 + if (prec >= 16) + unq = q; + else + { + if (q < 0) { s = 1; q = -q; } else s = 0; + + if (q == 0) + unq = 0; + else if (q >= ((1<<(prec-1))-1)) + unq = s ? -S16MAX : S16MAX; + else + { + unq = (q * (S16MAX+1) + (S16MAX+1)/2) >> (prec-1); + if (s) + unq = -unq; + } + } + break; + } + return unq; +} + + + +// pick a norm!
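// (Illustrative note: with NORM_EUCLIDEAN, norm() below returns the squared Euclidean distance, so
//  no sqrt is needed where errors are only compared -- e.g. norm((0,0,0),(1,2,2)) = 1 + 4 + 4 = 9,
//  whereas NORM_ABS would give |1| + |2| + |2| = 5.)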
+#define NORM_EUCLIDEAN 1 + +float Utils::norm(const Vector3 &a, const Vector3 &b) +{ +#ifdef NORM_EUCLIDEAN + return lengthSquared(a - b); +#endif +#ifdef NORM_ABS + Vector3 err = a - b; + return fabs(err.x) + fabs(err.y) + fabs(err.z); +#endif +} + +// parse [{:}]{,} +// the pointer starts here ^ +// name is 1 or 2 chars and matches field names. start and end are decimal numbers +void Utils::parse(const char *encoding, int &ptr, Field &field, int &endbit, int &len) +{ + if (ptr <= 0) return; + --ptr; + if (encoding[ptr] == ',') --ptr; + nvDebugCheck (encoding[ptr] == ']'); + --ptr; + endbit = 0; + int scale = 1; + while (encoding[ptr] != ':' && encoding[ptr] != '[') + { + nvDebugCheck(encoding[ptr] >= '0' && encoding[ptr] <= '9'); + endbit += (encoding[ptr--] - '0') * scale; + scale *= 10; + } + int startbit = 0; scale = 1; + if (encoding[ptr] == '[') + startbit = endbit; + else + { + ptr--; + while (encoding[ptr] != '[') + { + nvDebugCheck(encoding[ptr] >= '0' && encoding[ptr] <= '9'); + startbit += (encoding[ptr--] - '0') * scale; + scale *= 10; + } + } + len = startbit - endbit + 1; // startbit>=endbit note + --ptr; + if (encoding[ptr] == 'm') field = FIELD_M; + else if (encoding[ptr] == 'd') field = FIELD_D; + else { + // it's wxyz + nvDebugCheck (encoding[ptr] >= 'w' && encoding[ptr] <= 'z'); + int foo = encoding[ptr--] - 'w'; + // now it is r g or b + if (encoding[ptr] == 'r') foo += 10; + else if (encoding[ptr] == 'g') foo += 20; + else if (encoding[ptr] == 'b') foo += 30; + else nvDebugCheck(0); + field = (Field) foo; + } +} + + Index: ps/trunk/libraries/source/nvtt/src/src/bc6h/zohone.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/bc6h/zohone.cpp +++ ps/trunk/libraries/source/nvtt/src/src/bc6h/zohone.cpp @@ -0,0 +1,799 @@ +/* +Copyright 2007 nVidia, Inc. +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. + +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +See the License for the specific language governing permissions and limitations under the License. 
+*/ + +// one region zoh compress/decompress code +// Thanks to Jacob Munkberg (jacob@cs.lth.se) for the shortcut of using SVD to do the equivalent of principal components analysis + +#include "bits.h" +#include "tile.h" +#include "zoh.h" +#include "zoh_utils.h" + +#include "nvmath/Vector.inl" +#include "nvmath/Fitting.h" + +#include // strlen +#include // FLT_MAX + +using namespace nv; +using namespace ZOH; + +#define NINDICES 16 +#define INDEXBITS 4 +#define HIGH_INDEXBIT (1<<(INDEXBITS-1)) +#define DENOM (NINDICES-1) + +#define NSHAPES 1 + +static const int shapes[NSHAPES] = +{ + 0x0000 +}; // only 1 shape + +#define REGION(x,y,shapeindex) ((shapes[shapeindex]&(1<<(15-(x)-4*(y))))!=0) + +#define POS_TO_X(pos) ((pos)&3) +#define POS_TO_Y(pos) (((pos)>>2)&3) + +#define NDELTA 2 + +struct Chanpat +{ + int prec[NDELTA]; // precision pattern for one channel +}; + +struct Pattern +{ + Chanpat chan[NCHANNELS];// allow different bit patterns per channel -- but we still want constant precision per channel + int transformed; // if 0, deltas are unsigned and no transform; otherwise, signed and transformed + int mode; // associated mode value + int modebits; // number of mode bits + const char *encoding; // verilog description of encoding for this mode +}; + +#define MAXMODEBITS 5 +#define MAXMODES (1<> 2) & 3 and x = index & 3 +static void swap_indices(IntEndpts endpts[NREGIONS_ONE], int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex) +{ + int index_positions[NREGIONS_ONE]; + + index_positions[0] = 0; // since WLOG we have the high bit of the shapes at 0 + + for (int region = 0; region < NREGIONS_ONE; ++region) + { + int x = index_positions[region] & 3; + int y = (index_positions[region] >> 2) & 3; + nvDebugCheck(REGION(x,y,shapeindex) == region); // double check the table + if (indices[y][x] & HIGH_INDEXBIT) + { + // high bit is set, swap the endpts and indices for this region + int t; + for (int i=0; i> endbit, len); break; + case FIELD_RW: out.write(rw >> endbit, len); break; + case FIELD_RX: out.write(rx >> endbit, len); break; + case FIELD_GW: out.write(gw >> endbit, len); break; + case FIELD_GX: out.write(gx >> endbit, len); break; + case FIELD_BW: out.write(bw >> endbit, len); break; + case FIELD_BX: out.write(bx >> endbit, len); break; + + case FIELD_D: + case FIELD_RY: + case FIELD_RZ: + case FIELD_GY: + case FIELD_GZ: + case FIELD_BY: + case FIELD_BZ: + default: nvUnreachable(); + } + } +} + +static void read_header(Bits &in, ComprEndpts endpts[NREGIONS_ONE], Pattern &p) +{ + // reading isn't quite symmetric with writing -- we don't know the encoding until we decode the mode + int mode = in.read(2); + if (mode != 0x00 && mode != 0x01) + mode = (in.read(3) << 2) | mode; + + int pat_index = mode_to_pat[mode]; + + nvDebugCheck (pat_index >= 0 && pat_index < NPATTERNS); + nvDebugCheck (in.getptr() == patterns[pat_index].modebits); + + p = patterns[pat_index]; + + int d; + int rw, rx; + int gw, gx; + int bw, bx; + + d = 0; + rw = rx = 0; + gw = gx = 0; + bw = bx = 0; + + int ptr = int(strlen(p.encoding)); + + while (ptr) + { + Field field; + int endbit, len; + + // !!!UNDONE: get rid of string parsing!!! 
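// (Illustrative example of the encoding-string parsing used above and below; the token shown here is
//  an example, the real strings live in the pattern table earlier in this file: Utils::parse() in
//  zoh_utils.cpp walks the string backwards one comma-separated token at a time, so a token written
//  as "rw[9:0]" would yield field = FIELD_RW, endbit = 0, len = 10, and the loop below would then
//  execute rw |= in.read(10) << 0; a single-bit token such as "bx[4]" gives len = 1, endbit = 4.)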
+ Utils::parse(p.encoding, ptr, field, endbit, len); + + switch(field) + { + case FIELD_M: break; // already processed so ignore + case FIELD_RW: rw |= in.read(len) << endbit; break; + case FIELD_RX: rx |= in.read(len) << endbit; break; + case FIELD_GW: gw |= in.read(len) << endbit; break; + case FIELD_GX: gx |= in.read(len) << endbit; break; + case FIELD_BW: bw |= in.read(len) << endbit; break; + case FIELD_BX: bx |= in.read(len) << endbit; break; + + case FIELD_D: + case FIELD_RY: + case FIELD_RZ: + case FIELD_GY: + case FIELD_GZ: + case FIELD_BY: + case FIELD_BZ: + default: nvUnreachable(); + } + } + + nvDebugCheck (in.getptr() == 128 - 63); + + endpts[0].A[0] = rw; endpts[0].B[0] = rx; + endpts[0].A[1] = gw; endpts[0].B[1] = gx; + endpts[0].A[2] = bw; endpts[0].B[2] = bx; +} + +// compress index 0 +static void write_indices(const int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex, Bits &out) +{ + for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos) + { + int x = POS_TO_X(pos); + int y = POS_TO_Y(pos); + + out.write(indices[y][x], INDEXBITS - ((pos == 0) ? 1 : 0)); + } +} + +static void emit_block(const ComprEndpts endpts[NREGIONS_ONE], int shapeindex, const Pattern &p, const int indices[Tile::TILE_H][Tile::TILE_W], char *block) +{ + Bits out(block, ZOH::BITSIZE); + + write_header(endpts, p, out); + + write_indices(indices, shapeindex, out); + + nvDebugCheck(out.getptr() == ZOH::BITSIZE); +} + +static void generate_palette_quantized(const IntEndpts &endpts, int prec, Vector3 palette[NINDICES]) +{ + // scale endpoints + int a, b; // really need a IntVector3... + + a = Utils::unquantize(endpts.A[0], prec); + b = Utils::unquantize(endpts.B[0], prec); + + // interpolate + for (int i = 0; i < NINDICES; ++i) + palette[i].x = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec)); + + a = Utils::unquantize(endpts.A[1], prec); + b = Utils::unquantize(endpts.B[1], prec); + + // interpolate + for (int i = 0; i < NINDICES; ++i) + palette[i].y = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec)); + + a = Utils::unquantize(endpts.A[2], prec); + b = Utils::unquantize(endpts.B[2], prec); + + // interpolate + for (int i = 0; i < NINDICES; ++i) + palette[i].z = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec)); +} + +// position 0 was compressed +static void read_indices(Bits &in, int shapeindex, int indices[Tile::TILE_H][Tile::TILE_W]) +{ + for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos) + { + int x = POS_TO_X(pos); + int y = POS_TO_Y(pos); + + indices[y][x]= in.read(INDEXBITS - ((pos == 0) ? 
1 : 0)); + } +} + +void ZOH::decompressone(const char *block, Tile &t) +{ + Bits in(block, ZOH::BITSIZE); + + Pattern p; + IntEndpts endpts[NREGIONS_ONE]; + ComprEndpts compr_endpts[NREGIONS_ONE]; + + read_header(in, compr_endpts, p); + int shapeindex = 0; // only one shape + + decompress_endpts(compr_endpts, endpts, p); + + Vector3 palette[NREGIONS_ONE][NINDICES]; + for (int r = 0; r < NREGIONS_ONE; ++r) + generate_palette_quantized(endpts[r], p.chan[0].prec[0], &palette[r][0]); + + // read indices + int indices[Tile::TILE_H][Tile::TILE_W]; + + read_indices(in, shapeindex, indices); + + nvDebugCheck(in.getptr() == ZOH::BITSIZE); + + // lookup + for (int y = 0; y < Tile::TILE_H; y++) + for (int x = 0; x < Tile::TILE_W; x++) + t.data[y][x] = palette[REGION(x,y,shapeindex)][indices[y][x]]; +} + +// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr +static float map_colors(const Vector3 colors[], const float importance[], int np, const IntEndpts &endpts, int prec) +{ + Vector3 palette[NINDICES]; + float toterr = 0; + Vector3 err; + + generate_palette_quantized(endpts, prec, palette); + + for (int i = 0; i < np; ++i) + { + float err, besterr; + + besterr = Utils::norm(colors[i], palette[0]) * importance[i]; + + for (int j = 1; j < NINDICES && besterr > 0; ++j) + { + err = Utils::norm(colors[i], palette[j]) * importance[i]; + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + besterr = err; + } + toterr += besterr; + } + return toterr; +} + +// assign indices given a tile, shape, and quantized endpoints, return toterr for each region +static void assign_indices(const Tile &tile, int shapeindex, IntEndpts endpts[NREGIONS_ONE], int prec, + int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS_ONE]) +{ + // build list of possibles + Vector3 palette[NREGIONS_ONE][NINDICES]; + + for (int region = 0; region < NREGIONS_ONE; ++region) + { + generate_palette_quantized(endpts[region], prec, &palette[region][0]); + toterr[region] = 0; + } + + Vector3 err; + + for (int y = 0; y < tile.size_y; y++) + for (int x = 0; x < tile.size_x; x++) + { + int region = REGION(x,y,shapeindex); + float err, besterr; + + besterr = Utils::norm(tile.data[y][x], palette[region][0]); + indices[y][x] = 0; + + for (int i = 1; i < NINDICES && besterr > 0; ++i) + { + err = Utils::norm(tile.data[y][x], palette[region][i]); + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[y][x] = i; + } + } + toterr[region] += besterr; + } +} + +static float perturb_one(const Vector3 colors[], const float importance[], int np, int ch, int prec, const IntEndpts &old_endpts, IntEndpts &new_endpts, + float old_err, int do_b) +{ + // we have the old endpoints: old_endpts + // we have the perturbed endpoints: new_endpts + // we have the temporary endpoints: temp_endpts + + IntEndpts temp_endpts; + float min_err = old_err; // start with the best current error + int beststep; + + // copy real endpoints so we can perturb them + for (int i=0; i>= 1) + { + bool improved = false; + for (int sign = -1; sign <= 1; sign += 2) + { + if (do_b == 0) + { + temp_endpts.A[ch] = new_endpts.A[ch] + sign * step; + if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec)) + continue; + } + else + { + temp_endpts.B[ch] = new_endpts.B[ch] + sign * step; + if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec)) + continue; + } + + float err = map_colors(colors, 
importance, np, temp_endpts, prec); + + if (err < min_err) + { + improved = true; + min_err = err; + beststep = sign * step; + } + } + // if this was an improvement, move the endpoint and continue search from there + if (improved) + { + if (do_b == 0) + new_endpts.A[ch] += beststep; + else + new_endpts.B[ch] += beststep; + } + } + return min_err; +} + +static void optimize_one(const Vector3 colors[], const float importance[], int np, float orig_err, const IntEndpts &orig_endpts, int prec, IntEndpts &opt_endpts) +{ + float opt_err = orig_err; + for (int ch = 0; ch < NCHANNELS; ++ch) + { + opt_endpts.A[ch] = orig_endpts.A[ch]; + opt_endpts.B[ch] = orig_endpts.B[ch]; + } + /* + err0 = perturb(rgb0, delta0) + err1 = perturb(rgb1, delta1) + if (err0 < err1) + if (err0 >= initial_error) break + rgb0 += delta0 + next = 1 + else + if (err1 >= initial_error) break + rgb1 += delta1 + next = 0 + initial_err = map() + for (;;) + err = perturb(next ? rgb1:rgb0, delta) + if (err >= initial_err) break + next? rgb1 : rgb0 += delta + initial_err = err + */ + IntEndpts new_a, new_b; + IntEndpts new_endpt; + int do_b; + + // now optimize each channel separately + for (int ch = 0; ch < NCHANNELS; ++ch) + { + // figure out which endpoint when perturbed gives the most improvement and start there + // if we just alternate, we can easily end up in a local minima + float err0 = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_a, opt_err, 0); // perturb endpt A + float err1 = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_b, opt_err, 1); // perturb endpt B + + if (err0 < err1) + { + if (err0 >= opt_err) + continue; + + opt_endpts.A[ch] = new_a.A[ch]; + opt_err = err0; + do_b = 1; // do B next + } + else + { + if (err1 >= opt_err) + continue; + opt_endpts.B[ch] = new_b.B[ch]; + opt_err = err1; + do_b = 0; // do A next + } + + // now alternate endpoints and keep trying until there is no improvement + for (;;) + { + float err = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_endpt, opt_err, do_b); + if (err >= opt_err) + break; + if (do_b == 0) + opt_endpts.A[ch] = new_endpt.A[ch]; + else + opt_endpts.B[ch] = new_endpt.B[ch]; + opt_err = err; + do_b = 1 - do_b; // now move the other endpoint + } + } +} + +static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_err[NREGIONS_ONE], + const IntEndpts orig_endpts[NREGIONS_ONE], int prec, IntEndpts opt_endpts[NREGIONS_ONE]) +{ + Vector3 pixels[Tile::TILE_TOTAL]; + float importance[Tile::TILE_TOTAL]; + float err = 0; + + for (int region=0; region 0; ++i) + { + err = Utils::norm(tile.data[y][x], palette[region][i]) * tile.importance_map[y][x]; + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + besterr = err; + } + toterr += besterr; + } + return toterr; +} + +float ZOH::roughone(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS_ONE]) +{ + for (int region=0; region maxp) maxp = dp; + } + + // choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values + endpts[region].A = mean + minp*direction; + endpts[region].B = mean + maxp*direction; + + // clamp endpoints + // the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best + // shape based on endpoints being clamped + Utils::clamp(endpts[region].A); + Utils::clamp(endpts[region].B); + } + + return map_colors(tile, shapeindex, endpts); +} + +float ZOH::compressone(const Tile &t, char *block) +{ + int 
shapeindex_best = 0; + FltEndpts endptsbest[NREGIONS_ONE], tempendpts[NREGIONS_ONE]; + float msebest = FLT_MAX; + + /* + collect the mse values that are within 5% of the best values + optimize each one and choose the best + */ + // hack for now -- just use the best value WORK + for (int i=0; i0.0; ++i) + { + float mse = roughone(t, i, tempendpts); + if (mse < msebest) + { + msebest = mse; + shapeindex_best = i; + memcpy(endptsbest, tempendpts, sizeof(endptsbest)); + } + + } + return refineone(t, shapeindex_best, endptsbest, block); +} Index: ps/trunk/libraries/source/nvtt/src/src/bc6h/zohtwo.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/bc6h/zohtwo.cpp +++ ps/trunk/libraries/source/nvtt/src/src/bc6h/zohtwo.cpp @@ -0,0 +1,883 @@ +/* +Copyright 2007 nVidia, Inc. +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. + +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +See the License for the specific language governing permissions and limitations under the License. +*/ + +// two regions zoh compress/decompress code +// Thanks to Jacob Munkberg (jacob@cs.lth.se) for the shortcut of using SVD to do the equivalent of principal components analysis + +/* optimization algorithm + + get initial float endpoints + convert endpoints using 16 bit precision, transform, and get bit delta. choose likely endpoint compression candidates. + note that there will be 1 or 2 candidates; 2 will be chosen when the delta values are close to the max possible. + for each EC candidate in order from max precision to smaller precision + convert endpoints using the appropriate precision. + optimize the endpoints and minimize square error. save the error and index assignments. apply index compression as well. + (thus the endpoints and indices are in final form.) + transform and get bit delta. + if the bit delta fits, exit + if we ended up with no candidates somehow, choose the tail set of EC candidates and retry. this should happen hardly ever. + add a state variable to nvDebugCheck we only do this once. + convert to bit stream. + return the error. + + Global optimization + order all tiles based on their errors + do something special for high-error tiles + the goal here is to try to avoid tiling artifacts. but I think this is a research problem. let's just generate an error image... + + display an image that shows partitioning and precision selected for each tile +*/ + +#include "bits.h" +#include "tile.h" +#include "zoh.h" +#include "zoh_utils.h" + +#include "nvmath/Fitting.h" +#include "nvmath/Vector.inl" + +#include // strlen +#include // FLT_MAX + +using namespace nv; +using namespace ZOH; + +#define NINDICES 8 +#define INDEXBITS 3 +#define HIGH_INDEXBIT (1<<(INDEXBITS-1)) +#define DENOM (NINDICES-1) + +// WORK: determine optimal traversal pattern to search for best shape -- what does the error curve look like? +// i.e. can we search shapes in a particular order so we can see the global error minima easily and +// stop without having to touch all shapes? 
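// A note on the "bit delta" test in the comment above: once a candidate pattern's transform is
// applied, the non-base endpoints are stored as signed per-channel differences from the base
// endpoint, and a candidate precision is usable only if every difference fits the pattern's
// delta width. This is an illustrative sketch only, not the fit test the compressor itself
// uses; delta_fits is a placeholder name.
static bool delta_fits(int base, int endpoint, int deltaprec)
{
	// a signed delta d fits in p bits iff -(1 << (p-1)) <= d < (1 << (p-1))
	const int d = endpoint - base;
	return d >= -(1 << (deltaprec - 1)) && d < (1 << (deltaprec - 1));
}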
+ +#include "shapes_two.h" +// use only the first 32 available shapes +#undef NSHAPES +#undef SHAPEBITS +#define NSHAPES 32 +#define SHAPEBITS 5 + +#define POS_TO_X(pos) ((pos)&3) +#define POS_TO_Y(pos) (((pos)>>2)&3) + +#define NDELTA 4 + +struct Chanpat +{ + int prec[NDELTA]; // precision pattern for one channel +}; + +struct Pattern +{ + Chanpat chan[NCHANNELS]; // allow different bit patterns per channel -- but we still want constant precision per channel + int transformed; // if 0, deltas are unsigned and no transform; otherwise, signed and transformed + int mode; // associated mode value + int modebits; // number of mode bits + const char *encoding; // verilog description of encoding for this mode +}; + +#define MAXMODEBITS 5 +#define MAXMODES (1<> endbit, len); break; + case FIELD_D: out.write( d >> endbit, len); break; + case FIELD_RW: out.write(rw >> endbit, len); break; + case FIELD_RX: out.write(rx >> endbit, len); break; + case FIELD_RY: out.write(ry >> endbit, len); break; + case FIELD_RZ: out.write(rz >> endbit, len); break; + case FIELD_GW: out.write(gw >> endbit, len); break; + case FIELD_GX: out.write(gx >> endbit, len); break; + case FIELD_GY: out.write(gy >> endbit, len); break; + case FIELD_GZ: out.write(gz >> endbit, len); break; + case FIELD_BW: out.write(bw >> endbit, len); break; + case FIELD_BX: out.write(bx >> endbit, len); break; + case FIELD_BY: out.write(by >> endbit, len); break; + case FIELD_BZ: out.write(bz >> endbit, len); break; + default: nvUnreachable(); + } + } +} + +static bool read_header(Bits &in, ComprEndpts endpts[NREGIONS_TWO], int &shapeindex, Pattern &p) +{ + // reading isn't quite symmetric with writing -- we don't know the encoding until we decode the mode + int mode = in.read(2); + if (mode != 0x00 && mode != 0x01) + mode = (in.read(3) << 2) | mode; + + int pat_index = mode_to_pat[mode]; + + if (pat_index == -2) + return false; // reserved mode found + + nvDebugCheck (pat_index >= 0 && pat_index < NPATTERNS); + nvDebugCheck (in.getptr() == patterns[pat_index].modebits); + + p = patterns[pat_index]; + + int d; + int rw, rx, ry, rz; + int gw, gx, gy, gz; + int bw, bx, by, bz; + + d = 0; + rw = rx = ry = rz = 0; + gw = gx = gy = gz = 0; + bw = bx = by = bz = 0; + + int ptr = int(strlen(p.encoding)); + + while (ptr) + { + Field field; + int endbit, len; + + // !!!UNDONE: get rid of string parsing!!! 
+ Utils::parse(p.encoding, ptr, field, endbit, len); + + switch(field) + { + case FIELD_M: break; // already processed so ignore + case FIELD_D: d |= in.read(len) << endbit; break; + case FIELD_RW: rw |= in.read(len) << endbit; break; + case FIELD_RX: rx |= in.read(len) << endbit; break; + case FIELD_RY: ry |= in.read(len) << endbit; break; + case FIELD_RZ: rz |= in.read(len) << endbit; break; + case FIELD_GW: gw |= in.read(len) << endbit; break; + case FIELD_GX: gx |= in.read(len) << endbit; break; + case FIELD_GY: gy |= in.read(len) << endbit; break; + case FIELD_GZ: gz |= in.read(len) << endbit; break; + case FIELD_BW: bw |= in.read(len) << endbit; break; + case FIELD_BX: bx |= in.read(len) << endbit; break; + case FIELD_BY: by |= in.read(len) << endbit; break; + case FIELD_BZ: bz |= in.read(len) << endbit; break; + default: nvUnreachable(); + } + } + + nvDebugCheck (in.getptr() == 128 - 46); + + shapeindex = d; + endpts[0].A[0] = rw; endpts[0].B[0] = rx; endpts[1].A[0] = ry; endpts[1].B[0] = rz; + endpts[0].A[1] = gw; endpts[0].B[1] = gx; endpts[1].A[1] = gy; endpts[1].B[1] = gz; + endpts[0].A[2] = bw; endpts[0].B[2] = bx; endpts[1].A[2] = by; endpts[1].B[2] = bz; + + return true; +} + +static void write_indices(const int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex, Bits &out) +{ + int positions[NREGIONS_TWO]; + + for (int r = 0; r < NREGIONS_TWO; ++r) + positions[r] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,r); + + for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos) + { + int x = POS_TO_X(pos); + int y = POS_TO_Y(pos); + + bool match = false; + + for (int r = 0; r < NREGIONS_TWO; ++r) + if (positions[r] == pos) { match = true; break; } + + out.write(indices[y][x], INDEXBITS - (match ? 1 : 0)); + } +} + +static void emit_block(const ComprEndpts compr_endpts[NREGIONS_TWO], int shapeindex, const Pattern &p, const int indices[Tile::TILE_H][Tile::TILE_W], char *block) +{ + Bits out(block, ZOH::BITSIZE); + + write_header(compr_endpts, shapeindex, p, out); + + write_indices(indices, shapeindex, out); + + nvDebugCheck(out.getptr() == ZOH::BITSIZE); +} + +static void generate_palette_quantized(const IntEndpts &endpts, int prec, Vector3 palette[NINDICES]) +{ + // scale endpoints + int a, b; // really need a IntVector3... + + a = Utils::unquantize(endpts.A[0], prec); + b = Utils::unquantize(endpts.B[0], prec); + + // interpolate + for (int i = 0; i < NINDICES; ++i) + palette[i].x = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec)); + + a = Utils::unquantize(endpts.A[1], prec); + b = Utils::unquantize(endpts.B[1], prec); + + // interpolate + for (int i = 0; i < NINDICES; ++i) + palette[i].y = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec)); + + a = Utils::unquantize(endpts.A[2], prec); + b = Utils::unquantize(endpts.B[2], prec); + + // interpolate + for (int i = 0; i < NINDICES; ++i) + palette[i].z = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec)); +} + +static void read_indices(Bits &in, int shapeindex, int indices[Tile::TILE_H][Tile::TILE_W]) +{ + int positions[NREGIONS_TWO]; + + for (int r = 0; r < NREGIONS_TWO; ++r) + positions[r] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,r); + + for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos) + { + int x = POS_TO_X(pos); + int y = POS_TO_Y(pos); + + bool match = false; + + for (int r = 0; r < NREGIONS_TWO; ++r) + if (positions[r] == pos) { match = true; break; } + + indices[y][x]= in.read(INDEXBITS - (match ? 
1 : 0)); + } +} + +void ZOH::decompresstwo(const char *block, Tile &t) +{ + Bits in(block, ZOH::BITSIZE); + + Pattern p; + IntEndpts endpts[NREGIONS_TWO]; + ComprEndpts compr_endpts[NREGIONS_TWO]; + int shapeindex; + + if (!read_header(in, compr_endpts, shapeindex, p)) + { + // reserved mode, return all zeroes + for (int y = 0; y < Tile::TILE_H; y++) + for (int x = 0; x < Tile::TILE_W; x++) + t.data[y][x] = Vector3(0.0f); + + return; + } + + decompress_endpts(compr_endpts, endpts, p); + + Vector3 palette[NREGIONS_TWO][NINDICES]; + for (int r = 0; r < NREGIONS_TWO; ++r) + generate_palette_quantized(endpts[r], p.chan[0].prec[0], &palette[r][0]); + + int indices[Tile::TILE_H][Tile::TILE_W]; + + read_indices(in, shapeindex, indices); + + nvDebugCheck(in.getptr() == ZOH::BITSIZE); + + // lookup + for (int y = 0; y < Tile::TILE_H; y++) + for (int x = 0; x < Tile::TILE_W; x++) + t.data[y][x] = palette[REGION(x,y,shapeindex)][indices[y][x]]; +} + +// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr +static float map_colors(const Vector3 colors[], const float importance[], int np, const IntEndpts &endpts, int prec) +{ + Vector3 palette[NINDICES]; + float toterr = 0; + Vector3 err; + + generate_palette_quantized(endpts, prec, palette); + + for (int i = 0; i < np; ++i) + { + float err, besterr; + + besterr = Utils::norm(colors[i], palette[0]) * importance[i]; + + for (int j = 1; j < NINDICES && besterr > 0; ++j) + { + err = Utils::norm(colors[i], palette[j]) * importance[i]; + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + besterr = err; + } + toterr += besterr; + } + return toterr; +} + +// assign indices given a tile, shape, and quantized endpoints, return toterr for each region +static void assign_indices(const Tile &tile, int shapeindex, IntEndpts endpts[NREGIONS_TWO], int prec, + int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS_TWO]) +{ + // build list of possibles + Vector3 palette[NREGIONS_TWO][NINDICES]; + + for (int region = 0; region < NREGIONS_TWO; ++region) + { + generate_palette_quantized(endpts[region], prec, &palette[region][0]); + toterr[region] = 0; + } + + Vector3 err; + + for (int y = 0; y < tile.size_y; y++) + for (int x = 0; x < tile.size_x; x++) + { + int region = REGION(x,y,shapeindex); + float err, besterr; + + besterr = Utils::norm(tile.data[y][x], palette[region][0]); + indices[y][x] = 0; + + for (int i = 1; i < NINDICES && besterr > 0; ++i) + { + err = Utils::norm(tile.data[y][x], palette[region][i]); + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[y][x] = i; + } + } + toterr[region] += besterr; + } +} + +static float perturb_one(const Vector3 colors[], const float importance[], int np, int ch, int prec, const IntEndpts &old_endpts, IntEndpts &new_endpts, + float old_err, int do_b) +{ + // we have the old endpoints: old_endpts + // we have the perturbed endpoints: new_endpts + // we have the temporary endpoints: temp_endpts + + IntEndpts temp_endpts; + float min_err = old_err; // start with the best current error + int beststep; + + // copy real endpoints so we can perturb them + for (int i=0; i>= 1) + { + bool improved = false; + for (int sign = -1; sign <= 1; sign += 2) + { + if (do_b == 0) + { + temp_endpts.A[ch] = new_endpts.A[ch] + sign * step; + if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec)) + continue; + } + else + { + temp_endpts.B[ch] = 
new_endpts.B[ch] + sign * step; + if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec)) + continue; + } + + float err = map_colors(colors, importance, np, temp_endpts, prec); + + if (err < min_err) + { + improved = true; + min_err = err; + beststep = sign * step; + } + } + // if this was an improvement, move the endpoint and continue search from there + if (improved) + { + if (do_b == 0) + new_endpts.A[ch] += beststep; + else + new_endpts.B[ch] += beststep; + } + } + return min_err; +} + +static void optimize_one(const Vector3 colors[], const float importance[], int np, float orig_err, const IntEndpts &orig_endpts, int prec, IntEndpts &opt_endpts) +{ + float opt_err = orig_err; + for (int ch = 0; ch < NCHANNELS; ++ch) + { + opt_endpts.A[ch] = orig_endpts.A[ch]; + opt_endpts.B[ch] = orig_endpts.B[ch]; + } + /* + err0 = perturb(rgb0, delta0) + err1 = perturb(rgb1, delta1) + if (err0 < err1) + if (err0 >= initial_error) break + rgb0 += delta0 + next = 1 + else + if (err1 >= initial_error) break + rgb1 += delta1 + next = 0 + initial_err = map() + for (;;) + err = perturb(next ? rgb1:rgb0, delta) + if (err >= initial_err) break + next? rgb1 : rgb0 += delta + initial_err = err + */ + IntEndpts new_a, new_b; + IntEndpts new_endpt; + int do_b; + + // now optimize each channel separately + for (int ch = 0; ch < NCHANNELS; ++ch) + { + // figure out which endpoint when perturbed gives the most improvement and start there + // if we just alternate, we can easily end up in a local minima + float err0 = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_a, opt_err, 0); // perturb endpt A + float err1 = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_b, opt_err, 1); // perturb endpt B + + if (err0 < err1) + { + if (err0 >= opt_err) + continue; + + opt_endpts.A[ch] = new_a.A[ch]; + opt_err = err0; + do_b = 1; // do B next + } + else + { + if (err1 >= opt_err) + continue; + opt_endpts.B[ch] = new_b.B[ch]; + opt_err = err1; + do_b = 0; // do A next + } + + // now alternate endpoints and keep trying until there is no improvement + for (;;) + { + float err = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_endpt, opt_err, do_b); + if (err >= opt_err) + break; + if (do_b == 0) + opt_endpts.A[ch] = new_endpt.A[ch]; + else + opt_endpts.B[ch] = new_endpt.B[ch]; + opt_err = err; + do_b = 1 - do_b; // now move the other endpoint + } + } +} + +static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_err[NREGIONS_TWO], + const IntEndpts orig_endpts[NREGIONS_TWO], int prec, IntEndpts opt_endpts[NREGIONS_TWO]) +{ + Vector3 pixels[Tile::TILE_TOTAL]; + float importance[Tile::TILE_TOTAL]; + float err = 0; + + for (int region=0; region 0; ++i) + { + err = Utils::norm(tile.data[y][x], palette[region][i]) * tile.importance_map[y][x]; + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + besterr = err; + } + toterr += besterr; + } + return toterr; +} + +float ZOH::roughtwo(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS_TWO]) +{ + for (int region=0; region maxp) maxp = dp; + } + + // choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values + endpts[region].A = mean + minp*direction; + endpts[region].B = mean + maxp*direction; + + // clamp endpoints + // the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best + // shape based on endpoints being clamped + Utils::clamp(endpts[region].A); + 
Utils::clamp(endpts[region].B); + } + + return map_colors(tile, shapeindex, endpts); +} + +float ZOH::compresstwo(const Tile &t, char *block) +{ + int shapeindex_best = 0; + FltEndpts endptsbest[NREGIONS_TWO], tempendpts[NREGIONS_TWO]; + float msebest = FLT_MAX; + + /* + collect the mse values that are within 5% of the best values + optimize each one and choose the best + */ + // hack for now -- just use the best value WORK + for (int i=0; i0.0; ++i) + { + float mse = roughtwo(t, i, tempendpts); + if (mse < msebest) + { + msebest = mse; + shapeindex_best = i; + memcpy(endptsbest, tempendpts, sizeof(endptsbest)); + } + + } + return refinetwo(t, shapeindex_best, endptsbest, block); +} + Index: ps/trunk/libraries/source/nvtt/src/src/bc7/CMakeLists.txt =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/bc7/CMakeLists.txt +++ ps/trunk/libraries/source/nvtt/src/src/bc7/CMakeLists.txt @@ -0,0 +1,30 @@ +PROJECT(bc7) + +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +SET(BC7_SRCS + avpcl.cpp + avpcl.h + avpcl_mode0.cpp + avpcl_mode1.cpp + avpcl_mode2.cpp + avpcl_mode3.cpp + avpcl_mode4.cpp + avpcl_mode5.cpp + avpcl_mode6.cpp + avpcl_mode7.cpp + bits.h + endpts.h + shapes_three.h + shapes_two.h + tile.h + avpcl_utils.cpp + avpcl_utils.h) + +ADD_LIBRARY(bc7 STATIC ${BC7_SRCS}) + +IF(NOT WIN32) + IF(CMAKE_COMPILER_IS_GNUCXX) + SET_TARGET_PROPERTIES(bc7 PROPERTIES COMPILE_FLAGS -fPIC) + ENDIF(CMAKE_COMPILER_IS_GNUCXX) +ENDIF(NOT WIN32) Index: ps/trunk/libraries/source/nvtt/src/src/bc7/avpcl.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/bc7/avpcl.h +++ ps/trunk/libraries/source/nvtt/src/src/bc7/avpcl.h @@ -0,0 +1,99 @@ +/* +Copyright 2007 nVidia, Inc. +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. + +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +See the License for the specific language governing permissions and limitations under the License. +*/ + +#ifndef _AVPCL_H +#define _AVPCL_H + +#include "tile.h" +#include "bits.h" + +#define DISABLE_EXHAUSTIVE 1 // define this if you don't want to spend a lot of time on exhaustive compression +#define USE_ZOH_INTERP 1 // use zoh interpolator, otherwise use exact avpcl interpolators +#define USE_ZOH_INTERP_ROUNDED 1 // use the rounded versions! 
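// Note on DISABLE_EXHAUSTIVE: when it is defined, the per-channel exhaustive endpoint scan in
// the avpcl_mode*.cpp compressors is clamped to a fixed +/-3 neighbourhood around each
// optimized endpoint (see the "#ifdef DISABLE_EXHAUSTIVE" blocks there) instead of widening
// with the current error, trading a little quality for much shorter compression time.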
+ +namespace AVPCL { + +static const int NREGIONS_TWO = 2; +static const int NREGIONS_THREE = 3; + +static const int BLOCKSIZE=16; +static const int BITSIZE=128; + +// global flags +extern bool flag_premult; +extern bool flag_nonuniform; +extern bool flag_nonuniform_ati; + +// global mode +extern bool mode_rgb; // true if image had constant alpha = 255 + +void compress(const Tile &t, char *block); +void decompress(const char *block, Tile &t); + +float compress_mode0(const Tile &t, char *block); +void decompress_mode0(const char *block, Tile &t); + +float compress_mode1(const Tile &t, char *block); +void decompress_mode1(const char *block, Tile &t); + +float compress_mode2(const Tile &t, char *block); +void decompress_mode2(const char *block, Tile &t); + +float compress_mode3(const Tile &t, char *block); +void decompress_mode3(const char *block, Tile &t); + +float compress_mode4(const Tile &t, char *block); +void decompress_mode4(const char *block, Tile &t); + +float compress_mode5(const Tile &t, char *block); +void decompress_mode5(const char *block, Tile &t); + +float compress_mode6(const Tile &t, char *block); +void decompress_mode6(const char *block, Tile &t); + +float compress_mode7(const Tile &t, char *block); +void decompress_mode7(const char *block, Tile &t); + +inline int getmode(Bits &in) +{ + int mode = 0; + + if (in.read(1)) mode = 0; + else if (in.read(1)) mode = 1; + else if (in.read(1)) mode = 2; + else if (in.read(1)) mode = 3; + else if (in.read(1)) mode = 4; + else if (in.read(1)) mode = 5; + else if (in.read(1)) mode = 6; + else if (in.read(1)) mode = 7; + else mode = 8; // reserved + return mode; +} +inline int getmode(const char *block) +{ + int bits = block[0], mode = 0; + + if (bits & 1) mode = 0; + else if ((bits&3) == 2) mode = 1; + else if ((bits&7) == 4) mode = 2; + else if ((bits & 0xF) == 8) mode = 3; + else if ((bits & 0x1F) == 16) mode = 4; + else if ((bits & 0x3F) == 32) mode = 5; + else if ((bits & 0x7F) == 64) mode = 6; + else if ((bits & 0xFF) == 128) mode = 7; + else mode = 8; // reserved + return mode; +} + +} + +#endif Index: ps/trunk/libraries/source/nvtt/src/src/bc7/avpcl.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/bc7/avpcl.cpp +++ ps/trunk/libraries/source/nvtt/src/src/bc7/avpcl.cpp @@ -0,0 +1,264 @@ +/* +Copyright 2007 nVidia, Inc. +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. + +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +See the License for the specific language governing permissions and limitations under the License. 
+*/ + +// the avpcl compressor and decompressor + +#include "tile.h" +#include "avpcl.h" +#include "nvcore/Debug.h" +#include "nvmath/Vector.inl" +#include +#include + +using namespace nv; +using namespace AVPCL; + +// global flags +bool AVPCL::flag_premult = false; +bool AVPCL::flag_nonuniform = false; +bool AVPCL::flag_nonuniform_ati = false; + +// global mode +bool AVPCL::mode_rgb = false; // true if image had constant alpha = 255 + +void AVPCL::compress(const Tile &t, char *block) +{ + char tempblock[AVPCL::BLOCKSIZE]; + float msebest = FLT_MAX; + + float mse_mode0 = AVPCL::compress_mode0(t, tempblock); if(mse_mode0 < msebest) { msebest = mse_mode0; memcpy(block, tempblock, AVPCL::BLOCKSIZE); } + float mse_mode1 = AVPCL::compress_mode1(t, tempblock); if(mse_mode1 < msebest) { msebest = mse_mode1; memcpy(block, tempblock, AVPCL::BLOCKSIZE); } + float mse_mode2 = AVPCL::compress_mode2(t, tempblock); if(mse_mode2 < msebest) { msebest = mse_mode2; memcpy(block, tempblock, AVPCL::BLOCKSIZE); } + float mse_mode3 = AVPCL::compress_mode3(t, tempblock); if(mse_mode3 < msebest) { msebest = mse_mode3; memcpy(block, tempblock, AVPCL::BLOCKSIZE); } + float mse_mode4 = AVPCL::compress_mode4(t, tempblock); if(mse_mode4 < msebest) { msebest = mse_mode4; memcpy(block, tempblock, AVPCL::BLOCKSIZE); } + float mse_mode5 = AVPCL::compress_mode5(t, tempblock); if(mse_mode5 < msebest) { msebest = mse_mode5; memcpy(block, tempblock, AVPCL::BLOCKSIZE); } + float mse_mode6 = AVPCL::compress_mode6(t, tempblock); if(mse_mode6 < msebest) { msebest = mse_mode6; memcpy(block, tempblock, AVPCL::BLOCKSIZE); } + float mse_mode7 = AVPCL::compress_mode7(t, tempblock); if(mse_mode7 < msebest) { msebest = mse_mode7; memcpy(block, tempblock, AVPCL::BLOCKSIZE); } + + /*if (errfile) + { + float errs[21]; + int nerrs = 8; + errs[0] = mse_mode0; + errs[1] = mse_mode1; + errs[2] = mse_mode2; + errs[3] = mse_mode3; + errs[4] = mse_mode4; + errs[5] = mse_mode5; + errs[6] = mse_mode6; + errs[7] = mse_mode7; + if (fwrite(errs, sizeof(float), nerrs, errfile) != nerrs) + throw "Write error on error file"; + }*/ +} + +/* +static int getbit(char *b, int start) +{ + if (start < 0 || start >= 128) return 0; // out of range + + int ix = start >> 3; + return (b[ix] & (1 << (start & 7))) != 0; +} + +static int getbits(char *b, int start, int len) +{ + int out = 0; + for (int i=0; i= 128) return; // out of range + + int ix = start >> 3; + + if (bit & 1) + b[ix] |= (1 << (start & 7)); + else + b[ix] &= ~(1 << (start & 7)); +} + +static void setbits(char *b, int start, int len, int bits) +{ + for (int i=0; i> i); +} +*/ + +void AVPCL::decompress(const char *cblock, Tile &t) +{ + char block[AVPCL::BLOCKSIZE]; + memcpy(block, cblock, AVPCL::BLOCKSIZE); + + switch(getmode(block)) + { + case 0: AVPCL::decompress_mode0(block, t); break; + case 1: AVPCL::decompress_mode1(block, t); break; + case 2: AVPCL::decompress_mode2(block, t); break; + case 3: AVPCL::decompress_mode3(block, t); break; + case 4: AVPCL::decompress_mode4(block, t); break; + case 5: AVPCL::decompress_mode5(block, t); break; + case 6: AVPCL::decompress_mode6(block, t); break; + case 7: AVPCL::decompress_mode7(block, t); break; + case 8: // return a black tile if you get a reserved mode + for (int y=0; y pixels; + int w, h; + char block[AVPCL::BLOCKSIZE]; + + Targa::read(inf, pixels, w, h); + FILE *avpclfile = fopen(avpclf.c_str(), "wb"); + if (avpclfile == NULL) throw "Unable to open .avpcl file for write"; + FILE *errfile = NULL; + if (errf != "") + { + errfile = 
fopen(errf.c_str(), "wb"); + if (errfile == NULL) throw "Unable to open error file for write"; + } + + // Look at alpha channel and override the premult flag if alpha is constant (but only if premult is set) + if (AVPCL::flag_premult) + { + if (AVPCL::mode_rgb) + { + AVPCL::flag_premult = false; + cout << endl << "NOTE: Source image alpha is constant 255, turning off premultiplied-alpha error metric." << endl << endl; + } + } + + // stuff for progress bar O.o + int ntiles = ((h+Tile::TILE_H-1)/Tile::TILE_H)*((w+Tile::TILE_W-1)/Tile::TILE_W); + int tilecnt = 0; + clock_t start, prev, cur; + + start = prev = clock(); + + // convert to tiles and compress each tile + for (int y=0; y> thing; + return thing; +} + +// avpcl file name is ...-w-h-RGB[A].avpcl, extract width and height +static void extract(string avpclf, int &w, int &h, bool &mode_rgb) +{ + size_t n = avpclf.rfind('.', avpclf.length()-1); + size_t n1 = avpclf.rfind('-', n-1); + size_t n2 = avpclf.rfind('-', n1-1); + size_t n3 = avpclf.rfind('-', n2-1); + // ...-wwww-hhhh-RGB[A].avpcl + // ^ ^ ^ ^ + // n3 n2 n1 n n3 pixels; + int w, h; + char block[AVPCL::BLOCKSIZE]; + + extract(avpclf, w, h, AVPCL::mode_rgb); + FILE *avpclfile = fopen(avpclf.c_str(), "rb"); + if (avpclfile == NULL) throw "Unable to open .avpcl file for read"; + pixels.resizeErase(h, w); + + // convert to tiles and decompress each tile + for (int y=0; y +#include + +#include "shapes_three.h" + +// use only the first 16 available shapes +#undef NSHAPES +#undef SHAPEBITS +#define NSHAPES 16 +#define SHAPEBITS 4 + +using namespace nv; +using namespace AVPCL; + +#define NLSBMODES 4 // number of different lsb modes per region. since we have two .1 per region, that can have 4 values + +#define NINDICES 8 +#define INDEXBITS 3 +#define HIGH_INDEXBIT (1<<(INDEXBITS-1)) +#define DENOM (NINDICES-1) +#define BIAS (DENOM/2) + +// WORK: determine optimal traversal pattern to search for best shape -- what does the error curve look like? +// i.e. can we search shapes in a particular order so we can see the global error minima easily and +// stop without having to touch all shapes? + +#define POS_TO_X(pos) ((pos)&3) +#define POS_TO_Y(pos) (((pos)>>2)&3) + +#define NBITSIZES (NREGIONS*2) +#define ABITINDEX(region) (2*(region)+0) +#define BBITINDEX(region) (2*(region)+1) + +struct ChanBits +{ + int nbitsizes[NBITSIZES]; // bitsizes for one channel +}; + +struct Pattern +{ + ChanBits chan[NCHANNELS_RGB];// bit patterns used per channel + int transformed; // if 0, deltas are unsigned and no transform; otherwise, signed and transformed + int mode; // associated mode value + int modebits; // number of mode bits + const char *encoding; // verilog description of encoding for this mode +}; + +#define NPATTERNS 1 + +static Pattern patterns[NPATTERNS] = +{ + // red green blue xfm mode mb + 4,4,4,4,4,4, 4,4,4,4,4,4, 4,4,4,4,4,4, 0, 0x1, 1, "", // really 444.1 x 6 +}; + +struct RegionPrec +{ + int endpt_a_prec[NCHANNELS_RGB]; + int endpt_b_prec[NCHANNELS_RGB]; +}; + +struct PatternPrec +{ + RegionPrec region_precs[NREGIONS]; +}; + +// this is the precision for each channel and region +// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this! +static PatternPrec pattern_precs[NPATTERNS] = +{ + 4,4,4, 4,4,4, 4,4,4, 4,4,4, 4,4,4, 4,4,4, +}; + +// return # of bits needed to store n. 
handle signed or unsigned cases properly +static int nbits(int n, bool issigned) +{ + int nb; + if (n==0) + return 0; // no bits needed for 0, signed or not + else if (n > 0) + { + for (nb=0; n; ++nb, n>>=1) ; + return nb + (issigned?1:0); + } + else + { + nvAssert (issigned); + for (nb=0; n<-1; ++nb, n>>=1) ; + return nb + 1; + } +} + +static void transform_forward(IntEndptsRGB_2 ep[NREGIONS]) +{ + nvUnreachable(); +} + +static void transform_inverse(IntEndptsRGB_2 ep[NREGIONS]) +{ + nvUnreachable(); +} + +// endpoints are 555,555; reduce to 444,444 and put the lsb bit majority in compr_bits +static void compress_one(const IntEndptsRGB& endpts, IntEndptsRGB_2& compr_endpts) +{ + int onescnt; + + onescnt = 0; + for (int j=0; j> 1; + nvAssert (compr_endpts.A[j] < 16); + } + compr_endpts.a_lsb = onescnt >= 2; + + onescnt = 0; + for (int j=0; j> 1; + nvAssert (compr_endpts.B[j] < 16); + } + compr_endpts.b_lsb = onescnt >= 2; +} + +static void uncompress_one(const IntEndptsRGB_2& compr_endpts, IntEndptsRGB& endpts) +{ + for (int j=0; j= 0 && pat_index < NPATTERNS); + nvAssert (in.getptr() == patterns[pat_index].modebits); + + shapeindex = in.read(SHAPEBITS); + p = patterns[pat_index]; + + for (int j=0; j 0; ++j) + { + float err = Utils::metric4(colors[i], palette[j]) * importance[i]; + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[i] = j; + } + } + toterr += besterr; + + // check for early exit + if (toterr > current_err) + { + // fill out bogus index values so it's initialized at least + for (int k = i; k < np; ++k) + indices[k] = -1; + + return FLT_MAX; + } + } + return toterr; +} + +// assign indices given a tile, shape, and quantized endpoints, return toterr for each region +static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB_2 endpts[NREGIONS], const PatternPrec &pattern_prec, + int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS]) +{ + // build list of possibles + Vector4 palette[NREGIONS][NINDICES]; + + for (int region = 0; region < NREGIONS; ++region) + { + generate_palette_quantized(endpts[region], pattern_prec.region_precs[region], &palette[region][0]); + toterr[region] = 0; + } + + Vector4 err; + + for (int y = 0; y < tile.size_y; y++) + for (int x = 0; x < tile.size_x; x++) + { + int region = REGION(x,y,shapeindex); + float err, besterr = FLT_MAX; + + for (int i = 0; i < NINDICES && besterr > 0; ++i) + { + err = Utils::metric4(tile.data[y][x], palette[region][i]); + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[y][x] = i; + } + } + toterr[region] += besterr; + } +} + +// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's +// this function returns either old_err or a value smaller (if it was successful in improving the error) +static float perturb_one(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec ®ion_prec, const IntEndptsRGB_2 &old_endpts, IntEndptsRGB_2 &new_endpts, + float old_err, int do_b, int indices[Tile::TILE_TOTAL]) +{ + // we have the old endpoints: old_endpts + // we have the perturbed endpoints: new_endpts + // we have the temporary endpoints: temp_endpts + + IntEndptsRGB_2 temp_endpts; + float min_err = old_err; // start with the best current error + int beststep; + int temp_indices[Tile::TILE_TOTAL]; + + for (int i=0; i>= 1) + { + bool improved = false; + for (int sign = -1; sign <= 1; sign += 
2) + { + if (do_b == 0) + { + temp_endpts.A[ch] = new_endpts.A[ch] + sign * step; + if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec)) + continue; + } + else + { + temp_endpts.B[ch] = new_endpts.B[ch] + sign * step; + if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec)) + continue; + } + + float err = map_colors(colors, importance, np, temp_endpts, region_prec, min_err, temp_indices); + + if (err < min_err) + { + improved = true; + min_err = err; + beststep = sign * step; + for (int i=0; i 5000 perturb endpoints 50% of precision +// if err > 1000 25% +// if err > 200 12.5% +// if err > 40 6.25% +// for np = 16 -- adjust error thresholds as a function of np +// always ensure endpoint ordering is preserved (no need to overlap the scan) +// if orig_err returned from this is less than its input value, then indices[] will contain valid indices +static float exhaustive(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec ®ion_prec, float &orig_err, IntEndptsRGB_2 &opt_endpts, int indices[Tile::TILE_TOTAL]) +{ + IntEndptsRGB_2 temp_endpts; + float best_err = orig_err; + int aprec = region_prec.endpt_a_prec[ch]; + int bprec = region_prec.endpt_b_prec[ch]; + int good_indices[Tile::TILE_TOTAL]; + int temp_indices[Tile::TILE_TOTAL]; + + for (int i=0; i 5000.0*thr_scale) { adelta = (1 << aprec)/2; bdelta = (1 << bprec)/2; } + else if (orig_err > 1000.0*thr_scale) { adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; } + else if (orig_err > 200.0*thr_scale) { adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; } + else if (orig_err > 40.0*thr_scale) { adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; } + adelta = max(adelta, 3); + bdelta = max(bdelta, 3); + +#ifdef DISABLE_EXHAUSTIVE + adelta = bdelta = 3; +#endif + + temp_endpts = opt_endpts; + + // ok figure out the range of A and B + int alow = max(0, opt_endpts.A[ch] - adelta); + int ahigh = min((1<= initial_error) break + rgb0 += delta0 + next = 1 + else + if (err1 >= initial_error) break + rgb1 += delta1 + next = 0 + initial_err = map() + for (;;) + err = perturb(next ? rgb1:rgb0, delta) + if (err >= initial_err) break + next? rgb1 : rgb0 += delta + initial_err = err + */ + IntEndptsRGB_2 new_a, new_b; + IntEndptsRGB_2 new_endpt; + int do_b; + int orig_indices[Tile::TILE_TOTAL]; + int new_indices[Tile::TILE_TOTAL]; + int temp_indices0[Tile::TILE_TOTAL]; + int temp_indices1[Tile::TILE_TOTAL]; + + // now optimize each channel separately + // for the first error improvement, we save the indices. then, for any later improvement, we compare the indices + // if they differ, we restart the loop (which then falls back to looking for a first improvement.) 
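	// Each perturb_one() call below runs a shrinking-step search on a single endpoint channel:
	// the step is halved every round ("step >>= 1" in perturb_one above) and a move is kept only
	// when the re-mapped error improves, so the per-channel cost grows with the endpoint bit
	// width rather than with the full endpoint range.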
+ for (int ch = 0; ch < NCHANNELS_RGB; ++ch) + { + // figure out which endpoint when perturbed gives the most improvement and start there + // if we just alternate, we can easily end up in a local minima + float err0 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0); // perturb endpt A + float err1 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1); // perturb endpt B + + if (err0 < err1) + { + if (err0 >= opt_err) + continue; + + for (int i=0; i= opt_err) + continue; + + for (int i=0; i= opt_err) + break; + + for (int i=0; i> 1) & 1; + + // make sure we have a valid error for temp_in + // we use FLT_MAX here because we want an accurate temp_in_err, no shortcuts + // (mapcolors will compute a mapping but will stop if the error exceeds the value passed in the FLT_MAX position) + float temp_in_err = map_colors(pixels, importance, np, temp_in, pattern_prec.region_precs[region], FLT_MAX, temp_indices); + + // now try to optimize these endpoints + float temp_out_err = optimize_one(pixels, importance, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out); + + // if we find an improvement, update the best so far and correct the output endpoints and errors + if (temp_out_err < best_err) + { + best_err = temp_out_err; + opt_err[region] = temp_out_err; + opt_endpts[region] = temp_out; + } + } + } +} + +/* optimization algorithm + for each pattern + convert endpoints using pattern precision + assign indices and get initial error + compress indices (and possibly reorder endpoints) + transform endpoints + if transformed endpoints fit pattern + get original endpoints back + optimize endpoints, get new endpoints, new indices, and new error // new error will almost always be better + compress new indices + transform new endpoints + if new endpoints fit pattern AND if error is improved + emit compressed block with new data + else + emit compressed block with original data // to try to preserve maximum endpoint precision +*/ + +static float refine(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS], char *block) +{ + float orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS]; + IntEndptsRGB_2 orig_endpts[NREGIONS], opt_endpts[NREGIONS]; + int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W]; + + for (int sp = 0; sp < NPATTERNS; ++sp) + { + quantize_endpts(endpts, pattern_precs[sp], orig_endpts); + assign_indices(tile, shapeindex_best, orig_endpts, pattern_precs[sp], orig_indices, orig_err); + swap_indices(orig_endpts, orig_indices, shapeindex_best); + if (patterns[sp].transformed) + transform_forward(orig_endpts); + // apply a heuristic here -- we check if the endpoints fit before we try to optimize them. + // the assumption made is that if they don't fit now, they won't fit after optimizing. 
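	// For mode 0 the single entry in "patterns" has its transform flag set to 0, so the two
	// "if (patterns[sp].transformed)" branches in this function never execute -- which is why
	// transform_forward() and transform_inverse() above are plain nvUnreachable() stubs.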
+ if (endpts_fit(orig_endpts, patterns[sp])) + { + if (patterns[sp].transformed) + transform_inverse(orig_endpts); + optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts); + assign_indices(tile, shapeindex_best, opt_endpts, pattern_precs[sp], opt_indices, opt_err); + // (nreed) Commented out asserts because they go off all the time...not sure why + //for (int i=0; i 255.0f) v.x = 255.0f; + if (v.y < 0.0f) v.y = 0.0f; + if (v.y > 255.0f) v.y = 255.0f; + if (v.z < 0.0f) v.z = 0.0f; + if (v.z > 255.0f) v.z = 255.0f; + v.w = 255.0f; +} + +static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS], Vector4 palette[NREGIONS][NINDICES]) +{ + for (int region = 0; region < NREGIONS; ++region) + for (int i = 0; i < NINDICES; ++i) + palette[region][i] = Utils::lerp(endpts[region].A, endpts[region].B, i, 0, DENOM); +} + +// generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined +static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS]) +{ + // build list of possibles + Vector4 palette[NREGIONS][NINDICES]; + + generate_palette_unquantized(endpts, palette); + + float toterr = 0; + Vector4 err; + + for (int y = 0; y < tile.size_y; y++) + for (int x = 0; x < tile.size_x; x++) + { + int region = REGION(x,y,shapeindex); + float err, besterr = FLT_MAX; + + for (int i = 0; i < NINDICES && besterr > 0; ++i) + { + err = Utils::metric4(tile.data[y][x], palette[region][i]); + + if (err > besterr) // error increased, so we're done searching. this works for most norms. + break; + if (err < besterr) + besterr = err; + } + toterr += besterr; + } + return toterr; +} + +// for this mode, we assume alpha = 255 constant and compress only the RGB portion. +// however, we do the error check against the actual alpha values supplied for the tile. +static float rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS]) +{ + for (int region=0; region maxp) maxp = dp; + } + + // choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values + endpts[region].A = mean + minp*Vector4(direction, 0); + endpts[region].B = mean + maxp*Vector4(direction, 0); + + // clamp endpoints + // the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best + // shape based on endpoints being clamped + clamp(endpts[region].A); + clamp(endpts[region].B); + } + + return map_colors(tile, shapeindex, endpts); +} + +static void swap(float *list1, int *list2, int i, int j) +{ + float t = list1[i]; list1[i] = list1[j]; list1[j] = t; + int t1 = list2[i]; list2[i] = list2[j]; list2[j] = t1; +} + +float AVPCL::compress_mode0(const Tile &t, char *block) +{ + // number of rough cases to look at. reasonable values of this are 1, NSHAPES/4, and NSHAPES + // NSHAPES/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out + const int NITEMS=NSHAPES/4; + + // pick the best NITEMS shapes and refine these. 
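	// With shapes_three.h restricted to NSHAPES = 16 above, NITEMS works out to NSHAPES/4 = 4:
	// the 16 three-region partitions are ranked by the rough (principal-direction) error below,
	// and only the 4 best-ranked shapes are run through the full refine() pass.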
+ struct { + FltEndpts endpts[NREGIONS]; + } all[NSHAPES]; + float roughmse[NSHAPES]; + int index[NSHAPES]; + char tempblock[AVPCL::BLOCKSIZE]; + float msebest = FLT_MAX; + + for (int i=0; i roughmse[j]) + swap(roughmse, index, i, j); + + for (int i=0; i0; ++i) + { + int shape = index[i]; + float mse = refine(t, shape, &all[shape].endpts[0], tempblock); + if (mse < msebest) + { + memcpy(block, tempblock, sizeof(tempblock)); + msebest = mse; + } + } + return msebest; +} + Index: ps/trunk/libraries/source/nvtt/src/src/bc7/avpcl_mode1.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/bc7/avpcl_mode1.cpp +++ ps/trunk/libraries/source/nvtt/src/src/bc7/avpcl_mode1.cpp @@ -0,0 +1,1047 @@ +/* +Copyright 2007 nVidia, Inc. +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. + +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +See the License for the specific language governing permissions and limitations under the License. +*/ + +// Thanks to Jacob Munkberg (jacob@cs.lth.se) for the shortcut of using SVD to do the equivalent of principal components analysis + +// x10 (666x2).1 (666x2).1 64p 3bi + +#include "bits.h" +#include "tile.h" +#include "avpcl.h" +#include "nvcore/Debug.h" +#include "nvmath/Vector.inl" +#include "nvmath/Matrix.inl" +#include "nvmath/Fitting.h" +#include "avpcl_utils.h" +#include "endpts.h" +#include +#include + +#include "shapes_two.h" + +using namespace nv; +using namespace AVPCL; + +#define NLSBMODES 2 // number of different lsb modes per region. since we have one .1 per region, that can have 2 values + +#define NINDICES 8 +#define INDEXBITS 3 +#define HIGH_INDEXBIT (1<<(INDEXBITS-1)) +#define DENOM (NINDICES-1) +#define BIAS (DENOM/2) + +// WORK: determine optimal traversal pattern to search for best shape -- what does the error curve look like? +// i.e. can we search shapes in a particular order so we can see the global error minima easily and +// stop without having to touch all shapes? + +#define POS_TO_X(pos) ((pos)&3) +#define POS_TO_Y(pos) (((pos)>>2)&3) + +#define NBITSIZES (NREGIONS*2) +#define ABITINDEX(region) (2*(region)+0) +#define BBITINDEX(region) (2*(region)+1) + +struct ChanBits +{ + int nbitsizes[NBITSIZES]; // bitsizes for one channel +}; + +struct Pattern +{ + ChanBits chan[NCHANNELS_RGB];// bit patterns used per channel + int transformed; // if 0, deltas are unsigned and no transform; otherwise, signed and transformed + int mode; // associated mode value + int modebits; // number of mode bits + const char *encoding; // verilog description of encoding for this mode +}; + +#define NPATTERNS 1 + +static Pattern patterns[NPATTERNS] = +{ + // red green blue xfm mode mb + 6,6,6,6, 6,6,6,6, 6,6,6,6, 0, 0x2, 2, "", +}; + +struct RegionPrec +{ + int endpt_a_prec[NCHANNELS_RGB]; + int endpt_b_prec[NCHANNELS_RGB]; +}; + +struct PatternPrec +{ + RegionPrec region_precs[NREGIONS]; +}; + + +// this is the precision for each channel and region +// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this! +static PatternPrec pattern_precs[NPATTERNS] = +{ + 6,6,6, 6,6,6, 6,6,6, 6,6,6, +}; + +// return # of bits needed to store n. 
handle signed or unsigned cases properly +static int nbits(int n, bool issigned) +{ + int nb; + if (n==0) + return 0; // no bits needed for 0, signed or not + else if (n > 0) + { + for (nb=0; n; ++nb, n>>=1) ; + return nb + (issigned?1:0); + } + else + { + nvAssert (issigned); + for (nb=0; n<-1; ++nb, n>>=1) ; + return nb + 1; + } +} + + +static void transform_forward(IntEndptsRGB_1 ep[NREGIONS]) +{ + nvUnreachable(); +} + +static void transform_inverse(IntEndptsRGB_1 ep[NREGIONS]) +{ + nvUnreachable(); +} + +// endpoints are 777,777; reduce to 666,666 and put the lsb bit majority in compr_bits +static void compress_one(const IntEndptsRGB& endpts, IntEndptsRGB_1& compr_endpts) +{ + int onescnt; + + onescnt = 0; + for (int j=0; j> 1; + onescnt += endpts.B[j] & 1; + compr_endpts.B[j] = endpts.B[j] >> 1; + nvAssert (compr_endpts.A[j] < 64); + nvAssert (compr_endpts.B[j] < 64); + } + compr_endpts.lsb = onescnt >= 3; +} + +static void uncompress_one(const IntEndptsRGB_1& compr_endpts, IntEndptsRGB& endpts) +{ + for (int j=0; j= 0 && pat_index < NPATTERNS); + nvAssert (in.getptr() == patterns[pat_index].modebits); + + shapeindex = in.read(SHAPEBITS); + p = patterns[pat_index]; + + for (int j=0; j 0; ++j) + { + float err = Utils::metric4(colors[i], palette[j]) * importance[i]; + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[i] = j; + } + } + toterr += besterr; + + // check for early exit + if (toterr > current_err) + { + // fill out bogus index values so it's initialized at least + for (int k = i; k < np; ++k) + indices[k] = -1; + + return FLT_MAX; + } + } + return toterr; +} + +// assign indices given a tile, shape, and quantized endpoints, return toterr for each region +static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB_1 endpts[NREGIONS], const PatternPrec &pattern_prec, + int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS]) +{ + // build list of possibles + Vector4 palette[NREGIONS][NINDICES]; + + for (int region = 0; region < NREGIONS; ++region) + { + generate_palette_quantized(endpts[region], pattern_prec.region_precs[region], &palette[region][0]); + toterr[region] = 0; + } + + Vector4 err; + + for (int y = 0; y < tile.size_y; y++) + for (int x = 0; x < tile.size_x; x++) + { + int region = REGION(x,y,shapeindex); + float err, besterr = FLT_MAX; + + for (int i = 0; i < NINDICES && besterr > 0; ++i) + { + err = Utils::metric4(tile.data[y][x], palette[region][i]); + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[y][x] = i; + } + } + toterr[region] += besterr; + } +} + +// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's +// this function returns either old_err or a value smaller (if it was successful in improving the error) +static float perturb_one(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec ®ion_prec, const IntEndptsRGB_1 &old_endpts, IntEndptsRGB_1 &new_endpts, + float old_err, int do_b, int indices[Tile::TILE_TOTAL]) +{ + // we have the old endpoints: old_endpts + // we have the perturbed endpoints: new_endpts + // we have the temporary endpoints: temp_endpts + + IntEndptsRGB_1 temp_endpts; + float min_err = old_err; // start with the best current error + int beststep; + int temp_indices[Tile::TILE_TOTAL]; + + for (int i=0; i>= 1) + { + bool improved = false; + for (int sign = -1; sign <= 1; sign += 2) + { + 
if (do_b == 0) + { + temp_endpts.A[ch] = new_endpts.A[ch] + sign * step; + if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec)) + continue; + } + else + { + temp_endpts.B[ch] = new_endpts.B[ch] + sign * step; + if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec)) + continue; + } + + float err = map_colors(colors, importance, np, temp_endpts, region_prec, min_err, temp_indices); + + if (err < min_err) + { + improved = true; + min_err = err; + beststep = sign * step; + for (int i=0; i 5000 perturb endpoints 50% of precision +// if err > 1000 25% +// if err > 200 12.5% +// if err > 40 6.25% +// for np = 16 -- adjust error thresholds as a function of np +// always ensure endpoint ordering is preserved (no need to overlap the scan) +// if orig_err returned from this is less than its input value, then indices[] will contain valid indices +static float exhaustive(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec ®ion_prec, float orig_err, IntEndptsRGB_1 &opt_endpts, int indices[Tile::TILE_TOTAL]) +{ + IntEndptsRGB_1 temp_endpts; + float best_err = orig_err; + int aprec = region_prec.endpt_a_prec[ch]; + int bprec = region_prec.endpt_b_prec[ch]; + int good_indices[Tile::TILE_TOTAL]; + int temp_indices[Tile::TILE_TOTAL]; + + for (int i=0; i 5000.0*thr_scale) { adelta = (1 << aprec)/2; bdelta = (1 << bprec)/2; } + else if (orig_err > 1000.0*thr_scale) { adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; } + else if (orig_err > 200.0*thr_scale) { adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; } + else if (orig_err > 40.0*thr_scale) { adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; } + adelta = max(adelta, 3); + bdelta = max(bdelta, 3); + +#ifdef DISABLE_EXHAUSTIVE + adelta = bdelta = 3; +#endif + + temp_endpts = opt_endpts; + + // ok figure out the range of A and B + int alow = max(0, opt_endpts.A[ch] - adelta); + int ahigh = min((1<= initial_error) break + rgb0 += delta0 + next = 1 + else + if (err1 >= initial_error) break + rgb1 += delta1 + next = 0 + initial_err = map() + for (;;) + err = perturb(next ? rgb1:rgb0, delta) + if (err >= initial_err) break + next? rgb1 : rgb0 += delta + initial_err = err + */ + IntEndptsRGB_1 new_a, new_b; + IntEndptsRGB_1 new_endpt; + int do_b; + int orig_indices[Tile::TILE_TOTAL]; + int new_indices[Tile::TILE_TOTAL]; + int temp_indices0[Tile::TILE_TOTAL]; + int temp_indices1[Tile::TILE_TOTAL]; + + // now optimize each channel separately + // for the first error improvement, we save the indices. then, for any later improvement, we compare the indices + // if they differ, we restart the loop (which then falls back to looking for a first improvement.) 
+ for (int ch = 0; ch < NCHANNELS_RGB; ++ch) + { + // figure out which endpoint when perturbed gives the most improvement and start there + // if we just alternate, we can easily end up in a local minima + float err0 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0); // perturb endpt A + float err1 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1); // perturb endpt B + + if (err0 < err1) + { + if (err0 >= opt_err) + continue; + + for (int i=0; i= opt_err) + continue; + + for (int i=0; i= opt_err) + break; + + for (int i=0; i 255.0f) v.x = 255.0f; + if (v.y < 0.0f) v.y = 0.0f; + if (v.y > 255.0f) v.y = 255.0f; + if (v.z < 0.0f) v.z = 0.0f; + if (v.z > 255.0f) v.z = 255.0f; + v.w = 255.0f; +} + +static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS], Vector4 palette[NREGIONS][NINDICES]) +{ + for (int region = 0; region < NREGIONS; ++region) + for (int i = 0; i < NINDICES; ++i) + palette[region][i] = Utils::lerp(endpts[region].A, endpts[region].B, i, 0, DENOM); +} + +// generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined +static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS]) +{ + // build list of possibles + Vector4 palette[NREGIONS][NINDICES]; + + generate_palette_unquantized(endpts, palette); + + float toterr = 0; + Vector4 err; + + for (int y = 0; y < tile.size_y; y++) + for (int x = 0; x < tile.size_x; x++) + { + int region = REGION(x,y,shapeindex); + float besterr = FLT_MAX; + + for (int i = 0; i < NINDICES && besterr > 0; ++i) + { + float err = Utils::metric4(tile.data[y][x], palette[region][i]) * tile.importance_map[y][x]; + + if (err > besterr) // error increased, so we're done searching. this works for most norms. + break; + if (err < besterr) + besterr = err; + } + toterr += besterr; + } + return toterr; +} + +static float rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS]) +{ + for (int region=0; region maxp) maxp = dp; + } + + // choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values + endpts[region].A = mean + minp*Vector4(direction, 0); + endpts[region].B = mean + maxp*Vector4(direction, 0); + + // clamp endpoints + // the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best + // shape based on endpoints being clamped + clamp(endpts[region].A); + clamp(endpts[region].B); + } + + return map_colors(tile, shapeindex, endpts); +} + +static void swap(float *list1, int *list2, int i, int j) +{ + float t = list1[i]; list1[i] = list1[j]; list1[j] = t; + int t1 = list2[i]; list2[i] = list2[j]; list2[j] = t1; +} + +float AVPCL::compress_mode1(const Tile &t, char *block) +{ + // number of rough cases to look at. reasonable values of this are 1, NSHAPES/4, and NSHAPES + // NSHAPES/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out + const int NITEMS=NSHAPES/4; + + // pick the best NITEMS shapes and refine these. 
+ struct { + FltEndpts endpts[NREGIONS]; + } all[NSHAPES]; + float roughmse[NSHAPES]; + int index[NSHAPES]; + char tempblock[AVPCL::BLOCKSIZE]; + float msebest = FLT_MAX; + + for (int i=0; i roughmse[j]) + swap(roughmse, index, i, j); + + for (int i=0; i0; ++i) + { + int shape = index[i]; + float mse = refine(t, shape, &all[shape].endpts[0], tempblock); + if (mse < msebest) + { + memcpy(block, tempblock, sizeof(tempblock)); + msebest = mse; + } + } + return msebest; +} + Index: ps/trunk/libraries/source/nvtt/src/src/bc7/avpcl_mode2.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/bc7/avpcl_mode2.cpp +++ ps/trunk/libraries/source/nvtt/src/src/bc7/avpcl_mode2.cpp @@ -0,0 +1,1004 @@ +/* +Copyright 2007 nVidia, Inc. +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. + +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +See the License for the specific language governing permissions and limitations under the License. +*/ + +// Thanks to Jacob Munkberg (jacob@cs.lth.se) for the shortcut of using SVD to do the equivalent of principal components analysis + +// x100 555x6 64p 2bi + +#include "bits.h" +#include "tile.h" +#include "avpcl.h" +#include "nvcore/Debug.h" +#include "nvmath/Vector.inl" +#include "nvmath/Matrix.inl" +#include "nvmath/Fitting.h" +#include "avpcl_utils.h" +#include "endpts.h" +#include +#include + +#include "shapes_three.h" + +using namespace nv; +using namespace AVPCL; + +#define NINDICES 4 +#define INDEXBITS 2 +#define HIGH_INDEXBIT (1<<(INDEXBITS-1)) +#define DENOM (NINDICES-1) +#define BIAS (DENOM/2) + +// WORK: determine optimal traversal pattern to search for best shape -- what does the error curve look like? +// i.e. can we search shapes in a particular order so we can see the global error minima easily and +// stop without having to touch all shapes? + +#define POS_TO_X(pos) ((pos)&3) +#define POS_TO_Y(pos) (((pos)>>2)&3) + +#define NBITSIZES 6 + +struct ChanBits +{ + int nbitsizes[NBITSIZES]; // bitsizes for one channel +}; + +struct Pattern +{ + ChanBits chan[NCHANNELS_RGB];// bit patterns used per channel + int transformed; // if 0, deltas are unsigned and no transform; otherwise, signed and transformed + int mode; // associated mode value + int modebits; // number of mode bits + const char *encoding; // verilog description of encoding for this mode +}; + +#define NPATTERNS 1 + +static Pattern patterns[NPATTERNS] = +{ + // red green blue xfm mode mb + 5,5,5,5,5,5, 5,5,5,5,5,5, 5,5,5,5,5,5, 0, 0x4, 3, "", +}; + + +struct RegionPrec +{ + int endpt_a_prec[NCHANNELS_RGB]; + int endpt_b_prec[NCHANNELS_RGB]; +}; + +struct PatternPrec +{ + RegionPrec region_precs[NREGIONS_THREE]; +}; + + +// this is the precision for each channel and region +// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this! + +static PatternPrec pattern_precs[NPATTERNS] = +{ + 5,5,5, 5,5,5, 5,5,5, 5,5,5, 5,5,5, 5,5,5, +}; + +// return # of bits needed to store n. 
handle signed or unsigned cases properly +static int nbits(int n, bool issigned) +{ + int nb; + if (n==0) + return 0; // no bits needed for 0, signed or not + else if (n > 0) + { + for (nb=0; n; ++nb, n>>=1) ; + return nb + (issigned?1:0); + } + else + { + nvAssert (issigned); + for (nb=0; n<-1; ++nb, n>>=1) ; + return nb + 1; + } +} + +#define R_0 ep[0].A[i] +#define R_1 ep[0].B[i] +#define R_2 ep[1].A[i] +#define R_3 ep[1].B[i] + +static void transform_forward(IntEndptsRGB ep[NREGIONS]) +{ + for (int i=0; i= 0 && pat_index < NPATTERNS); + nvAssert (in.getptr() == patterns[pat_index].modebits); + + shapeindex = in.read(SHAPEBITS); + + p = patterns[pat_index]; + + for (int j=0; j 0; ++j) + { + float err = Utils::metric4(colors[i], palette[j]) * importance[i]; + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[i] = j; + } + } + toterr += besterr; + + // check for early exit + if (toterr > current_err) + { + // fill out bogus index values so it's initialized at least + for (int k = i; k < np; ++k) + indices[k] = -1; + + return FLT_MAX; + } + } + return toterr; +} + +// assign indices given a tile, shape, and quantized endpoints, return toterr for each region +static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB endpts[NREGIONS_THREE], const PatternPrec &pattern_prec, + int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS_THREE]) +{ + // build list of possibles + Vector4 palette[NREGIONS_THREE][NINDICES]; + + for (int region = 0; region < NREGIONS_THREE; ++region) + { + generate_palette_quantized(endpts[region], pattern_prec.region_precs[region], &palette[region][0]); + toterr[region] = 0; + } + + Vector4 err; + + for (int y = 0; y < tile.size_y; y++) + for (int x = 0; x < tile.size_x; x++) + { + int region = REGION(x,y,shapeindex); + float err, besterr = FLT_MAX; + + for (int i = 0; i < NINDICES && besterr > 0; ++i) + { + err = Utils::metric4(tile.data[y][x], palette[region][i]); + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[y][x] = i; + } + } + toterr[region] += besterr; + } +} + +// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's +// this function returns either old_err or a value smaller (if it was successful in improving the error) +static float perturb_one(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec ®ion_prec, const IntEndptsRGB &old_endpts, IntEndptsRGB &new_endpts, + float old_err, int do_b, int indices[Tile::TILE_TOTAL]) +{ + // we have the old endpoints: old_endpts + // we have the perturbed endpoints: new_endpts + // we have the temporary endpoints: temp_endpts + + IntEndptsRGB temp_endpts; + float min_err = old_err; // start with the best current error + int beststep; + int temp_indices[Tile::TILE_TOTAL]; + + for (int i=0; i>= 1) + { + bool improved = false; + for (int sign = -1; sign <= 1; sign += 2) + { + if (do_b == 0) + { + temp_endpts.A[ch] = new_endpts.A[ch] + sign * step; + if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec)) + continue; + } + else + { + temp_endpts.B[ch] = new_endpts.B[ch] + sign * step; + if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec)) + continue; + } + + float err = map_colors(colors, importance, np, temp_endpts, region_prec, min_err, temp_indices); + + if (err < min_err) + { + improved = true; + min_err = err; + beststep = sign * step; + for (int i=0; 
i 5000 perturb endpoints 50% of precision +// if err > 1000 25% +// if err > 200 12.5% +// if err > 40 6.25% +// for np = 16 -- adjust error thresholds as a function of np +// always ensure endpoint ordering is preserved (no need to overlap the scan) +// if orig_err returned from this is less than its input value, then indices[] will contain valid indices +static float exhaustive(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec ®ion_prec, float orig_err, IntEndptsRGB &opt_endpts, int indices[Tile::TILE_TOTAL]) +{ + IntEndptsRGB temp_endpts; + float best_err = orig_err; + int aprec = region_prec.endpt_a_prec[ch]; + int bprec = region_prec.endpt_b_prec[ch]; + int good_indices[Tile::TILE_TOTAL]; + int temp_indices[Tile::TILE_TOTAL]; + + for (int i=0; i 5000.0*thr_scale) { adelta = (1 << aprec)/2; bdelta = (1 << bprec)/2; } + else if (orig_err > 1000.0*thr_scale) { adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; } + else if (orig_err > 200.0*thr_scale) { adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; } + else if (orig_err > 40.0*thr_scale) { adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; } + adelta = max(adelta, 3); + bdelta = max(bdelta, 3); + +#ifdef DISABLE_EXHAUSTIVE + adelta = bdelta = 3; +#endif + + temp_endpts = opt_endpts; + + // ok figure out the range of A and B + int alow = max(0, opt_endpts.A[ch] - adelta); + int ahigh = min((1<= initial_error) break + rgb0 += delta0 + next = 1 + else + if (err1 >= initial_error) break + rgb1 += delta1 + next = 0 + initial_err = map() + for (;;) + err = perturb(next ? rgb1:rgb0, delta) + if (err >= initial_err) break + next? rgb1 : rgb0 += delta + initial_err = err + */ + IntEndptsRGB new_a, new_b; + IntEndptsRGB new_endpt; + int do_b; + int orig_indices[Tile::TILE_TOTAL]; + int new_indices[Tile::TILE_TOTAL]; + int temp_indices0[Tile::TILE_TOTAL]; + int temp_indices1[Tile::TILE_TOTAL]; + + // now optimize each channel separately + // for the first error improvement, we save the indices. then, for any later improvement, we compare the indices + // if they differ, we restart the loop (which then falls back to looking for a first improvement.) 
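As a side note on the exhaustive pass referenced above: its scan window is sized from the current error, per the "if err > 5000 perturb endpoints 50% of precision" threshold table, and clamped to at least 3 steps. A minimal sketch of that sizing is below; the exact thr_scale expression (np relative to a 16-pixel tile) is an assumption here, following the "adjust error thresholds as a function of np" comment.

#include <algorithm>

// prec: endpoint precision in bits; np: number of pixels in the region.
int search_delta(float orig_err, int prec, int np)
{
    const float thr_scale = static_cast<float>(np) / 16.0f;       // assumed scaling
    int delta = 0;
    if      (orig_err > 5000.0f * thr_scale) delta = (1 << prec) / 2;   // scan 50% of the range
    else if (orig_err > 1000.0f * thr_scale) delta = (1 << prec) / 4;   // 25%
    else if (orig_err >  200.0f * thr_scale) delta = (1 << prec) / 8;   // 12.5%
    else if (orig_err >   40.0f * thr_scale) delta = (1 << prec) / 16;  // 6.25%
    return std::max(delta, 3);                                          // never fewer than +/-3 steps
}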
+ for (int ch = 0; ch < NCHANNELS_RGB; ++ch) + { + // figure out which endpoint when perturbed gives the most improvement and start there + // if we just alternate, we can easily end up in a local minima + float err0 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0); // perturb endpt A + float err1 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1); // perturb endpt B + + if (err0 < err1) + { + if (err0 >= opt_err) + continue; + + for (int i=0; i= opt_err) + continue; + + for (int i=0; i= opt_err) + break; + + for (int i=0; i 255.0f) v.x = 255.0f; + if (v.y < 0.0f) v.y = 0.0f; + if (v.y > 255.0f) v.y = 255.0f; + if (v.z < 0.0f) v.z = 0.0f; + if (v.z > 255.0f) v.z = 255.0f; + v.w = 255.0f; +} + +static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS_THREE], Vector4 palette[NREGIONS_THREE][NINDICES]) +{ + for (int region = 0; region < NREGIONS_THREE; ++region) + for (int i = 0; i < NINDICES; ++i) + palette[region][i] = Utils::lerp(endpts[region].A, endpts[region].B, i, 0, DENOM); +} + +// generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined +static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS_THREE]) +{ + // build list of possibles + Vector4 palette[NREGIONS_THREE][NINDICES]; + + generate_palette_unquantized(endpts, palette); + + float toterr = 0; + Vector4 err; + + for (int y = 0; y < tile.size_y; y++) + for (int x = 0; x < tile.size_x; x++) + { + int region = REGION(x,y,shapeindex); + float err, besterr = FLT_MAX; + + for (int i = 0; i < NINDICES && besterr > 0; ++i) + { + err = Utils::metric4(tile.data[y][x], palette[region][i]); + + if (err > besterr) // error increased, so we're done searching. this works for most norms. + break; + if (err < besterr) + besterr = err; + } + toterr += besterr; + } + return toterr; +} + +static float rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS_THREE]) +{ + for (int region=0; region maxp) maxp = dp; + } + + // choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values + endpts[region].A = mean + minp*Vector4(direction, 0); + endpts[region].B = mean + maxp*Vector4(direction, 0); + + // clamp endpoints + // the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best + // shape based on endpoints being clamped + clamp(endpts[region].A); + clamp(endpts[region].B); + } + + return map_colors(tile, shapeindex, endpts); +} + +static void swap(float *list1, int *list2, int i, int j) +{ + float t = list1[i]; list1[i] = list1[j]; list1[j] = t; + int t1 = list2[i]; list2[i] = list2[j]; list2[j] = t1; +} + +float AVPCL::compress_mode2(const Tile &t, char *block) +{ + // number of rough cases to look at. reasonable values of this are 1, NSHAPES/4, and NSHAPES + // NSHAPES/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out + const int NITEMS=NSHAPES/4; + + // pick the best NITEMS shapes and refine these. 
+ struct { + FltEndpts endpts[NREGIONS_THREE]; + } all[NSHAPES]; + float roughmse[NSHAPES]; + int index[NSHAPES]; + char tempblock[AVPCL::BLOCKSIZE]; + float msebest = FLT_MAX; + + for (int i=0; i roughmse[j]) + swap(roughmse, index, i, j); + + for (int i=0; i0; ++i) + { + int shape = index[i]; + float mse = refine(t, shape, &all[shape].endpts[0], tempblock); + if (mse < msebest) + { + memcpy(block, tempblock, sizeof(tempblock)); + msebest = mse; + } + } + return msebest; +} + Index: ps/trunk/libraries/source/nvtt/src/src/bc7/avpcl_mode3.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/bc7/avpcl_mode3.cpp +++ ps/trunk/libraries/source/nvtt/src/src/bc7/avpcl_mode3.cpp @@ -0,0 +1,1059 @@ +/* +Copyright 2007 nVidia, Inc. +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. + +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +See the License for the specific language governing permissions and limitations under the License. +*/ + +// Thanks to Jacob Munkberg (jacob@cs.lth.se) for the shortcut of using SVD to do the equivalent of principal components analysis + +// x1000 777.1x4 64p 2bi (30b) + +#include "bits.h" +#include "tile.h" +#include "avpcl.h" +#include "nvcore/Debug.h" +#include "nvmath/Vector.inl" +#include "nvmath/Matrix.inl" +#include "nvmath/Fitting.h" +#include "avpcl_utils.h" +#include "endpts.h" +#include +#include + +#include "shapes_two.h" + +using namespace nv; +using namespace AVPCL; + +#define NLSBMODES 4 // number of different lsb modes per region. since we have two .1 per region, that can have 4 values + +#define NINDICES 4 +#define INDEXBITS 2 +#define HIGH_INDEXBIT (1<<(INDEXBITS-1)) +#define DENOM (NINDICES-1) +#define BIAS (DENOM/2) + +// WORK: determine optimal traversal pattern to search for best shape -- what does the error curve look like? +// i.e. can we search shapes in a particular order so we can see the global error minima easily and +// stop without having to touch all shapes? + +#define POS_TO_X(pos) ((pos)&3) +#define POS_TO_Y(pos) (((pos)>>2)&3) + +#define NBITSIZES (NREGIONS*2) +#define ABITINDEX(region) (2*(region)+0) +#define BBITINDEX(region) (2*(region)+1) + +struct ChanBits +{ + int nbitsizes[NBITSIZES]; // bitsizes for one channel +}; + +struct Pattern +{ + ChanBits chan[NCHANNELS_RGB];// bit patterns used per channel + int transformed; // if 0, deltas are unsigned and no transform; otherwise, signed and transformed + int mode; // associated mode value + int modebits; // number of mode bits + const char *encoding; // verilog description of encoding for this mode +}; + +#define NPATTERNS 1 +#define NREGIONS 2 + +static Pattern patterns[NPATTERNS] = +{ + // red green blue xfm mode mb + 7,7,7,7, 7,7,7,7, 7,7,7,7, 0, 0x8, 4, "", +}; + +struct RegionPrec +{ + int endpt_a_prec[NCHANNELS_RGB]; + int endpt_b_prec[NCHANNELS_RGB]; +}; + +struct PatternPrec +{ + RegionPrec region_precs[NREGIONS]; +}; + + +// this is the precision for each channel and region +// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this! 
+static PatternPrec pattern_precs[NPATTERNS] = +{ + 7,7,7, 7,7,7, 7,7,7, 7,7,7, +}; + +// return # of bits needed to store n. handle signed or unsigned cases properly +static int nbits(int n, bool issigned) +{ + int nb; + if (n==0) + return 0; // no bits needed for 0, signed or not + else if (n > 0) + { + for (nb=0; n; ++nb, n>>=1) ; + return nb + (issigned?1:0); + } + else + { + nvAssert (issigned); + for (nb=0; n<-1; ++nb, n>>=1) ; + return nb + 1; + } +} + +static void transform_forward(IntEndptsRGB_2 ep[NREGIONS]) +{ + nvUnreachable(); +} + +static void transform_inverse(IntEndptsRGB_2 ep[NREGIONS]) +{ + nvUnreachable(); +} + +// endpoints are 888,888; reduce to 777,777 and put the lsb bit majority in compr_bits +static void compress_one(const IntEndptsRGB& endpts, IntEndptsRGB_2& compr_endpts) +{ + int onescnt; + + onescnt = 0; + for (int j=0; j> 1; + nvAssert (compr_endpts.A[j] < 128); + } + compr_endpts.a_lsb = onescnt >= 2; + + onescnt = 0; + for (int j=0; j> 1; + nvAssert (compr_endpts.B[j] < 128); + } + compr_endpts.b_lsb = onescnt >= 2; +} + +static void uncompress_one(const IntEndptsRGB_2& compr_endpts, IntEndptsRGB& endpts) +{ + for (int j=0; j= 0 && pat_index < NPATTERNS); + nvAssert (in.getptr() == patterns[pat_index].modebits); + + shapeindex = in.read(SHAPEBITS); + p = patterns[pat_index]; + + for (int j=0; j 0; ++j) + { + float err = Utils::metric4(colors[i], palette[j]) * importance[i]; + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[i] = j; + } + } + toterr += besterr; + + // check for early exit + if (toterr > current_err) + { + // fill out bogus index values so it's initialized at least + for (int k = i; k < np; ++k) + indices[k] = -1; + + return FLT_MAX; + } + } + return toterr; +} + +static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB_2 endpts[NREGIONS], const PatternPrec &pattern_prec, + int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS]) +{ + // build list of possibles + Vector4 palette[NREGIONS][NINDICES]; + + for (int region = 0; region < NREGIONS; ++region) + { + generate_palette_quantized(endpts[region], pattern_prec.region_precs[region], &palette[region][0]); + toterr[region] = 0; + } + + Vector4 err; + + for (int y = 0; y < tile.size_y; y++) + for (int x = 0; x < tile.size_x; x++) + { + int region = REGION(x,y,shapeindex); + float err, besterr = FLT_MAX; + + for (int i = 0; i < NINDICES && besterr > 0; ++i) + { + err = Utils::metric4(tile.data[y][x], palette[region][i]); + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[y][x] = i; + } + } + toterr[region] += besterr; + } +} + +// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's +// this function returns either old_err or a value smaller (if it was successful in improving the error) +static float perturb_one(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec ®ion_prec, const IntEndptsRGB_2 &old_endpts, IntEndptsRGB_2 &new_endpts, + float old_err, int do_b, int indices[Tile::TILE_TOTAL]) +{ + // we have the old endpoints: old_endpts + // we have the perturbed endpoints: new_endpts + // we have the temporary endpoints: temp_endpts + + IntEndptsRGB_2 temp_endpts; + float min_err = old_err; // start with the best current error + int beststep; + int temp_indices[Tile::TILE_TOTAL]; + + for (int i=0; i>= 1) + { + bool improved = false; + for 
(int sign = -1; sign <= 1; sign += 2) + { + if (do_b == 0) + { + temp_endpts.A[ch] = new_endpts.A[ch] + sign * step; + if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec)) + continue; + } + else + { + temp_endpts.B[ch] = new_endpts.B[ch] + sign * step; + if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec)) + continue; + } + + float err = map_colors(colors, importance, np, temp_endpts, region_prec, min_err, temp_indices); + + if (err < min_err) + { + improved = true; + min_err = err; + beststep = sign * step; + for (int i=0; i 5000 perturb endpoints 50% of precision +// if err > 1000 25% +// if err > 200 12.5% +// if err > 40 6.25% +// for np = 16 -- adjust error thresholds as a function of np +// always ensure endpoint ordering is preserved (no need to overlap the scan) +// if orig_err returned from this is less than its input value, then indices[] will contain valid indices +static float exhaustive(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec ®ion_prec, float &orig_err, IntEndptsRGB_2 &opt_endpts, int indices[Tile::TILE_TOTAL]) +{ + IntEndptsRGB_2 temp_endpts; + float best_err = orig_err; + int aprec = region_prec.endpt_a_prec[ch]; + int bprec = region_prec.endpt_b_prec[ch]; + int good_indices[Tile::TILE_TOTAL]; + int temp_indices[Tile::TILE_TOTAL]; + + for (int i=0; i 5000.0*thr_scale) { adelta = (1 << aprec)/2; bdelta = (1 << bprec)/2; } + else if (orig_err > 1000.0*thr_scale) { adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; } + else if (orig_err > 200.0*thr_scale) { adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; } + else if (orig_err > 40.0*thr_scale) { adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; } + adelta = max(adelta, 3); + bdelta = max(bdelta, 3); + +#ifdef DISABLE_EXHAUSTIVE + adelta = bdelta = 3; +#endif + + temp_endpts = opt_endpts; + + // ok figure out the range of A and B + int alow = max(0, opt_endpts.A[ch] - adelta); + int ahigh = min((1<= initial_error) break + rgb0 += delta0 + next = 1 + else + if (err1 >= initial_error) break + rgb1 += delta1 + next = 0 + initial_err = map() + for (;;) + err = perturb(next ? rgb1:rgb0, delta) + if (err >= initial_err) break + next? rgb1 : rgb0 += delta + initial_err = err + */ + IntEndptsRGB_2 new_a, new_b; + IntEndptsRGB_2 new_endpt; + int do_b; + int orig_indices[Tile::TILE_TOTAL]; + int new_indices[Tile::TILE_TOTAL]; + int temp_indices0[Tile::TILE_TOTAL]; + int temp_indices1[Tile::TILE_TOTAL]; + + // now optimize each channel separately + // for the first error improvement, we save the indices. then, for any later improvement, we compare the indices + // if they differ, we restart the loop (which then falls back to looking for a first improvement.) 
+ for (int ch = 0; ch < NCHANNELS_RGB; ++ch) + { + // figure out which endpoint when perturbed gives the most improvement and start there + // if we just alternate, we can easily end up in a local minima + float err0 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0); // perturb endpt A + float err1 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1); // perturb endpt B + + if (err0 < err1) + { + if (err0 >= opt_err) + continue; + + for (int i=0; i= opt_err) + continue; + + for (int i=0; i= opt_err) + break; + + for (int i=0; i> 1) & 1; + + // make sure we have a valid error for temp_in + // we use FLT_MAX here because we want an accurate temp_in_err, no shortcuts + // (mapcolors will compute a mapping but will stop if the error exceeds the value passed in the FLT_MAX position) + float temp_in_err = map_colors(pixels, importance, np, temp_in, pattern_prec.region_precs[region], FLT_MAX, temp_indices); + + // now try to optimize these endpoints + float temp_out_err = optimize_one(pixels, importance, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out); + + // if we find an improvement, update the best so far and correct the output endpoints and errors + if (temp_out_err < best_err) + { + best_err = temp_out_err; + opt_err[region] = temp_out_err; + opt_endpts[region] = temp_out; + } + } + } +} + +/* optimization algorithm + for each pattern + convert endpoints using pattern precision + assign indices and get initial error + compress indices (and possibly reorder endpoints) + transform endpoints + if transformed endpoints fit pattern + get original endpoints back + optimize endpoints, get new endpoints, new indices, and new error // new error will almost always be better + compress new indices + transform new endpoints + if new endpoints fit pattern AND if error is improved + emit compressed block with new data + else + emit compressed block with original data // to try to preserve maximum endpoint precision +*/ + +static float refine(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS], char *block) +{ + float orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS]; + IntEndptsRGB_2 orig_endpts[NREGIONS], opt_endpts[NREGIONS]; + int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W]; + + for (int sp = 0; sp < NPATTERNS; ++sp) + { + quantize_endpts(endpts, pattern_precs[sp], orig_endpts); + assign_indices(tile, shapeindex_best, orig_endpts, pattern_precs[sp], orig_indices, orig_err); + swap_indices(orig_endpts, orig_indices, shapeindex_best); + if (patterns[sp].transformed) + transform_forward(orig_endpts); + // apply a heuristic here -- we check if the endpoints fit before we try to optimize them. + // the assumption made is that if they don't fit now, they won't fit after optimizing. 
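One step of the refine() flow above that is easy to miss is swap_indices: if the anchor pixel's index has its high bit set, the endpoints are swapped and every index in the region is complemented, so the anchor index's high bit is guaranteed to be 0 and can be dropped from the encoding (write_indices writes one fewer bit for it). A minimal standalone sketch of that trick, with assumed scalar endpoints rather than the real structs:

#include <utility>

void make_anchor_high_bit_zero(int& endA, int& endB, int* indices, int n,
                               int nindices, int high_bit, int anchor)
{
    if (indices[anchor] & high_bit)
    {
        std::swap(endA, endB);                        // reversing A/B reverses the palette order
        for (int i = 0; i < n; ++i)
            indices[i] = (nindices - 1) - indices[i]; // complementing keeps each pixel's color
    }
}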
+ if (endpts_fit(orig_endpts, patterns[sp])) + { + if (patterns[sp].transformed) + transform_inverse(orig_endpts); + optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts); + assign_indices(tile, shapeindex_best, opt_endpts, pattern_precs[sp], opt_indices, opt_err); + // (nreed) Commented out asserts because they go off all the time...not sure why + //for (int i=0; i 255.0f) v.x = 255.0f; + if (v.y < 0.0f) v.y = 0.0f; + if (v.y > 255.0f) v.y = 255.0f; + if (v.z < 0.0f) v.z = 0.0f; + if (v.z > 255.0f) v.z = 255.0f; + v.w = 255.0f; +} + +static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS], Vector4 palette[NREGIONS][NINDICES]) +{ + for (int region = 0; region < NREGIONS; ++region) + for (int i = 0; i < NINDICES; ++i) + palette[region][i] = Utils::lerp(endpts[region].A, endpts[region].B, i, 0, DENOM); +} + +// generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined +static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS]) +{ + // build list of possibles + Vector4 palette[NREGIONS][NINDICES]; + + generate_palette_unquantized(endpts, palette); + + float toterr = 0; + Vector4 err; + + for (int y = 0; y < tile.size_y; y++) + for (int x = 0; x < tile.size_x; x++) + { + int region = REGION(x,y,shapeindex); + float err, besterr = FLT_MAX; + + for (int i = 0; i < NINDICES && besterr > 0; ++i) + { + err = Utils::metric4(tile.data[y][x], palette[region][i]); + + if (err > besterr) // error increased, so we're done searching. this works for most norms. + break; + if (err < besterr) + besterr = err; + } + toterr += besterr; + } + return toterr; +} + +static float rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS]) +{ + for (int region=0; region maxp) maxp = dp; + } + + // choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values + endpts[region].A = mean + minp*Vector4(direction, 0); + endpts[region].B = mean + maxp*Vector4(direction, 0); + + // clamp endpoints + // the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best + // shape based on endpoints being clamped + clamp(endpts[region].A); + clamp(endpts[region].B); + } + + return map_colors(tile, shapeindex, endpts); +} + +static void swap(float *list1, int *list2, int i, int j) +{ + float t = list1[i]; list1[i] = list1[j]; list1[j] = t; + int t1 = list2[i]; list2[i] = list2[j]; list2[j] = t1; +} + +float AVPCL::compress_mode3(const Tile &t, char *block) +{ + // number of rough cases to look at. reasonable values of this are 1, NSHAPES/4, and NSHAPES + // NSHAPES/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out + const int NITEMS=NSHAPES/4; + + // pick the best NITEMS shapes and refine these. 
+ struct { + FltEndpts endpts[NREGIONS]; + } all[NSHAPES]; + float roughmse[NSHAPES]; + int index[NSHAPES]; + char tempblock[AVPCL::BLOCKSIZE]; + float msebest = FLT_MAX; + + for (int i=0; i roughmse[j]) + swap(roughmse, index, i, j); + + for (int i=0; i0; ++i) + { + int shape = index[i]; + float mse = refine(t, shape, &all[shape].endpts[0], tempblock); + if (mse < msebest) + { + memcpy(block, tempblock, sizeof(tempblock)); + msebest = mse; + } + } + return msebest; +} + Index: ps/trunk/libraries/source/nvtt/src/src/bc7/avpcl_mode4.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/bc7/avpcl_mode4.cpp +++ ps/trunk/libraries/source/nvtt/src/src/bc7/avpcl_mode4.cpp @@ -0,0 +1,1214 @@ +/* +Copyright 2007 nVidia, Inc. +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. + +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +See the License for the specific language governing permissions and limitations under the License. +*/ + +// Thanks to Jacob Munkberg (jacob@cs.lth.se) for the shortcut of using SVD to do the equivalent of principal components analysis + +// x10000 2r 1i 555x2 6x2 2bi 3bi + +#include "bits.h" +#include "tile.h" +#include "avpcl.h" +#include "nvcore/Debug.h" +#include "nvmath/Vector.inl" +#include "nvmath/Matrix.inl" +#include "nvmath/Fitting.h" +#include "avpcl_utils.h" +#include "endpts.h" +#include +#include + +using namespace nv; +using namespace AVPCL; + +// there are 2 index arrays. INDEXMODE selects between the arrays being 2 & 3 bits or 3 & 2 bits +// array 0 is always the RGB array and array 1 is always the A array +#define NINDEXARRAYS 2 +#define INDEXARRAY_RGB 0 +#define INDEXARRAY_A 1 +#define INDEXARRAY_2BITS(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? INDEXARRAY_A : INDEXARRAY_RGB) +#define INDEXARRAY_3BITS(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_3BITS) ? INDEXARRAY_A : INDEXARRAY_RGB) + +#define NINDICES3 8 +#define INDEXBITS3 3 +#define HIGH_INDEXBIT3 (1<<(INDEXBITS3-1)) +#define DENOM3 (NINDICES3-1) +#define BIAS3 (DENOM3/2) + +#define NINDICES2 4 +#define INDEXBITS2 2 +#define HIGH_INDEXBIT2 (1<<(INDEXBITS2-1)) +#define DENOM2 (NINDICES2-1) +#define BIAS2 (DENOM2/2) + +#define NINDICES_RGB(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? NINDICES3 : NINDICES2) +#define INDEXBITS_RGB(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? INDEXBITS3 : INDEXBITS2) +#define HIGH_INDEXBIT_RGB(indexmode)((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? HIGH_INDEXBIT3 : HIGH_INDEXBIT2) +#define DENOM_RGB(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? DENOM3 : DENOM2) +#define BIAS_RGB(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? BIAS3 : BIAS2) + +#define NINDICES_A(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? NINDICES2 : NINDICES3) +#define INDEXBITS_A(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? INDEXBITS2 : INDEXBITS3) +#define HIGH_INDEXBIT_A(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? HIGH_INDEXBIT2 : HIGH_INDEXBIT3) +#define DENOM_A(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? DENOM2 : DENOM3) +#define BIAS_A(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? 
BIAS2 : BIAS3) + +#define NSHAPES 1 + +static int shapes[NSHAPES] = +{ + 0x0000, +}; + +#define REGION(x,y,shapeindex) ((shapes[shapeindex]&(1<<(15-(x)-4*(y))))!=0) + +#define NREGIONS 1 // keep the region stuff in just in case... + +// encoded index compression location: region 0 is always at 0,0. + +#define NBITSIZES 2 // one endpoint pair + +struct ChanBits +{ + int nbitsizes[NBITSIZES]; // bitsizes for one channel +}; + +struct Pattern +{ + ChanBits chan[NCHANNELS_RGBA];// bit patterns used per channel + int transform_mode; // x0 means alpha channel not transformed, x1 otherwise. 0x rgb not transformed, 1x otherwise. + int mode; // associated mode value + int modebits; // number of mode bits + const char *encoding; // verilog description of encoding for this mode +}; + +#define TRANSFORM_MODE_ALPHA 1 +#define TRANSFORM_MODE_RGB 2 + +#define NPATTERNS 1 + +static Pattern patterns[NPATTERNS] = +{ + // red green blue alpha xfm mode mb encoding + 5,5, 5,5, 5,5, 6,6, 0x0, 0x10, 5, "", +}; + +struct RegionPrec +{ + int endpt_a_prec[NCHANNELS_RGBA]; + int endpt_b_prec[NCHANNELS_RGBA]; +}; + +struct PatternPrec +{ + RegionPrec region_precs[NREGIONS]; +}; + +// this is the precision for each channel and region +// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this! +static PatternPrec pattern_precs[NPATTERNS] = +{ + 5,5,5,6, 5,5,5,6, +}; + + +// return # of bits needed to store n. handle signed or unsigned cases properly +static int nbits(int n, bool issigned) +{ + int nb; + if (n==0) + return 0; // no bits needed for 0, signed or not + else if (n > 0) + { + for (nb=0; n; ++nb, n>>=1) ; + return nb + (issigned?1:0); + } + else + { + nvAssert (issigned); + for (nb=0; n<-1; ++nb, n>>=1) ; + return nb + 1; + } +} + +#define R_0 ep[0].A[i] +#define R_1 ep[0].B[i] + +static void transform_forward(int transform_mode, IntEndptsRGBA ep[NREGIONS]) +{ + int i; + + if (transform_mode & TRANSFORM_MODE_RGB) + for (i=CHANNEL_R; i> 2) & 3 and x = index & 3 +static void swap_indices(int shapeindex, int indexmode, IntEndptsRGBA endpts[NREGIONS], int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W]) +{ + int index_positions[NREGIONS]; + + index_positions[0] = 0; // since WLOG we have the high bit of the shapes at 0 + + for (int region = 0; region < NREGIONS; ++region) + { + int x = index_positions[region] & 3; + int y = (index_positions[region] >> 2) & 3; + nvAssert(REGION(x,y,shapeindex) == region); // double check the table + + // swap RGB + if (indices[INDEXARRAY_RGB][y][x] & HIGH_INDEXBIT_RGB(indexmode)) + { + // high bit is set, swap the endpts and indices for this region + int t; + for (int i=CHANNEL_R; i<=CHANNEL_B; ++i) { t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t; } + + for (int y = 0; y < Tile::TILE_H; y++) + for (int x = 0; x < Tile::TILE_W; x++) + if (REGION(x,y,shapeindex) == region) + indices[INDEXARRAY_RGB][y][x] = NINDICES_RGB(indexmode) - 1 - indices[INDEXARRAY_RGB][y][x]; + } + + // swap A + if (indices[INDEXARRAY_A][y][x] & HIGH_INDEXBIT_A(indexmode)) + { + // high bit is set, swap the endpts and indices for this region + int t; + for (int i=CHANNEL_A; i<=CHANNEL_A; ++i) { t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t; } + + for (int y = 0; y < Tile::TILE_H; y++) + for (int x = 0; x < Tile::TILE_W; x++) + if (REGION(x,y,shapeindex) == region) + indices[INDEXARRAY_A][y][x] = NINDICES_A(indexmode) - 1 - indices[INDEXARRAY_A][y][x]; + } + } 
+} + +static bool endpts_fit(IntEndptsRGBA endpts[NREGIONS], const Pattern &p) +{ + return true; +} + +static void write_header(const IntEndptsRGBA endpts[NREGIONS], int shapeindex, const Pattern &p, int rotatemode, int indexmode, Bits &out) +{ + // ignore shapeindex + out.write(p.mode, p.modebits); + out.write(rotatemode, ROTATEMODE_BITS); + out.write(indexmode, INDEXMODE_BITS); + for (int i=0; i= 0 && pat_index < NPATTERNS); + nvAssert (in.getptr() == patterns[pat_index].modebits); + + p = patterns[pat_index]; + + shapeindex = 0; // we don't have any + + rotatemode = in.read(ROTATEMODE_BITS); + indexmode = in.read(INDEXMODE_BITS); + for (int i=0; i>2][i&3], INDEXBITS2 - (i==0?1:0)); // write i..[1:0] or i..[0] + + // then the 3 bit indices + nvAssert ((indices[INDEXARRAY_3BITS(indexmode)][0][0] & HIGH_INDEXBIT3) == 0); + for (int i = 0; i < Tile::TILE_TOTAL; ++i) + out.write(indices[INDEXARRAY_3BITS(indexmode)][i>>2][i&3], INDEXBITS3 - (i==0?1:0)); // write i..[2:0] or i..[1:0] +} + +static void read_indices(Bits &in, int shapeindex, int indexmode, int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W]) +{ + // the indices we shorten is always index 0 + + // do the 2 bit indices first + for (int i = 0; i < Tile::TILE_TOTAL; ++i) + indices[INDEXARRAY_2BITS(indexmode)][i>>2][i&3] = in.read(INDEXBITS2 - (i==0?1:0)); // read i..[1:0] or i..[0] + + // then the 3 bit indices + for (int i = 0; i < Tile::TILE_TOTAL; ++i) + indices[INDEXARRAY_3BITS(indexmode)][i>>2][i&3] = in.read(INDEXBITS3 - (i==0?1:0)); // read i..[1:0] or i..[0] +} + +static void emit_block(const IntEndptsRGBA endpts[NREGIONS], int shapeindex, const Pattern &p, const int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], int rotatemode, int indexmode, char *block) +{ + Bits out(block, AVPCL::BITSIZE); + + write_header(endpts, shapeindex, p, rotatemode, indexmode, out); + + write_indices(indices, shapeindex, indexmode, out); + + nvAssert(out.getptr() == AVPCL::BITSIZE); +} + +static void generate_palette_quantized_rgb_a(const IntEndptsRGBA &endpts, const RegionPrec ®ion_prec, int indexmode, Vector3 palette_rgb[NINDICES3], float palette_a[NINDICES3]) +{ + // scale endpoints for RGB + int a, b; + + a = Utils::unquantize(endpts.A[0], region_prec.endpt_a_prec[0]); + b = Utils::unquantize(endpts.B[0], region_prec.endpt_b_prec[0]); + + // interpolate R + for (int i = 0; i < NINDICES_RGB(indexmode); ++i) + palette_rgb[i].x = float(Utils::lerp(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode))); + + a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]); + b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]); + + // interpolate G + for (int i = 0; i < NINDICES_RGB(indexmode); ++i) + palette_rgb[i].y = float(Utils::lerp(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode))); + + a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]); + b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]); + + // interpolate B + for (int i = 0; i < NINDICES_RGB(indexmode); ++i) + palette_rgb[i].z = float(Utils::lerp(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode))); + + a = Utils::unquantize(endpts.A[3], region_prec.endpt_a_prec[3]); + b = Utils::unquantize(endpts.B[3], region_prec.endpt_b_prec[3]); + + // interpolate A + for (int i = 0; i < NINDICES_A(indexmode); ++i) + palette_a[i] = float(Utils::lerp(a, b, i, BIAS_A(indexmode), DENOM_A(indexmode))); + +} + +static void sign_extend(Pattern &p, IntEndptsRGBA endpts[NREGIONS]) +{ + for (int i=0; i 0; ++j) + { + err = Utils::metric1(a, palette_a[j], rotatemode); + 
+ if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + palette_alpha = palette_a[j]; + indices[INDEXARRAY_A][i] = j; + } + } + toterr += besterr; // squared-error norms are additive since we don't do the square root + + // do RGB index + besterr = FLT_MAX; + for (int j = 0; j < NINDICES_RGB(indexmode) && besterr > 0; ++j) + { + err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[j], rotatemode) : + Utils::metric3premult_alphaout(rgb, tile_alpha, palette_rgb[j], palette_alpha); + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[INDEXARRAY_RGB][i] = j; + } + } + toterr += besterr; + if (toterr > current_besterr) + { + // fill out bogus index values so it's initialized at least + for (int k = i; k < np; ++k) + { + indices[INDEXARRAY_RGB][k] = -1; + indices[INDEXARRAY_A][k] = -1; + } + return FLT_MAX; + } + } + else + { + // do RGB index + besterr = FLT_MAX; + int bestindex; + for (int j = 0; j < NINDICES_RGB(indexmode) && besterr > 0; ++j) + { + err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[j], rotatemode) : + Utils::metric3premult_alphain(rgb, palette_rgb[j], rotatemode); + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + bestindex = j; + indices[INDEXARRAY_RGB][i] = j; + } + } + palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (palette_rgb[bestindex]).x : + (rotatemode == ROTATEMODE_RGBA_RABG) ? (palette_rgb[bestindex]).y : + (rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[bestindex]).z : nvCheckMacro(0); + toterr += besterr; + + // do A index + besterr = FLT_MAX; + for (int j = 0; j < NINDICES_A(indexmode) && besterr > 0; ++j) + { + err = !AVPCL::flag_premult ? 
Utils::metric1(a, palette_a[j], rotatemode) : + Utils::metric1premult(a, tile_alpha, palette_a[j], palette_alpha, rotatemode); + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[INDEXARRAY_A][i] = j; + } + } + toterr += besterr; // squared-error norms are additive since we don't do the square root + if (toterr > current_besterr) + { + // fill out bogus index values so it's initialized at least + for (int k = i; k < np; ++k) + { + indices[INDEXARRAY_RGB][k] = -1; + indices[INDEXARRAY_A][k] = -1; + } + return FLT_MAX; + } + } + } + return toterr; +} + +// assign indices given a tile, shape, and quantized endpoints, return toterr for each region +static void assign_indices(const Tile &tile, int shapeindex, int rotatemode, int indexmode, IntEndptsRGBA endpts[NREGIONS], const PatternPrec &pattern_prec, + int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS]) +{ + Vector3 palette_rgb[NREGIONS][NINDICES3]; // could be nindices2 + float palette_a[NREGIONS][NINDICES3]; // could be nindices2 + + for (int region = 0; region < NREGIONS; ++region) + { + generate_palette_quantized_rgb_a(endpts[region], pattern_prec.region_precs[region], indexmode, &palette_rgb[region][0], &palette_a[region][0]); + toterr[region] = 0; + } + + Vector3 rgb; + float a; + + for (int y = 0; y < tile.size_y; y++) + for (int x = 0; x < tile.size_x; x++) + { + int region = REGION(x,y,shapeindex); + float err, besterr; + float palette_alpha = 0, tile_alpha = 0; + + rgb.x = (tile.data[y][x]).x; + rgb.y = (tile.data[y][x]).y; + rgb.z = (tile.data[y][x]).z; + a = (tile.data[y][x]).w; + + if(AVPCL::flag_premult) + tile_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (tile.data[y][x]).x : + (rotatemode == ROTATEMODE_RGBA_RABG) ? (tile.data[y][x]).y : + (rotatemode == ROTATEMODE_RGBA_RGAB) ? (tile.data[y][x]).z : (tile.data[y][x]).w; + + // compute the two indices separately + // if we're doing premultiplied alpha, we need to choose first the index that + // determines the alpha value, and then do the other index + + if (rotatemode == ROTATEMODE_RGBA_RGBA) + { + // do A index first as it has the alpha + besterr = FLT_MAX; + for (int i = 0; i < NINDICES_A(indexmode) && besterr > 0; ++i) + { + err = Utils::metric1(a, palette_a[region][i], rotatemode); + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[INDEXARRAY_A][y][x] = i; + palette_alpha = palette_a[region][i]; + } + } + toterr[region] += besterr; // squared-error norms are additive since we don't do the square root + + // do RGB index + besterr = FLT_MAX; + for (int i = 0; i < NINDICES_RGB(indexmode) && besterr > 0; ++i) + { + err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[region][i], rotatemode) : + Utils::metric3premult_alphaout(rgb, tile_alpha, palette_rgb[region][i], palette_alpha); + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[INDEXARRAY_RGB][y][x] = i; + } + } + toterr[region] += besterr; + } + else + { + // do RGB index first as it has the alpha + besterr = FLT_MAX; + int bestindex; + for (int i = 0; i < NINDICES_RGB(indexmode) && besterr > 0; ++i) + { + err = !AVPCL::flag_premult ? 
Utils::metric3(rgb, palette_rgb[region][i], rotatemode) : + Utils::metric3premult_alphain(rgb, palette_rgb[region][i], rotatemode); + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[INDEXARRAY_RGB][y][x] = i; + bestindex = i; + } + } + palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (palette_rgb[region][bestindex]).x : + (rotatemode == ROTATEMODE_RGBA_RABG) ? (palette_rgb[region][bestindex]).y : + (rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[region][bestindex]).z : nvCheckMacro(0); + toterr[region] += besterr; + + // do A index + besterr = FLT_MAX; + for (int i = 0; i < NINDICES_A(indexmode) && besterr > 0; ++i) + { + err = !AVPCL::flag_premult ? Utils::metric1(a, palette_a[region][i], rotatemode) : + Utils::metric1premult(a, tile_alpha, palette_a[region][i], palette_alpha, rotatemode); + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[INDEXARRAY_A][y][x] = i; + } + } + toterr[region] += besterr; // squared-error norms are additive since we don't do the square root + } + } +} + +// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's +// this function returns either old_err or a value smaller (if it was successful in improving the error) +static float perturb_one(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, int ch, const RegionPrec ®ion_prec, const IntEndptsRGBA &old_endpts, IntEndptsRGBA &new_endpts, + float old_err, int do_b, int indices[NINDEXARRAYS][Tile::TILE_TOTAL]) +{ + // we have the old endpoints: old_endpts + // we have the perturbed endpoints: new_endpts + // we have the temporary endpoints: temp_endpts + + IntEndptsRGBA temp_endpts; + float min_err = old_err; // start with the best current error + int beststep; + int temp_indices[NINDEXARRAYS][Tile::TILE_TOTAL]; + + for (int j=0; j>= 1) + { + bool improved = false; + for (int sign = -1; sign <= 1; sign += 2) + { + if (do_b == 0) + { + temp_endpts.A[ch] = new_endpts.A[ch] + sign * step; + if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec)) + continue; + } + else + { + temp_endpts.B[ch] = new_endpts.B[ch] + sign * step; + if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec)) + continue; + } + + float err = map_colors(colors, importance, np, rotatemode, indexmode, temp_endpts, region_prec, min_err, temp_indices); + + if (err < min_err) + { + improved = true; + min_err = err; + beststep = sign * step; + for (int j=0; j 5000 perturb endpoints 50% of precision +// if err > 1000 25% +// if err > 200 12.5% +// if err > 40 6.25% +// for np = 16 -- adjust error thresholds as a function of np +// always ensure endpoint ordering is preserved (no need to overlap the scan) +static float exhaustive(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, int ch, const RegionPrec ®ion_prec, float orig_err, IntEndptsRGBA &opt_endpts, int indices[NINDEXARRAYS][Tile::TILE_TOTAL]) +{ + IntEndptsRGBA temp_endpts; + float best_err = orig_err; + int aprec = region_prec.endpt_a_prec[ch]; + int bprec = region_prec.endpt_b_prec[ch]; + int good_indices[NINDEXARRAYS][Tile::TILE_TOTAL]; + int temp_indices[NINDEXARRAYS][Tile::TILE_TOTAL]; + + for (int j=0; j 5000.0*thr_scale) { adelta = (1 << aprec)/2; bdelta = (1 << bprec)/2; } + else if (orig_err > 1000.0*thr_scale) { adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; } + else if (orig_err > 
200.0*thr_scale) { adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; } + else if (orig_err > 40.0*thr_scale) { adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; } + adelta = max(adelta, 3); + bdelta = max(bdelta, 3); + +#ifdef DISABLE_EXHAUSTIVE + adelta = bdelta = 3; +#endif + + temp_endpts = opt_endpts; + + // ok figure out the range of A and B + int alow = max(0, opt_endpts.A[ch] - adelta); + int ahigh = min((1<= initial_error) break + rgb0 += delta0 + next = 1 + else + if (err1 >= initial_error) break + rgb1 += delta1 + next = 0 + initial_err = map() + for (;;) + err = perturb(next ? rgb1:rgb0, delta) + if (err >= initial_err) break + next? rgb1 : rgb0 += delta + initial_err = err + */ + IntEndptsRGBA new_a, new_b; + IntEndptsRGBA new_endpt; + int do_b; + int orig_indices[NINDEXARRAYS][Tile::TILE_TOTAL]; + int new_indices[NINDEXARRAYS][Tile::TILE_TOTAL]; + int temp_indices0[NINDEXARRAYS][Tile::TILE_TOTAL]; + int temp_indices1[NINDEXARRAYS][Tile::TILE_TOTAL]; + + // now optimize each channel separately + for (int ch = 0; ch < NCHANNELS_RGBA; ++ch) + { + // figure out which endpoint when perturbed gives the most improvement and start there + // if we just alternate, we can easily end up in a local minima + float err0 = perturb_one(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0); // perturb endpt A + float err1 = perturb_one(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1); // perturb endpt B + + if (err0 < err1) + { + if (err0 >= opt_err) + continue; + + for (int j=0; j= opt_err) + continue; + + for (int j=0; j= opt_err) + break; + + for (int j=0; j 255.0f) v.x = 255.0f; + if (v.y < 0.0f) v.y = 0.0f; + if (v.y > 255.0f) v.y = 255.0f; + if (v.z < 0.0f) v.z = 0.0f; + if (v.z > 255.0f) v.z = 255.0f; + if (v.w < 0.0f) v.w = 0.0f; + if (v.w > 255.0f) v.w = 255.0f; +} + +// compute initial endpoints for the "RGB" portion and the "A" portion. +// Note these channels may have been rotated. +static void rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS]) +{ + for (int region=0; region maxp) maxp = dp; + + dp = alphas[i] - mean.w; + if (dp < mina) mina = dp; + if (dp > maxa) maxa = dp; + } + + // choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values + endpts[region].A = mean + Vector4(minp*direction, mina); + endpts[region].B = mean + Vector4(maxp*direction, maxa); + + // clamp endpoints + // the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best + // shape based on endpoints being clamped + clamp(endpts[region].A); + clamp(endpts[region].B); + } +} + +float AVPCL::compress_mode4(const Tile &t, char *block) +{ + FltEndpts endpts[NREGIONS]; + char tempblock[AVPCL::BLOCKSIZE]; + float msebest = FLT_MAX; + int shape = 0; + Tile t1; + + // try all rotations. refine tries the 2 different indexings. 
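For context on the rotation loop that follows: each rotate mode swaps the alpha channel with one colour channel before compression, so the separately indexed, differently quantized alpha slot can carry whichever channel benefits most (ROTATEMODE_RGBA_AGBR puts red there, RABG green, RGAB blue). The sketch below illustrates that swap only; the numeric enum values and the Pixel layout are assumptions for illustration, not the rotate_tile implementation.

#include <utility>

struct Pixel { float r, g, b, a; };

void rotate_pixel(Pixel& p, int rotatemode)
{
    switch (rotatemode)
    {
    case 0: break;                       // RGBA: no swap
    case 1: std::swap(p.r, p.a); break;  // AGBR: alpha slot carries red
    case 2: std::swap(p.g, p.a); break;  // RABG: alpha slot carries green
    case 3: std::swap(p.b, p.a); break;  // RGAB: alpha slot carries blue
    }
}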
+ for (int r = 0; r < NROTATEMODES && msebest > 0; ++r) + { + rotate_tile(t, r, t1); + rough(t1, shape, endpts); + for (int i = 0; i < NINDEXMODES && msebest > 0; ++i) + { + float mse = refine(t1, shape, r, i, endpts, tempblock); + if (mse < msebest) + { + memcpy(block, tempblock, sizeof(tempblock)); + msebest = mse; + } + } + } + return msebest; +} Index: ps/trunk/libraries/source/nvtt/src/src/bc7/avpcl_mode5.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/bc7/avpcl_mode5.cpp +++ ps/trunk/libraries/source/nvtt/src/src/bc7/avpcl_mode5.cpp @@ -0,0 +1,1216 @@ +/* +Copyright 2007 nVidia, Inc. +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. + +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +See the License for the specific language governing permissions and limitations under the License. +*/ + +// Thanks to Jacob Munkberg (jacob@cs.lth.se) for the shortcut of using SVD to do the equivalent of principal components analysis + +// x100000 2r 777x2 8x2 2bi 2bi + +#include "bits.h" +#include "tile.h" +#include "avpcl.h" +#include "nvcore/Debug.h" +#include "nvmath/Vector.inl" +#include "nvmath/Matrix.inl" +#include "nvmath/Fitting.h" +#include "avpcl_utils.h" +#include "endpts.h" +#include +#include + +using namespace nv; +using namespace AVPCL; + +// there are 2 index arrays. INDEXMODE selects between the arrays being 2 & 3 bits or 3 & 2 bits +// array 0 is always the RGB array and array 1 is always the A array +#define NINDEXARRAYS 2 +#define INDEXARRAY_RGB 0 +#define INDEXARRAY_A 1 +#define INDEXARRAY_2BITS(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? INDEXARRAY_A : INDEXARRAY_RGB) +#define INDEXARRAY_3BITS(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_3BITS) ? INDEXARRAY_A : INDEXARRAY_RGB) + +#define NINDICES3 4 +#define INDEXBITS3 2 +#define HIGH_INDEXBIT3 (1<<(INDEXBITS3-1)) +#define DENOM3 (NINDICES3-1) +#define BIAS3 (DENOM3/2) + +#define NINDICES2 4 +#define INDEXBITS2 2 +#define HIGH_INDEXBIT2 (1<<(INDEXBITS2-1)) +#define DENOM2 (NINDICES2-1) +#define BIAS2 (DENOM2/2) + +#define NINDICES_RGB(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? NINDICES3 : NINDICES2) +#define INDEXBITS_RGB(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? INDEXBITS3 : INDEXBITS2) +#define HIGH_INDEXBIT_RGB(indexmode)((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? HIGH_INDEXBIT3 : HIGH_INDEXBIT2) +#define DENOM_RGB(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? DENOM3 : DENOM2) +#define BIAS_RGB(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? BIAS3 : BIAS2) + +#define NINDICES_A(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? NINDICES2 : NINDICES3) +#define INDEXBITS_A(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? INDEXBITS2 : INDEXBITS3) +#define HIGH_INDEXBIT_A(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? HIGH_INDEXBIT2 : HIGH_INDEXBIT3) +#define DENOM_A(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? DENOM2 : DENOM3) +#define BIAS_A(indexmode) ((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? 
BIAS2 : BIAS3) + +#define NSHAPES 1 + +static int shapes[NSHAPES] = +{ + 0x0000, +}; + +#define REGION(x,y,shapeindex) ((shapes[shapeindex]&(1<<(15-(x)-4*(y))))!=0) + +#define NREGIONS 1 // keep the region stuff in just in case... + +// encoded index compression location: region 0 is always at 0,0. + +#define NBITSIZES 2 // one endpoint pair + +struct ChanBits +{ + int nbitsizes[NBITSIZES]; // bitsizes for one channel +}; + +struct Pattern +{ + ChanBits chan[NCHANNELS_RGBA];// bit patterns used per channel + int transform_mode; // x0 means alpha channel not transformed, x1 otherwise. 0x rgb not transformed, 1x otherwise. + int mode; // associated mode value + int modebits; // number of mode bits + const char *encoding; // verilog description of encoding for this mode +}; + +#define TRANSFORM_MODE_ALPHA 1 +#define TRANSFORM_MODE_RGB 2 + +#define NPATTERNS 1 + +static Pattern patterns[NPATTERNS] = +{ + // red green blue alpha xfm mode mb encoding + 7,7, 7,7, 7,7, 8,8, 0x0, 0x20, 6, "", +}; + +struct RegionPrec +{ + int endpt_a_prec[NCHANNELS_RGBA]; + int endpt_b_prec[NCHANNELS_RGBA]; +}; + +struct PatternPrec +{ + RegionPrec region_precs[NREGIONS]; +}; + +// this is the precision for each channel and region +// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this! +static PatternPrec pattern_precs[NPATTERNS] = +{ + 7,7,7,8, 7,7,7,8, +}; + + +// return # of bits needed to store n. handle signed or unsigned cases properly +static int nbits(int n, bool issigned) +{ + int nb; + if (n==0) + return 0; // no bits needed for 0, signed or not + else if (n > 0) + { + for (nb=0; n; ++nb, n>>=1) ; + return nb + (issigned?1:0); + } + else + { + nvAssert (issigned); + for (nb=0; n<-1; ++nb, n>>=1) ; + return nb + 1; + } +} + +#define R_0 ep[0].A[i] +#define R_1 ep[0].B[i] + +static void transform_forward(int transform_mode, IntEndptsRGBA ep[NREGIONS]) +{ + int i; + + if (transform_mode & TRANSFORM_MODE_RGB) + for (i=CHANNEL_R; i> 2) & 3 and x = index & 3 +static void swap_indices(int shapeindex, int indexmode, IntEndptsRGBA endpts[NREGIONS], int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W]) +{ + int index_positions[NREGIONS]; + + index_positions[0] = 0; // since WLOG we have the high bit of the shapes at 0 + + for (int region = 0; region < NREGIONS; ++region) + { + int x = index_positions[region] & 3; + int y = (index_positions[region] >> 2) & 3; + nvAssert(REGION(x,y,shapeindex) == region); // double check the table + + // swap RGB + if (indices[INDEXARRAY_RGB][y][x] & HIGH_INDEXBIT_RGB(indexmode)) + { + // high bit is set, swap the endpts and indices for this region + int t; + for (int i=CHANNEL_R; i<=CHANNEL_B; ++i) { t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t; } + + for (int y = 0; y < Tile::TILE_H; y++) + for (int x = 0; x < Tile::TILE_W; x++) + if (REGION(x,y,shapeindex) == region) + indices[INDEXARRAY_RGB][y][x] = NINDICES_RGB(indexmode) - 1 - indices[INDEXARRAY_RGB][y][x]; + } + + // swap A + if (indices[INDEXARRAY_A][y][x] & HIGH_INDEXBIT_A(indexmode)) + { + // high bit is set, swap the endpts and indices for this region + int t; + for (int i=CHANNEL_A; i<=CHANNEL_A; ++i) { t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t; } + + for (int y = 0; y < Tile::TILE_H; y++) + for (int x = 0; x < Tile::TILE_W; x++) + if (REGION(x,y,shapeindex) == region) + indices[INDEXARRAY_A][y][x] = NINDICES_A(indexmode) - 1 - indices[INDEXARRAY_A][y][x]; + } + } 
+} + +static bool endpts_fit(IntEndptsRGBA endpts[NREGIONS], const Pattern &p) +{ + return true; +} + +static void write_header(const IntEndptsRGBA endpts[NREGIONS], int shapeindex, const Pattern &p, int rotatemode, int indexmode, Bits &out) +{ + // ignore shapeindex + out.write(p.mode, p.modebits); + out.write(rotatemode, ROTATEMODE_BITS); +// out.write(indexmode, INDEXMODE_BITS); + for (int i=0; i= 0 && pat_index < NPATTERNS); + nvAssert (in.getptr() == patterns[pat_index].modebits); + + p = patterns[pat_index]; + + shapeindex = 0; // we don't have any + + rotatemode = in.read(ROTATEMODE_BITS); + + indexmode = 0; // we don't have any + + for (int i=0; i>2][i&3], INDEXBITS2 - (i==0?1:0)); // write i..[1:0] or i..[0] + + // then the 3 bit indices + nvAssert ((indices[INDEXARRAY_3BITS(indexmode)][0][0] & HIGH_INDEXBIT3) == 0); + for (int i = 0; i < Tile::TILE_TOTAL; ++i) + out.write(indices[INDEXARRAY_3BITS(indexmode)][i>>2][i&3], INDEXBITS3 - (i==0?1:0)); // write i..[2:0] or i..[1:0] +} + +static void read_indices(Bits &in, int shapeindex, int indexmode, int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W]) +{ + // the indices we shorten is always index 0 + + // do the 2 bit indices first + for (int i = 0; i < Tile::TILE_TOTAL; ++i) + indices[INDEXARRAY_2BITS(indexmode)][i>>2][i&3] = in.read(INDEXBITS2 - (i==0?1:0)); // read i..[1:0] or i..[0] + + // then the 3 bit indices + for (int i = 0; i < Tile::TILE_TOTAL; ++i) + indices[INDEXARRAY_3BITS(indexmode)][i>>2][i&3] = in.read(INDEXBITS3 - (i==0?1:0)); // read i..[1:0] or i..[0] +} + +static void emit_block(const IntEndptsRGBA endpts[NREGIONS], int shapeindex, const Pattern &p, const int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], int rotatemode, int indexmode, char *block) +{ + Bits out(block, AVPCL::BITSIZE); + + write_header(endpts, shapeindex, p, rotatemode, indexmode, out); + + write_indices(indices, shapeindex, indexmode, out); + + nvAssert(out.getptr() == AVPCL::BITSIZE); +} + +static void generate_palette_quantized_rgb_a(const IntEndptsRGBA &endpts, const RegionPrec ®ion_prec, int indexmode, Vector3 palette_rgb[NINDICES3], float palette_a[NINDICES3]) +{ + // scale endpoints for RGB + int a, b; + + a = Utils::unquantize(endpts.A[0], region_prec.endpt_a_prec[0]); + b = Utils::unquantize(endpts.B[0], region_prec.endpt_b_prec[0]); + + // interpolate R + for (int i = 0; i < NINDICES_RGB(indexmode); ++i) + palette_rgb[i].x = float(Utils::lerp(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode))); + + a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]); + b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]); + + // interpolate G + for (int i = 0; i < NINDICES_RGB(indexmode); ++i) + palette_rgb[i].y = float(Utils::lerp(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode))); + + a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]); + b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]); + + // interpolate B + for (int i = 0; i < NINDICES_RGB(indexmode); ++i) + palette_rgb[i].z = float(Utils::lerp(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode))); + + a = Utils::unquantize(endpts.A[3], region_prec.endpt_a_prec[3]); + b = Utils::unquantize(endpts.B[3], region_prec.endpt_b_prec[3]); + + // interpolate A + for (int i = 0; i < NINDICES_A(indexmode); ++i) + palette_a[i] = float(Utils::lerp(a, b, i, BIAS_A(indexmode), DENOM_A(indexmode))); +} + +static void sign_extend(Pattern &p, IntEndptsRGBA endpts[NREGIONS]) +{ + for (int i=0; i 0; ++j) + { + err = Utils::metric1(a, palette_a[j], 
rotatemode); + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + palette_alpha = palette_a[j]; + indices[INDEXARRAY_A][i] = j; + } + } + toterr += besterr; // squared-error norms are additive since we don't do the square root + + // do RGB index + besterr = FLT_MAX; + for (int j = 0; j < NINDICES_RGB(indexmode) && besterr > 0; ++j) + { + err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[j], rotatemode) : + Utils::metric3premult_alphaout(rgb, tile_alpha, palette_rgb[j], palette_alpha); + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[INDEXARRAY_RGB][i] = j; + } + } + toterr += besterr; + if (toterr > current_besterr) + { + // fill out bogus index values so it's initialized at least + for (int k = i; k < np; ++k) + { + indices[INDEXARRAY_RGB][k] = -1; + indices[INDEXARRAY_A][k] = -1; + } + return FLT_MAX; + } + } + else + { + // do RGB index + besterr = FLT_MAX; + int bestindex; + for (int j = 0; j < NINDICES_RGB(indexmode) && besterr > 0; ++j) + { + err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[j], rotatemode) : + Utils::metric3premult_alphain(rgb, palette_rgb[j], rotatemode); + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + bestindex = j; + indices[INDEXARRAY_RGB][i] = j; + } + } + palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (palette_rgb[bestindex]).x : + (rotatemode == ROTATEMODE_RGBA_RABG) ? (palette_rgb[bestindex]).y : + (rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[bestindex]).z : nvCheckMacro(0); + toterr += besterr; + + // do A index + besterr = FLT_MAX; + for (int j = 0; j < NINDICES_A(indexmode) && besterr > 0; ++j) + { + err = !AVPCL::flag_premult ? 
Utils::metric1(a, palette_a[j], rotatemode) : + Utils::metric1premult(a, tile_alpha, palette_a[j], palette_alpha, rotatemode); + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[INDEXARRAY_A][i] = j; + } + } + toterr += besterr; // squared-error norms are additive since we don't do the square root + if (toterr > current_besterr) + { + // fill out bogus index values so it's initialized at least + for (int k = i; k < np; ++k) + { + indices[INDEXARRAY_RGB][k] = -1; + indices[INDEXARRAY_A][k] = -1; + } + return FLT_MAX; + } + } + } + return toterr; +} + +// assign indices given a tile, shape, and quantized endpoints, return toterr for each region +static void assign_indices(const Tile &tile, int shapeindex, int rotatemode, int indexmode, IntEndptsRGBA endpts[NREGIONS], const PatternPrec &pattern_prec, + int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS]) +{ + Vector3 palette_rgb[NREGIONS][NINDICES3]; // could be nindices2 + float palette_a[NREGIONS][NINDICES3]; // could be nindices2 + + for (int region = 0; region < NREGIONS; ++region) + { + generate_palette_quantized_rgb_a(endpts[region], pattern_prec.region_precs[region], indexmode, &palette_rgb[region][0], &palette_a[region][0]); + toterr[region] = 0; + } + + Vector3 rgb; + float a; + + for (int y = 0; y < tile.size_y; y++) + for (int x = 0; x < tile.size_x; x++) + { + int region = REGION(x,y,shapeindex); + float err, besterr; + float palette_alpha = 0, tile_alpha = 0; + + rgb.x = (tile.data[y][x]).x; + rgb.y = (tile.data[y][x]).y; + rgb.z = (tile.data[y][x]).z; + a = (tile.data[y][x]).w; + + if(AVPCL::flag_premult) + tile_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (tile.data[y][x]).x : + (rotatemode == ROTATEMODE_RGBA_RABG) ? (tile.data[y][x]).y : + (rotatemode == ROTATEMODE_RGBA_RGAB) ? (tile.data[y][x]).z : (tile.data[y][x]).w; + + // compute the two indices separately + // if we're doing premultiplied alpha, we need to choose first the index that + // determines the alpha value, and then do the other index + + if (rotatemode == ROTATEMODE_RGBA_RGBA) + { + // do A index first as it has the alpha + besterr = FLT_MAX; + for (int i = 0; i < NINDICES_A(indexmode) && besterr > 0; ++i) + { + err = Utils::metric1(a, palette_a[region][i], rotatemode); + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[INDEXARRAY_A][y][x] = i; + palette_alpha = palette_a[region][i]; + } + } + toterr[region] += besterr; // squared-error norms are additive since we don't do the square root + + // do RGB index + besterr = FLT_MAX; + for (int i = 0; i < NINDICES_RGB(indexmode) && besterr > 0; ++i) + { + err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[region][i], rotatemode) : + Utils::metric3premult_alphaout(rgb, tile_alpha, palette_rgb[region][i], palette_alpha); + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[INDEXARRAY_RGB][y][x] = i; + } + } + toterr[region] += besterr; + } + else + { + // do RGB index first as it has the alpha + besterr = FLT_MAX; + int bestindex; + for (int i = 0; i < NINDICES_RGB(indexmode) && besterr > 0; ++i) + { + err = !AVPCL::flag_premult ? 
Utils::metric3(rgb, palette_rgb[region][i], rotatemode) : + Utils::metric3premult_alphain(rgb, palette_rgb[region][i], rotatemode); + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[INDEXARRAY_RGB][y][x] = i; + bestindex = i; + } + } + palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (palette_rgb[region][bestindex]).x : + (rotatemode == ROTATEMODE_RGBA_RABG) ? (palette_rgb[region][bestindex]).y : + (rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[region][bestindex]).z : nvCheckMacro(0); + toterr[region] += besterr; + + // do A index + besterr = FLT_MAX; + for (int i = 0; i < NINDICES_A(indexmode) && besterr > 0; ++i) + { + err = !AVPCL::flag_premult ? Utils::metric1(a, palette_a[region][i], rotatemode) : + Utils::metric1premult(a, tile_alpha, palette_a[region][i], palette_alpha, rotatemode); + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[INDEXARRAY_A][y][x] = i; + } + } + toterr[region] += besterr; // squared-error norms are additive since we don't do the square root + } + } +} + +// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's +// this function returns either old_err or a value smaller (if it was successful in improving the error) +static float perturb_one(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, int ch, const RegionPrec ®ion_prec, const IntEndptsRGBA &old_endpts, IntEndptsRGBA &new_endpts, + float old_err, int do_b, int indices[NINDEXARRAYS][Tile::TILE_TOTAL]) +{ + // we have the old endpoints: old_endpts + // we have the perturbed endpoints: new_endpts + // we have the temporary endpoints: temp_endpts + + IntEndptsRGBA temp_endpts; + float min_err = old_err; // start with the best current error + int beststep; + int temp_indices[NINDEXARRAYS][Tile::TILE_TOTAL]; + + for (int j=0; j>= 1) + { + bool improved = false; + for (int sign = -1; sign <= 1; sign += 2) + { + if (do_b == 0) + { + temp_endpts.A[ch] = new_endpts.A[ch] + sign * step; + if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec)) + continue; + } + else + { + temp_endpts.B[ch] = new_endpts.B[ch] + sign * step; + if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec)) + continue; + } + + float err = map_colors(colors, importance, np, rotatemode, indexmode, temp_endpts, region_prec, min_err, temp_indices); + + if (err < min_err) + { + improved = true; + min_err = err; + beststep = sign * step; + for (int j=0; j 5000 perturb endpoints 50% of precision +// if err > 1000 25% +// if err > 200 12.5% +// if err > 40 6.25% +// for np = 16 -- adjust error thresholds as a function of np +// always ensure endpoint ordering is preserved (no need to overlap the scan) +static float exhaustive(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, int ch, const RegionPrec ®ion_prec, float orig_err, IntEndptsRGBA &opt_endpts, int indices[NINDEXARRAYS][Tile::TILE_TOTAL]) +{ + IntEndptsRGBA temp_endpts; + float best_err = orig_err; + int aprec = region_prec.endpt_a_prec[ch]; + int bprec = region_prec.endpt_b_prec[ch]; + int good_indices[NINDEXARRAYS][Tile::TILE_TOTAL]; + int temp_indices[NINDEXARRAYS][Tile::TILE_TOTAL]; + + for (int j=0; j 5000.0*thr_scale) { adelta = (1 << aprec)/2; bdelta = (1 << bprec)/2; } + else if (orig_err > 1000.0*thr_scale) { adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; } + else if (orig_err > 
200.0*thr_scale) { adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; } + else if (orig_err > 40.0*thr_scale) { adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; } + adelta = max(adelta, 3); + bdelta = max(bdelta, 3); + +#ifdef DISABLE_EXHAUSTIVE + adelta = bdelta = 3; +#endif + + temp_endpts = opt_endpts; + + // ok figure out the range of A and B + int alow = max(0, opt_endpts.A[ch] - adelta); + int ahigh = min((1<= initial_error) break + rgb0 += delta0 + next = 1 + else + if (err1 >= initial_error) break + rgb1 += delta1 + next = 0 + initial_err = map() + for (;;) + err = perturb(next ? rgb1:rgb0, delta) + if (err >= initial_err) break + next? rgb1 : rgb0 += delta + initial_err = err + */ + IntEndptsRGBA new_a, new_b; + IntEndptsRGBA new_endpt; + int do_b; + int orig_indices[NINDEXARRAYS][Tile::TILE_TOTAL]; + int new_indices[NINDEXARRAYS][Tile::TILE_TOTAL]; + int temp_indices0[NINDEXARRAYS][Tile::TILE_TOTAL]; + int temp_indices1[NINDEXARRAYS][Tile::TILE_TOTAL]; + + // now optimize each channel separately + for (int ch = 0; ch < NCHANNELS_RGBA; ++ch) + { + // figure out which endpoint when perturbed gives the most improvement and start there + // if we just alternate, we can easily end up in a local minima + float err0 = perturb_one(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0); // perturb endpt A + float err1 = perturb_one(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1); // perturb endpt B + + if (err0 < err1) + { + if (err0 >= opt_err) + continue; + + for (int j=0; j= opt_err) + continue; + + for (int j=0; j= opt_err) + break; + + for (int j=0; j 255.0f) v.x = 255.0f; + if (v.y < 0.0f) v.y = 0.0f; + if (v.y > 255.0f) v.y = 255.0f; + if (v.z < 0.0f) v.z = 0.0f; + if (v.z > 255.0f) v.z = 255.0f; + if (v.w < 0.0f) v.w = 0.0f; + if (v.w > 255.0f) v.w = 255.0f; +} + +// compute initial endpoints for the "RGB" portion and the "A" portion. +// Note these channels may have been rotated. +static void rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS]) +{ + for (int region=0; region maxp) maxp = dp; + + dp = alphas[i] - mean.w; + if (dp < mina) mina = dp; + if (dp > maxa) maxa = dp; + } + + // choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values + endpts[region].A = mean + Vector4(minp*direction, mina); + endpts[region].B = mean + Vector4(maxp*direction, maxa); + + // clamp endpoints + // the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best + // shape based on endpoints being clamped + clamp(endpts[region].A); + clamp(endpts[region].B); + } +} + +float AVPCL::compress_mode5(const Tile &t, char *block) +{ + FltEndpts endpts[NREGIONS]; + char tempblock[AVPCL::BLOCKSIZE]; + float msebest = FLT_MAX; + int shape = 0; + Tile t1; + + // try all rotations. refine tries the 2 different indexings. 
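+	// ROTATEMODE_BITS == 2 (see avpcl_utils.h), so NROTATEMODES == 4: RGBA (identity), AGBR,
+	// RABG and RGAB. The winning rotation is stored in the block header by write_header()
+	// and recovered by read_header() when decoding.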
+ for (int r = 0; r < NROTATEMODES && msebest > 0; ++r) + { + rotate_tile(t, r, t1); + rough(t1, shape, endpts); +// for (int i = 0; i < NINDEXMODES && msebest > 0; ++i) + for (int i = 0; i < 1 && msebest > 0; ++i) + { + float mse = refine(t1, shape, r, i, endpts, tempblock); + if (mse < msebest) + { + memcpy(block, tempblock, sizeof(tempblock)); + msebest = mse; + } + } + } + return msebest; +} Index: ps/trunk/libraries/source/nvtt/src/src/bc7/avpcl_mode6.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/bc7/avpcl_mode6.cpp +++ ps/trunk/libraries/source/nvtt/src/src/bc7/avpcl_mode6.cpp @@ -0,0 +1,1055 @@ +/* +Copyright 2007 nVidia, Inc. +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. + +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +See the License for the specific language governing permissions and limitations under the License. +*/ + +// Thanks to Jacob Munkberg (jacob@cs.lth.se) for the shortcut of using SVD to do the equivalent of principal components analysis + +// x1000000 7777.1x2 4bi + +#include "bits.h" +#include "tile.h" +#include "avpcl.h" +#include "nvcore/Debug.h" +#include "nvmath/Vector.inl" +#include "nvmath/Matrix.inl" +#include "nvmath/Fitting.h" +#include "avpcl_utils.h" +#include "endpts.h" +#include +#include + +using namespace nv; +using namespace AVPCL; + +#define NLSBMODES 4 // number of different lsb modes per region. since we have two .1 per region, that can have 4 values + +#define NINDICES 16 +#define INDEXBITS 4 +#define HIGH_INDEXBIT (1<<(INDEXBITS-1)) +#define DENOM (NINDICES-1) +#define BIAS (DENOM/2) + +#define NSHAPES 1 + +static int shapes[NSHAPES] = +{ + 0x0000, +}; + +#define REGION(x,y,shapeindex) ((shapes[shapeindex]&(1<<(15-(x)-4*(y))))!=0) + +#define NREGIONS 1 + +#define NBITSIZES (NREGIONS*2) +#define ABITINDEX(region) (2*(region)+0) +#define BBITINDEX(region) (2*(region)+1) + +struct ChanBits +{ + int nbitsizes[NBITSIZES]; // bitsizes for one channel +}; + +struct Pattern +{ + ChanBits chan[NCHANNELS_RGBA];// bit patterns used per channel + int mode; // associated mode value + int modebits; // number of mode bits + const char *encoding; // verilog description of encoding for this mode +}; + +#define NPATTERNS 1 + +static Pattern patterns[NPATTERNS] = +{ + // red green blue alpha mode mb verilog + 7,7, 7,7, 7,7, 7,7, 0x40, 7, "", +}; + +struct RegionPrec +{ + int endpt_a_prec[NCHANNELS_RGBA]; + int endpt_b_prec[NCHANNELS_RGBA]; +}; + +struct PatternPrec +{ + RegionPrec region_precs[NREGIONS]; +}; + +// this is the precision for each channel and region +// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this! +static PatternPrec pattern_precs[NPATTERNS] = +{ + 7,7,7,7, 7,7,7,7, +}; + +// return # of bits needed to store n. 
handle signed or unsigned cases properly +static int nbits(int n, bool issigned) +{ + int nb; + if (n==0) + return 0; // no bits needed for 0, signed or not + else if (n > 0) + { + for (nb=0; n; ++nb, n>>=1) ; + return nb + (issigned?1:0); + } + else + { + nvAssert (issigned); + for (nb=0; n<-1; ++nb, n>>=1) ; + return nb + 1; + } +} + +/* +we're using this table to assign lsbs +abgr >=2 correct +0000 0 0 +0001 0 0 +0010 0 0 +0011 1 x1 +0100 0 0 +0101 1 x1 +0110 1 x1 +0111 1 1 +1000 0 0 +1001 1 x0 +1010 1 x0 +1011 1 1 +1100 1 x0 +1101 1 1 +1110 1 1 +1111 1 1 + +we need 8 0's and 8 1's. the x's can be either 0 or 1 as long as you get 8/8. +I choose to assign the lsbs so that the rgb channels are as good as possible. +*/ + +// 8888 ->7777.1, use the "correct" column above to assign the lsb +static void compress_one(const IntEndptsRGBA& endpts, IntEndptsRGBA_2& compr_endpts) +{ + int onescnt; + + onescnt = 0; + for (int j=0; j> 1; + nvAssert (compr_endpts.A[j] < 128); + } + compr_endpts.a_lsb = onescnt >= 2; + + onescnt = 0; + for (int j=0; j> 1; + nvAssert (compr_endpts.B[j] < 128); + } + compr_endpts.b_lsb = onescnt >= 2; +} + +static void uncompress_one(const IntEndptsRGBA_2& compr_endpts, IntEndptsRGBA& endpts) +{ + for (int j=0; j> 2) & 3 and x = index & 3 +static void swap_indices(IntEndptsRGBA_2 endpts[NREGIONS], int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex) +{ + int index_positions[NREGIONS]; + + index_positions[0] = 0; // since WLOG we have the high bit of the shapes at 0 + + for (int region = 0; region < NREGIONS; ++region) + { + int x = index_positions[region] & 3; + int y = (index_positions[region] >> 2) & 3; + nvAssert(REGION(x,y,shapeindex) == region); // double check the table + if (indices[y][x] & HIGH_INDEXBIT) + { + // high bit is set, swap the endpts and indices for this region + int t; + for (int i=0; i= 0 && pat_index < NPATTERNS); + nvAssert (in.getptr() == patterns[pat_index].modebits); + + p = patterns[pat_index]; + + shapeindex = 0; // we don't have any + + for (int j=0; j>2][i&3], INDEXBITS-1); // write i..[2:0] + else + out.write(indices[i>>2][i&3], INDEXBITS); // write i..[3:0] + } + +} + +static void read_indices(Bits &in, int shapeindex, int indices[Tile::TILE_H][Tile::TILE_W]) +{ + // the index we shorten is always index 0 + for (int i = 0; i < Tile::TILE_TOTAL; ++i) + { + if (i==0) + indices[i>>2][i&3] = in.read(INDEXBITS-1); // read i..[1:0] + else + indices[i>>2][i&3] = in.read(INDEXBITS); // read i..[2:0] + } +} + +static void emit_block(const IntEndptsRGBA_2 endpts[NREGIONS], int shapeindex, const Pattern &p, const int indices[Tile::TILE_H][Tile::TILE_W], char *block) +{ + Bits out(block, AVPCL::BITSIZE); + + write_header(endpts, shapeindex, p, out); + + write_indices(indices, shapeindex, out); + + nvAssert(out.getptr() == AVPCL::BITSIZE); +} + +static void generate_palette_quantized(const IntEndptsRGBA_2 &endpts_2, const RegionPrec ®ion_prec, Vector4 palette[NINDICES]) +{ + IntEndptsRGBA endpts; + + uncompress_one(endpts_2, endpts); + + // scale endpoints + int a, b; // really need a IntVec4... 
+ + a = Utils::unquantize(endpts.A[0], region_prec.endpt_a_prec[0]+1); // +1 since we are in uncompressed space + b = Utils::unquantize(endpts.B[0], region_prec.endpt_b_prec[0]+1); + + // interpolate + for (int i = 0; i < NINDICES; ++i) + palette[i].x = float(Utils::lerp(a, b, i, BIAS, DENOM)); + + a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]+1); + b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]+1); + + // interpolate + for (int i = 0; i < NINDICES; ++i) + palette[i].y = float(Utils::lerp(a, b, i, BIAS, DENOM)); + + a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]+1); + b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]+1); + + // interpolate + for (int i = 0; i < NINDICES; ++i) + palette[i].z = float(Utils::lerp(a, b, i, BIAS, DENOM)); + + a = Utils::unquantize(endpts.A[3], region_prec.endpt_a_prec[3]+1); + b = Utils::unquantize(endpts.B[3], region_prec.endpt_b_prec[3]+1); + + // interpolate + for (int i = 0; i < NINDICES; ++i) + palette[i].w = float(Utils::lerp(a, b, i, BIAS, DENOM)); +} + +void AVPCL::decompress_mode6(const char *block, Tile &t) +{ + Bits in(block, AVPCL::BITSIZE); + + Pattern p; + IntEndptsRGBA_2 endpts[NREGIONS]; + int shapeindex, pat_index; + + read_header(in, endpts, shapeindex, p, pat_index); + + Vector4 palette[NREGIONS][NINDICES]; + for (int r = 0; r < NREGIONS; ++r) + generate_palette_quantized(endpts[r], pattern_precs[pat_index].region_precs[r], &palette[r][0]); + + int indices[Tile::TILE_H][Tile::TILE_W]; + + read_indices(in, shapeindex, indices); + + nvAssert(in.getptr() == AVPCL::BITSIZE); + + // lookup + for (int y = 0; y < Tile::TILE_H; y++) + for (int x = 0; x < Tile::TILE_W; x++) + t.data[y][x] = palette[REGION(x,y,shapeindex)][indices[y][x]]; +} + +// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr +static float map_colors(const Vector4 colors[], const float importance[], int np, const IntEndptsRGBA_2 &endpts, const RegionPrec ®ion_prec, float current_err, int indices[Tile::TILE_TOTAL]) +{ + Vector4 palette[NINDICES]; + float toterr = 0; + Vector4 err; + + generate_palette_quantized(endpts, region_prec, palette); + + for (int i = 0; i < np; ++i) + { + float err, besterr = FLT_MAX; + + for (int j = 0; j < NINDICES && besterr > 0; ++j) + { + err = !AVPCL::flag_premult ? 
Utils::metric4(colors[i], palette[j]) : + Utils::metric4premult(colors[i], palette[j]) ; + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[i] = j; + } + } + toterr += besterr; + + // check for early exit + if (toterr > current_err) + { + // fill out bogus index values so it's initialized at least + for (int k = i; k < np; ++k) + indices[k] = -1; + + return FLT_MAX; + } + } + return toterr; +} + +// assign indices given a tile, shape, and quantized endpoints, return toterr for each region +static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGBA_2 endpts[NREGIONS], const PatternPrec &pattern_prec, + int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS]) +{ + // build list of possibles + Vector4 palette[NREGIONS][NINDICES]; + + for (int region = 0; region < NREGIONS; ++region) + { + generate_palette_quantized(endpts[region], pattern_prec.region_precs[region], &palette[region][0]); + toterr[region] = 0; + } + + Vector4 err; + + for (int y = 0; y < tile.size_y; y++) + for (int x = 0; x < tile.size_x; x++) + { + int region = REGION(x,y,shapeindex); + float err, besterr = FLT_MAX; + + for (int i = 0; i < NINDICES && besterr > 0; ++i) + { + err = !AVPCL::flag_premult ? Utils::metric4(tile.data[y][x], palette[region][i]) : + Utils::metric4premult(tile.data[y][x], palette[region][i]) ; + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[y][x] = i; + } + } + toterr[region] += besterr; + } +} + +// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's +// this function returns either old_err or a value smaller (if it was successful in improving the error) +static float perturb_one(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec ®ion_prec, const IntEndptsRGBA_2 &old_endpts, IntEndptsRGBA_2 &new_endpts, + float old_err, int do_b, int indices[Tile::TILE_TOTAL]) +{ + // we have the old endpoints: old_endpts + // we have the perturbed endpoints: new_endpts + // we have the temporary endpoints: temp_endpts + + IntEndptsRGBA_2 temp_endpts; + float min_err = old_err; // start with the best current error + int beststep; + int temp_indices[Tile::TILE_TOTAL]; + + for (int i=0; i>= 1) + { + bool improved = false; + for (int sign = -1; sign <= 1; sign += 2) + { + if (do_b == 0) + { + temp_endpts.A[ch] = new_endpts.A[ch] + sign * step; + if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec)) + continue; + } + else + { + temp_endpts.B[ch] = new_endpts.B[ch] + sign * step; + if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec)) + continue; + } + + float err = map_colors(colors, importance, np, temp_endpts, region_prec, min_err, temp_indices); + + if (err < min_err) + { + improved = true; + min_err = err; + beststep = sign * step; + for (int i=0; i 5000 perturb endpoints 50% of precision +// if err > 1000 25% +// if err > 200 12.5% +// if err > 40 6.25% +// for np = 16 -- adjust error thresholds as a function of np +// always ensure endpoint ordering is preserved (no need to overlap the scan) +// if orig_err returned from this is less than its input value, then indices[] will contain valid indices +static float exhaustive(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec ®ion_prec, float orig_err, IntEndptsRGBA_2 &opt_endpts, int indices[Tile::TILE_TOTAL]) +{ + IntEndptsRGBA_2 temp_endpts; + float best_err = 
orig_err; + int aprec = region_prec.endpt_a_prec[ch]; + int bprec = region_prec.endpt_b_prec[ch]; + int good_indices[Tile::TILE_TOTAL]; + int temp_indices[Tile::TILE_TOTAL]; + + for (int i=0; i 5000.0*thr_scale) { adelta = (1 << aprec)/2; bdelta = (1 << bprec)/2; } + else if (orig_err > 1000.0*thr_scale) { adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; } + else if (orig_err > 200.0*thr_scale) { adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; } + else if (orig_err > 40.0*thr_scale) { adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; } + adelta = max(adelta, 3); + bdelta = max(bdelta, 3); + +#ifdef DISABLE_EXHAUSTIVE + adelta = bdelta = 3; +#endif + + temp_endpts = opt_endpts; + + // ok figure out the range of A and B + int alow = max(0, opt_endpts.A[ch] - adelta); + int ahigh = min((1<= initial_error) break + rgb0 += delta0 + next = 1 + else + if (err1 >= initial_error) break + rgb1 += delta1 + next = 0 + initial_err = map() + for (;;) + err = perturb(next ? rgb1:rgb0, delta) + if (err >= initial_err) break + next? rgb1 : rgb0 += delta + initial_err = err + */ + IntEndptsRGBA_2 new_a, new_b; + IntEndptsRGBA_2 new_endpt; + int do_b; + int orig_indices[Tile::TILE_TOTAL]; + int new_indices[Tile::TILE_TOTAL]; + int temp_indices0[Tile::TILE_TOTAL]; + int temp_indices1[Tile::TILE_TOTAL]; + + // now optimize each channel separately + // for the first error improvement, we save the indices. then, for any later improvement, we compare the indices + // if they differ, we restart the loop (which then falls back to looking for a first improvement.) + for (int ch = 0; ch < NCHANNELS_RGBA; ++ch) + { + // figure out which endpoint when perturbed gives the most improvement and start there + // if we just alternate, we can easily end up in a local minima + float err0 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0); // perturb endpt A + float err1 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1); // perturb endpt B + + if (err0 < err1) + { + if (err0 >= opt_err) + continue; + + for (int i=0; i= opt_err) + continue; + + for (int i=0; i= opt_err) + break; + + for (int i=0; i> 1) & 1; + + // make sure we have a valid error for temp_in + // we use FLT_MAX here because we want an accurate temp_in_err, no shortcuts + // (mapcolors will compute a mapping but will stop if the error exceeds the value passed in the FLT_MAX position) + float temp_in_err = map_colors(pixels, importance, np, temp_in, pattern_prec.region_precs[region], FLT_MAX, temp_indices); + + // now try to optimize these endpoints + float temp_out_err = optimize_one(pixels, importance, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out); + + // if we find an improvement, update the best so far and correct the output endpoints and errors + if (temp_out_err < best_err) + { + best_err = temp_out_err; + opt_err[region] = temp_out_err; + opt_endpts[region] = temp_out; + } + } + } +} + +/* optimization algorithm + for each pattern + convert endpoints using pattern precision + assign indices and get initial error + compress indices (and possibly reorder endpoints) + transform endpoints + if transformed endpoints fit pattern + get original endpoints back + optimize endpoints, get new endpoints, new indices, and new error // new error will almost always be better + compress new indices + transform new endpoints + if new endpoints fit pattern AND if error is improved + emit compressed block with new data + else + emit compressed 
block with original data // to try to preserve maximum endpoint precision + + simplify the above given that there is no transform now and that endpoints will always fit +*/ + +static float refine(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS], char *block) +{ + float orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS]; + IntEndptsRGBA_2 orig_endpts[NREGIONS], opt_endpts[NREGIONS]; + int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W]; + + for (int sp = 0; sp < NPATTERNS; ++sp) + { + quantize_endpts(endpts, pattern_precs[sp], orig_endpts); + assign_indices(tile, shapeindex_best, orig_endpts, pattern_precs[sp], orig_indices, orig_err); + swap_indices(orig_endpts, orig_indices, shapeindex_best); + + optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts); + + assign_indices(tile, shapeindex_best, opt_endpts, pattern_precs[sp], opt_indices, opt_err); + // (nreed) Commented out asserts because they go off all the time...not sure why + //for (int i=0; i 255.0f) v.x = 255.0f; + if (v.y < 0.0f) v.y = 0.0f; + if (v.y > 255.0f) v.y = 255.0f; + if (v.z < 0.0f) v.z = 0.0f; + if (v.z > 255.0f) v.z = 255.0f; + if (v.w < 0.0f) v.w = 0.0f; + if (v.w > 255.0f) v.w = 255.0f; +} + +static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS], Vector4 palette[NREGIONS][NINDICES]) +{ + for (int region = 0; region < NREGIONS; ++region) + for (int i = 0; i < NINDICES; ++i) + palette[region][i] = Utils::lerp(endpts[region].A, endpts[region].B, i, 0, DENOM); +} + +// generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined +static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS]) +{ + // build list of possibles + Vector4 palette[NREGIONS][NINDICES]; + + generate_palette_unquantized(endpts, palette); + + float toterr = 0; + Vector4 err; + + for (int y = 0; y < tile.size_y; y++) + for (int x = 0; x < tile.size_x; x++) + { + int region = REGION(x,y,shapeindex); + float err, besterr; + + besterr = Utils::metric4(tile.data[y][x], palette[region][0]); + + for (int i = 1; i < NINDICES && besterr > 0; ++i) + { + err = Utils::metric4(tile.data[y][x], palette[region][i]); + + if (err > besterr) // error increased, so we're done searching. this works for most norms. + break; + if (err < besterr) + besterr = err; + } + toterr += besterr; + } + return toterr; +} + +static float rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS]) +{ + for (int region=0; region maxp) maxp = dp; + } + + // choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values + endpts[region].A = mean + minp*direction; + endpts[region].B = mean + maxp*direction; + + // clamp endpoints + // the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best + // shape based on endpoints being clamped + clamp(endpts[region].A); + clamp(endpts[region].B); + } + + return map_colors(tile, shapeindex, endpts); +} + +static void swap(float *list1, int *list2, int i, int j) +{ + float t = list1[i]; list1[i] = list1[j]; list1[j] = t; + int t1 = list2[i]; list2[i] = list2[j]; list2[j] = t1; +} + +float AVPCL::compress_mode6(const Tile &t, char *block) +{ + // number of rough cases to look at. 
reasonable values of this are 1, NSHAPES/4, and NSHAPES + // NSHAPES/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out + const int NITEMS=1; + + // pick the best NITEMS shapes and refine these. + struct { + FltEndpts endpts[NREGIONS]; + } all[NSHAPES]; + float roughmse[NSHAPES]; + int index[NSHAPES]; + char tempblock[AVPCL::BLOCKSIZE]; + float msebest = FLT_MAX; + + for (int i=0; i roughmse[j]) + swap(roughmse, index, i, j); + + for (int i=0; i0; ++i) + { + int shape = index[i]; + float mse = refine(t, shape, &all[shape].endpts[0], tempblock); + if (mse < msebest) + { + memcpy(block, tempblock, sizeof(tempblock)); + msebest = mse; + } + } + return msebest; +} + Index: ps/trunk/libraries/source/nvtt/src/src/bc7/avpcl_mode7.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/bc7/avpcl_mode7.cpp +++ ps/trunk/libraries/source/nvtt/src/src/bc7/avpcl_mode7.cpp @@ -0,0 +1,1094 @@ +/* +Copyright 2007 nVidia, Inc. +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. + +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +See the License for the specific language governing permissions and limitations under the License. +*/ + +// Thanks to Jacob Munkberg (jacob@cs.lth.se) for the shortcut of using SVD to do the equivalent of principal components analysis + +// x10000000 5555.1x4 64p 2bi (30b) + +#include "bits.h" +#include "tile.h" +#include "avpcl.h" +#include "nvcore/Debug.h" +#include "nvmath/Vector.inl" +#include "nvmath/Matrix.inl" +#include "nvmath/Fitting.h" +#include "avpcl_utils.h" +#include "endpts.h" +#include +#include + +#include "shapes_two.h" + +using namespace nv; +using namespace AVPCL; + +#define NLSBMODES 4 // number of different lsb modes per region. since we have two .1 per region, that can have 4 values + +#define NINDICES 4 +#define INDEXBITS 2 +#define HIGH_INDEXBIT (1<<(INDEXBITS-1)) +#define DENOM (NINDICES-1) +#define BIAS (DENOM/2) + +// WORK: determine optimal traversal pattern to search for best shape -- what does the error curve look like? +// i.e. can we search shapes in a particular order so we can see the global error minima easily and +// stop without having to touch all shapes? 
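+// Rough bit budget implied by the mode comment near the top of this file (an illustrative
+// tally, assuming the 64 partitions from shapes_two.h cost 6 shape bits):
+//   8 mode bits + 6 shape bits + 2 regions * 2 endpoints * 4 channels * 5 bits (= 80)
+//   + 4 shared lsbs + 16 pixels * 2 index bits - 2 anchor bits (= 30) = 128 bits.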
+ +#define POS_TO_X(pos) ((pos)&3) +#define POS_TO_Y(pos) (((pos)>>2)&3) + +#define NBITSIZES (NREGIONS*2) +#define ABITINDEX(region) (2*(region)+0) +#define BBITINDEX(region) (2*(region)+1) + +struct ChanBits +{ + int nbitsizes[NBITSIZES]; // bitsizes for one channel +}; + +struct Pattern +{ + ChanBits chan[NCHANNELS_RGBA];// bit patterns used per channel + int transformed; // if 0, deltas are unsigned and no transform; otherwise, signed and transformed + int mode; // associated mode value + int modebits; // number of mode bits + const char *encoding; // verilog description of encoding for this mode +}; + +#define NPATTERNS 1 +#define NREGIONS 2 + +static Pattern patterns[NPATTERNS] = +{ + // red green blue alpha xfm mode mb + 5,5,5,5, 5,5,5,5, 5,5,5,5, 5,5,5,5, 0, 0x80, 8, "", +}; + +struct RegionPrec +{ + int endpt_a_prec[NCHANNELS_RGBA]; + int endpt_b_prec[NCHANNELS_RGBA]; +}; + +struct PatternPrec +{ + RegionPrec region_precs[NREGIONS]; +}; + + +// this is the precision for each channel and region +// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this! +static PatternPrec pattern_precs[NPATTERNS] = +{ + 5,5,5,5, 5,5,5,5, 5,5,5,5, 5,5,5,5, +}; + +// return # of bits needed to store n. handle signed or unsigned cases properly +static int nbits(int n, bool issigned) +{ + int nb; + if (n==0) + return 0; // no bits needed for 0, signed or not + else if (n > 0) + { + for (nb=0; n; ++nb, n>>=1) ; + return nb + (issigned?1:0); + } + else + { + nvAssert (issigned); + for (nb=0; n<-1; ++nb, n>>=1) ; + return nb + 1; + } +} + +static void transform_forward(IntEndptsRGBA_2 ep[NREGIONS]) +{ + nvUnreachable(); +} + +static void transform_inverse(IntEndptsRGBA_2 ep[NREGIONS]) +{ + nvUnreachable(); +} + +/* +we're using this table to assign lsbs +abgr >=2 correct +0000 0 0 +0001 0 0 +0010 0 0 +0011 1 x1 +0100 0 0 +0101 1 x1 +0110 1 x1 +0111 1 1 +1000 0 0 +1001 1 x0 +1010 1 x0 +1011 1 1 +1100 1 x0 +1101 1 1 +1110 1 1 +1111 1 1 + +we need 8 0's and 8 1's. the x's can be either 0 or 1 as long as you get 8/8. +I choose to assign the lsbs so that the rgb channels are as good as possible. +*/ + +// 6666 ->5555.1, use the "correct" column above to assign the lsb +static void compress_one(const IntEndptsRGBA& endpts, IntEndptsRGBA_2& compr_endpts) +{ + int onescnt; + + onescnt = 0; + for (int j=0; j> 1; + nvAssert (compr_endpts.A[j] < 32); + } + compr_endpts.a_lsb = onescnt >= 2; + + onescnt = 0; + for (int j=0; j> 1; + nvAssert (compr_endpts.B[j] < 32); + } + compr_endpts.b_lsb = onescnt >= 2; +} + +static void uncompress_one(const IntEndptsRGBA_2& compr_endpts, IntEndptsRGBA& endpts) +{ + for (int j=0; j> 2) & 3 and x = index & 3 +static void swap_indices(IntEndptsRGBA_2 endpts[NREGIONS], int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex) +{ + for (int region = 0; region < NREGIONS; ++region) + { + int position = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,region); + + int x = POS_TO_X(position); + int y = POS_TO_Y(position); + nvAssert(REGION(x,y,shapeindex) == region); // double check the table + if (indices[y][x] & HIGH_INDEXBIT) + { + // high bit is set, swap the endpts and indices for this region + int t; + for (int i=0; i= 0 && pat_index < NPATTERNS); + nvAssert (in.getptr() == patterns[pat_index].modebits); + + shapeindex = in.read(SHAPEBITS); + p = patterns[pat_index]; + + for (int j=0; j 0; ++j) + { + err = !AVPCL::flag_premult ? 
Utils::metric4(colors[i], palette[j]) : + Utils::metric4premult(colors[i], palette[j]) ; + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[i] = j; + } + } + toterr += besterr; + + // check for early exit + if (toterr > current_err) + { + // fill out bogus index values so it's initialized at least + for (int k = i; k < np; ++k) + indices[k] = -1; + + return FLT_MAX; + } + } + return toterr; +} + +// assign indices given a tile, shape, and quantized endpoints, return toterr for each region +static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGBA_2 endpts[NREGIONS], const PatternPrec &pattern_prec, + int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS]) +{ + // build list of possibles + Vector4 palette[NREGIONS][NINDICES]; + + for (int region = 0; region < NREGIONS; ++region) + { + generate_palette_quantized(endpts[region], pattern_prec.region_precs[region], &palette[region][0]); + toterr[region] = 0; + } + + Vector4 err; + + for (int y = 0; y < tile.size_y; y++) + for (int x = 0; x < tile.size_x; x++) + { + int region = REGION(x,y,shapeindex); + float err, besterr = FLT_MAX; + + for (int i = 0; i < NINDICES && besterr > 0; ++i) + { + err = !AVPCL::flag_premult ? Utils::metric4(tile.data[y][x], palette[region][i]) : + Utils::metric4premult(tile.data[y][x], palette[region][i]) ; + + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[y][x] = i; + } + } + toterr[region] += besterr; + } +} + +// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's +// this function returns either old_err or a value smaller (if it was successful in improving the error) +static float perturb_one(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec ®ion_prec, const IntEndptsRGBA_2 &old_endpts, IntEndptsRGBA_2 &new_endpts, + float old_err, int do_b, int indices[Tile::TILE_TOTAL]) +{ + // we have the old endpoints: old_endpts + // we have the perturbed endpoints: new_endpts + // we have the temporary endpoints: temp_endpts + + IntEndptsRGBA_2 temp_endpts; + float min_err = old_err; // start with the best current error + int beststep; + int temp_indices[Tile::TILE_TOTAL]; + + for (int i=0; i>= 1) + { + bool improved = false; + for (int sign = -1; sign <= 1; sign += 2) + { + if (do_b == 0) + { + temp_endpts.A[ch] = new_endpts.A[ch] + sign * step; + if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec)) + continue; + } + else + { + temp_endpts.B[ch] = new_endpts.B[ch] + sign * step; + if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec)) + continue; + } + + float err = map_colors(colors, importance, np, temp_endpts, region_prec, min_err, temp_indices); + + if (err < min_err) + { + improved = true; + min_err = err; + beststep = sign * step; + for (int i=0; i 5000 perturb endpoints 50% of precision +// if err > 1000 25% +// if err > 200 12.5% +// if err > 40 6.25% +// for np = 16 -- adjust error thresholds as a function of np +// always ensure endpoint ordering is preserved (no need to overlap the scan) +// if orig_err returned from this is less than its input value, then indices[] will contain valid indices +static float exhaustive(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec ®ion_prec, float orig_err, IntEndptsRGBA_2 &opt_endpts, int indices[Tile::TILE_TOTAL]) +{ + IntEndptsRGBA_2 temp_endpts; + float best_err = 
orig_err; + int aprec = region_prec.endpt_a_prec[ch]; + int bprec = region_prec.endpt_b_prec[ch]; + int good_indices[Tile::TILE_TOTAL]; + int temp_indices[Tile::TILE_TOTAL]; + + for (int i=0; i 5000.0*thr_scale) { adelta = (1 << aprec)/2; bdelta = (1 << bprec)/2; } + else if (orig_err > 1000.0*thr_scale) { adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; } + else if (orig_err > 200.0*thr_scale) { adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; } + else if (orig_err > 40.0*thr_scale) { adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; } + adelta = max(adelta, 3); + bdelta = max(bdelta, 3); + +#ifdef DISABLE_EXHAUSTIVE + adelta = bdelta = 3; +#endif + + temp_endpts = opt_endpts; + + // ok figure out the range of A and B + int alow = max(0, opt_endpts.A[ch] - adelta); + int ahigh = min((1<= initial_error) break + rgb0 += delta0 + next = 1 + else + if (err1 >= initial_error) break + rgb1 += delta1 + next = 0 + initial_err = map() + for (;;) + err = perturb(next ? rgb1:rgb0, delta) + if (err >= initial_err) break + next? rgb1 : rgb0 += delta + initial_err = err + */ + IntEndptsRGBA_2 new_a, new_b; + IntEndptsRGBA_2 new_endpt; + int do_b; + int orig_indices[Tile::TILE_TOTAL]; + int new_indices[Tile::TILE_TOTAL]; + int temp_indices0[Tile::TILE_TOTAL]; + int temp_indices1[Tile::TILE_TOTAL]; + + // now optimize each channel separately + // for the first error improvement, we save the indices. then, for any later improvement, we compare the indices + // if they differ, we restart the loop (which then falls back to looking for a first improvement.) + for (int ch = 0; ch < NCHANNELS_RGBA; ++ch) + { + // figure out which endpoint when perturbed gives the most improvement and start there + // if we just alternate, we can easily end up in a local minima + float err0 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0); // perturb endpt A + float err1 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1); // perturb endpt B + + if (err0 < err1) + { + if (err0 >= opt_err) + continue; + + for (int i=0; i= opt_err) + continue; + + for (int i=0; i= opt_err) + break; + + for (int i=0; i> 1) & 1; + + // make sure we have a valid error for temp_in + // we use FLT_MAX here because we want an accurate temp_in_err, no shortcuts + // (mapcolors will compute a mapping but will stop if the error exceeds the value passed in the FLT_MAX position) + float temp_in_err = map_colors(pixels, importance, np, temp_in, pattern_prec.region_precs[region], FLT_MAX, temp_indices); + + // now try to optimize these endpoints + float temp_out_err = optimize_one(pixels, importance, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out); + + // if we find an improvement, update the best so far and correct the output endpoints and errors + if (temp_out_err < best_err) + { + best_err = temp_out_err; + opt_err[region] = temp_out_err; + opt_endpts[region] = temp_out; + } + } + } +} + +/* optimization algorithm + for each pattern + convert endpoints using pattern precision + assign indices and get initial error + compress indices (and possibly reorder endpoints) + transform endpoints + if transformed endpoints fit pattern + get original endpoints back + optimize endpoints, get new endpoints, new indices, and new error // new error will almost always be better + compress new indices + transform new endpoints + if new endpoints fit pattern AND if error is improved + emit compressed block with new data + else + emit compressed 
block with original data // to try to preserve maximum endpoint precision +*/ + +static float refine(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS], char *block) +{ + float orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS]; + IntEndptsRGBA_2 orig_endpts[NREGIONS], opt_endpts[NREGIONS]; + int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W]; + + for (int sp = 0; sp < NPATTERNS; ++sp) + { + quantize_endpts(endpts, pattern_precs[sp], orig_endpts); + assign_indices(tile, shapeindex_best, orig_endpts, pattern_precs[sp], orig_indices, orig_err); + swap_indices(orig_endpts, orig_indices, shapeindex_best); + if (patterns[sp].transformed) + transform_forward(orig_endpts); + // apply a heuristic here -- we check if the endpoints fit before we try to optimize them. + // the assumption made is that if they don't fit now, they won't fit after optimizing. + if (endpts_fit(orig_endpts, patterns[sp])) + { + if (patterns[sp].transformed) + transform_inverse(orig_endpts); + optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts); + assign_indices(tile, shapeindex_best, opt_endpts, pattern_precs[sp], opt_indices, opt_err); + // (nreed) Commented out asserts because they go off all the time...not sure why + //for (int i=0; i 255.0f) v.x = 255.0f; + if (v.y < 0.0f) v.y = 0.0f; + if (v.y > 255.0f) v.y = 255.0f; + if (v.z < 0.0f) v.z = 0.0f; + if (v.z > 255.0f) v.z = 255.0f; + if (v.w < 0.0f) v.w = 0.0f; + if (v.w > 255.0f) v.w = 255.0f; +} + +static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS], Vector4 palette[NREGIONS][NINDICES]) +{ + for (int region = 0; region < NREGIONS; ++region) + for (int i = 0; i < NINDICES; ++i) + palette[region][i] = Utils::lerp(endpts[region].A, endpts[region].B, i, 0, DENOM); +} + +// generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined +static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS]) +{ + // build list of possibles + Vector4 palette[NREGIONS][NINDICES]; + + generate_palette_unquantized(endpts, palette); + + float toterr = 0; + Vector4 err; + + for (int y = 0; y < tile.size_y; y++) + for (int x = 0; x < tile.size_x; x++) + { + int region = REGION(x,y,shapeindex); + float err, besterr = FLT_MAX; + + for (int i = 0; i < NINDICES && besterr > 0; ++i) + { + err = Utils::metric4(tile.data[y][x], palette[region][i]); + + if (err > besterr) // error increased, so we're done searching. this works for most norms. 
+ break; + if (err < besterr) + besterr = err; + } + toterr += besterr; + } + return toterr; +} + +static float rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS]) +{ + for (int region=0; region maxp) maxp = dp; + } + + // choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values + endpts[region].A = mean + minp*direction; + endpts[region].B = mean + maxp*direction; + + // clamp endpoints + // the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best + // shape based on endpoints being clamped + clamp(endpts[region].A); + clamp(endpts[region].B); + } + + return map_colors(tile, shapeindex, endpts); +} + +static void swap(float *list1, int *list2, int i, int j) +{ + float t = list1[i]; list1[i] = list1[j]; list1[j] = t; + int t1 = list2[i]; list2[i] = list2[j]; list2[j] = t1; +} + +float AVPCL::compress_mode7(const Tile &t, char *block) +{ + // number of rough cases to look at. reasonable values of this are 1, NSHAPES/4, and NSHAPES + // NSHAPES/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out + const int NITEMS=NSHAPES/4; + + // pick the best NITEMS shapes and refine these. + struct { + FltEndpts endpts[NREGIONS]; + } all[NSHAPES]; + float roughmse[NSHAPES]; + int index[NSHAPES]; + char tempblock[AVPCL::BLOCKSIZE]; + float msebest = FLT_MAX; + + for (int i=0; i roughmse[j]) + swap(roughmse, index, i, j); + + for (int i=0; i0; ++i) + { + int shape = index[i]; + float mse = refine(t, shape, &all[shape].endpts[0], tempblock); + if (mse < msebest) + { + memcpy(block, tempblock, sizeof(tempblock)); + msebest = mse; + } + } + return msebest; +} + Index: ps/trunk/libraries/source/nvtt/src/src/bc7/avpcl_utils.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/bc7/avpcl_utils.h +++ ps/trunk/libraries/source/nvtt/src/src/bc7/avpcl_utils.h @@ -0,0 +1,61 @@ +/* +Copyright 2007 nVidia, Inc. +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. + +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +See the License for the specific language governing permissions and limitations under the License. 
+*/ + +// utility class holding common routines +#ifndef _AVPCL_UTILS_H +#define _AVPCL_UTILS_H + +#include "nvmath/Vector.h" + +namespace AVPCL { + +inline int SIGN_EXTEND(int x, int nb) { return ((((x)&(1<<((nb)-1)))?((~0)<<(nb)):0)|(x)); } + +static const int INDEXMODE_BITS = 1; // 2 different index modes +static const int NINDEXMODES = (1<<(INDEXMODE_BITS)); +static const int INDEXMODE_ALPHA_IS_3BITS = 0; +static const int INDEXMODE_ALPHA_IS_2BITS = 1; + +static const int ROTATEMODE_BITS = 2; // 4 different rotate modes +static const int NROTATEMODES = (1<<(ROTATEMODE_BITS)); +static const int ROTATEMODE_RGBA_RGBA = 0; +static const int ROTATEMODE_RGBA_AGBR = 1; +static const int ROTATEMODE_RGBA_RABG = 2; +static const int ROTATEMODE_RGBA_RGAB = 3; + +class Utils +{ +public: + // error metrics + static float metric4(nv::Vector4::Arg a, nv::Vector4::Arg b); + static float metric3(nv::Vector3::Arg a, nv::Vector3::Arg b, int rotatemode); + static float metric1(float a, float b, int rotatemode); + + static float metric4premult(nv::Vector4::Arg rgba0, nv::Vector4::Arg rgba1); + static float metric3premult_alphaout(nv::Vector3::Arg rgb0, float a0, nv::Vector3::Arg rgb1, float a1); + static float metric3premult_alphain(nv::Vector3::Arg rgb0, nv::Vector3::Arg rgb1, int rotatemode); + static float metric1premult(float rgb0, float a0, float rgb1, float a1, int rotatemode); + + static float premult(float r, float a); + + // quantization and unquantization + static int unquantize(int q, int prec); + static int quantize(float value, int prec); + + // lerping + static int lerp(int a, int b, int i, int bias, int denom); + static nv::Vector4 lerp(nv::Vector4::Arg a, nv::Vector4::Arg b, int i, int bias, int denom); +}; + +} + +#endif \ No newline at end of file Index: ps/trunk/libraries/source/nvtt/src/src/bc7/avpcl_utils.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/bc7/avpcl_utils.cpp +++ ps/trunk/libraries/source/nvtt/src/src/bc7/avpcl_utils.cpp @@ -0,0 +1,390 @@ +/* +Copyright 2007 nVidia, Inc. +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. + +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +See the License for the specific language governing permissions and limitations under the License. 
+*/ + +// Utility and common routines + +#include "avpcl_utils.h" +#include "avpcl.h" +#include "nvcore/Debug.h" +#include "nvmath/Vector.inl" +#include + +using namespace nv; +using namespace AVPCL; + +static const int denom7_weights[] = {0, 9, 18, 27, 37, 46, 55, 64}; // divided by 64 +static const int denom15_weights[] = {0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64}; // divided by 64 + +int Utils::lerp(int a, int b, int i, int bias, int denom) +{ +#ifdef USE_ZOH_INTERP + nvAssert (denom == 3 || denom == 7 || denom == 15); + nvAssert (i >= 0 && i <= denom); + nvAssert (bias >= 0 && bias <= denom/2); + nvAssert (a >= 0 && b >= 0); + + int round = 0; +#ifdef USE_ZOH_INTERP_ROUNDED + round = 32; +#endif + + switch (denom) + { + case 3: denom *= 5; i *= 5; // fall through to case 15 + case 15:return (a*denom15_weights[denom-i] + b*denom15_weights[i] + round) >> 6; + case 7: return (a*denom7_weights[denom-i] + b*denom7_weights[i] + round) >> 6; + default: nvUnreachable(); return 0; + } +#else + return (((a)*((denom)-i)+(b)*(i)+(bias))/(denom)); // simple exact interpolation +#endif +} + +Vector4 Utils::lerp(Vector4::Arg a, Vector4::Arg b, int i, int bias, int denom) +{ +#ifdef USE_ZOH_INTERP + nvAssert (denom == 3 || denom == 7 || denom == 15); + nvAssert (i >= 0 && i <= denom); + nvAssert (bias >= 0 && bias <= denom/2); +// nvAssert (a >= 0 && b >= 0); + + // no need to bias these as this is an exact division + + switch (denom) + { + case 3: denom *= 5; i *= 5; // fall through to case 15 + case 15:return (a*float(denom15_weights[denom-i]) + b*float(denom15_weights[i])) / 64.0f; + case 7: return (a*float(denom7_weights[denom-i]) + b*float(denom7_weights[i])) / 64.0f; + default: nvUnreachable(); return Vector4(0); + } +#else + return (((a)*((denom)-i)+(b)*(i)+(bias))/(denom)); // simple exact interpolation +#endif +} + + +int Utils::unquantize(int q, int prec) +{ + int unq; + + nvAssert (prec > 3); // we only want to do one replicate + +#ifdef USE_ZOH_QUANT + if (prec >= 8) + unq = q; + else if (q == 0) + unq = 0; + else if (q == ((1<> prec; +#else + // avpcl unquantizer -- bit replicate + unq = (q << (8-prec)) | (q >> (2*prec-8)); +#endif + + return unq; +} + +// quantize to the best value -- i.e., minimize unquantize error +int Utils::quantize(float value, int prec) +{ + int q, unq; + + nvAssert (prec > 3); // we only want to do one replicate + + unq = (int)floor(value + 0.5f); + nvAssert (unq <= 255); + +#ifdef USE_ZOH_QUANT + q = (prec >= 8) ? unq : (unq << prec) / 256; +#else + // avpcl quantizer -- scale properly for best possible bit-replicated result + q = (unq * ((1<= 0 && q < (1 << prec)); + + return q; +} + +float Utils::metric4(Vector4::Arg a, Vector4::Arg b) +{ + Vector4 err = a - b; + + // if nonuniform, select weights and weigh away + if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati) + { + float rwt, gwt, bwt; + if (AVPCL::flag_nonuniform) + { + rwt = 0.299f; gwt = 0.587f; bwt = 0.114f; + } + else /*if (AVPCL::flag_nonuniform_ati)*/ + { + rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f; + } + + // weigh the components + err.x *= rwt; + err.y *= gwt; + err.z *= bwt; + } + + return lengthSquared(err); +} + +// WORK -- implement rotatemode for the below -- that changes where the rwt, gwt, and bwt's go. 
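+// Worked example for Utils::lerp() above (illustrative values, assuming USE_ZOH_INTERP and
+// USE_ZOH_INTERP_ROUNDED are defined): lerp(60, 200, 5, bias, 15)
+//   = (60*denom15_weights[15-5] + 200*denom15_weights[5] + 32) >> 6
+//   = (60*43 + 200*21 + 32) >> 6 = 6812 >> 6 = 106;
+// the bias argument is ignored on this path, and denom == 3 is promoted to 15 (denom *= 5,
+// i *= 5) so the same 16-entry weight table serves both 2-bit and 4-bit indices.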
+float Utils::metric3(Vector3::Arg a, Vector3::Arg b, int rotatemode) +{ + Vector3 err = a - b; + + // if nonuniform, select weights and weigh away + if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati) + { + float rwt, gwt, bwt; + if (AVPCL::flag_nonuniform) + { + rwt = 0.299f; gwt = 0.587f; bwt = 0.114f; + } + else if (AVPCL::flag_nonuniform_ati) + { + rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f; + } + + // adjust weights based on rotatemode + switch(rotatemode) + { + case ROTATEMODE_RGBA_RGBA: break; + case ROTATEMODE_RGBA_AGBR: rwt = 1.0f; break; + case ROTATEMODE_RGBA_RABG: gwt = 1.0f; break; + case ROTATEMODE_RGBA_RGAB: bwt = 1.0f; break; + default: nvUnreachable(); + } + + // weigh the components + err.x *= rwt; + err.y *= gwt; + err.z *= bwt; + } + + return lengthSquared(err); +} + +float Utils::metric1(const float a, const float b, int rotatemode) +{ + float err = a - b; + + // if nonuniform, select weights and weigh away + if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati) + { + float rwt, gwt, bwt, awt; + if (AVPCL::flag_nonuniform) + { + rwt = 0.299f; gwt = 0.587f; bwt = 0.114f; + } + else if (AVPCL::flag_nonuniform_ati) + { + rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f; + } + + // adjust weights based on rotatemode + switch(rotatemode) + { + case ROTATEMODE_RGBA_RGBA: awt = 1.0f; break; + case ROTATEMODE_RGBA_AGBR: awt = rwt; break; + case ROTATEMODE_RGBA_RABG: awt = gwt; break; + case ROTATEMODE_RGBA_RGAB: awt = bwt; break; + default: nvUnreachable(); + } + + // weigh the components + err *= awt; + } + + return err * err; +} + +float Utils::premult(float r, float a) +{ + // note that the args are really integers stored in floats + int R = int(r), A = int(a); + + nvAssert ((R==r) && (A==a)); + + return float((R*A + 127)/255); +} + +static void premult4(Vector4& rgba) +{ + rgba.x = Utils::premult(rgba.x, rgba.w); + rgba.y = Utils::premult(rgba.y, rgba.w); + rgba.z = Utils::premult(rgba.z, rgba.w); +} + +static void premult3(Vector3& rgb, float a) +{ + rgb.x = Utils::premult(rgb.x, a); + rgb.y = Utils::premult(rgb.y, a); + rgb.z = Utils::premult(rgb.z, a); +} + +float Utils::metric4premult(Vector4::Arg a, Vector4::Arg b) +{ + Vector4 pma = a, pmb = b; + + premult4(pma); + premult4(pmb); + + Vector4 err = pma - pmb; + + // if nonuniform, select weights and weigh away + if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati) + { + float rwt, gwt, bwt; + if (AVPCL::flag_nonuniform) + { + rwt = 0.299f; gwt = 0.587f; bwt = 0.114f; + } + else /*if (AVPCL::flag_nonuniform_ati)*/ + { + rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f; + } + + // weigh the components + err.x *= rwt; + err.y *= gwt; + err.z *= bwt; + } + + return lengthSquared(err); +} + +float Utils::metric3premult_alphaout(Vector3::Arg rgb0, float a0, Vector3::Arg rgb1, float a1) +{ + Vector3 pma = rgb0, pmb = rgb1; + + premult3(pma, a0); + premult3(pmb, a1); + + Vector3 err = pma - pmb; + + // if nonuniform, select weights and weigh away + if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati) + { + float rwt, gwt, bwt; + if (AVPCL::flag_nonuniform) + { + rwt = 0.299f; gwt = 0.587f; bwt = 0.114f; + } + else /*if (AVPCL::flag_nonuniform_ati)*/ + { + rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f; + } + + // weigh the components + err.x *= rwt; + err.y *= gwt; + err.z *= bwt; + } + + return lengthSquared(err); +} + +float Utils::metric3premult_alphain(Vector3::Arg rgb0, Vector3::Arg rgb1, int rotatemode) +{ + Vector3 pma = rgb0, pmb = rgb1; + + switch(rotatemode) + { + case ROTATEMODE_RGBA_RGBA: + // this 
function isn't supposed to be called for this rotatemode + nvUnreachable(); + break; + case ROTATEMODE_RGBA_AGBR: + pma.y = premult(pma.y, pma.x); + pma.z = premult(pma.z, pma.x); + pmb.y = premult(pmb.y, pmb.x); + pmb.z = premult(pmb.z, pmb.x); + break; + case ROTATEMODE_RGBA_RABG: + pma.x = premult(pma.x, pma.y); + pma.z = premult(pma.z, pma.y); + pmb.x = premult(pmb.x, pmb.y); + pmb.z = premult(pmb.z, pmb.y); + break; + case ROTATEMODE_RGBA_RGAB: + pma.x = premult(pma.x, pma.z); + pma.y = premult(pma.y, pma.z); + pmb.x = premult(pmb.x, pmb.z); + pmb.y = premult(pmb.y, pmb.z); + break; + default: nvUnreachable(); + } + + Vector3 err = pma - pmb; + + // if nonuniform, select weights and weigh away + if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati) + { + float rwt, gwt, bwt; + if (AVPCL::flag_nonuniform) + { + rwt = 0.299f; gwt = 0.587f; bwt = 0.114f; + } + else /*if (AVPCL::flag_nonuniform_ati)*/ + { + rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f; + } + + // weigh the components + err.x *= rwt; + err.y *= gwt; + err.z *= bwt; + } + + return lengthSquared(err); +} + +float Utils::metric1premult(float rgb0, float a0, float rgb1, float a1, int rotatemode) +{ + float err = premult(rgb0, a0) - premult(rgb1, a1); + + // if nonuniform, select weights and weigh away + if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati) + { + float rwt, gwt, bwt, awt; + if (AVPCL::flag_nonuniform) + { + rwt = 0.299f; gwt = 0.587f; bwt = 0.114f; + } + else if (AVPCL::flag_nonuniform_ati) + { + rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f; + } + + // adjust weights based on rotatemode + switch(rotatemode) + { + case ROTATEMODE_RGBA_RGBA: awt = 1.0f; break; + case ROTATEMODE_RGBA_AGBR: awt = rwt; break; + case ROTATEMODE_RGBA_RABG: awt = gwt; break; + case ROTATEMODE_RGBA_RGAB: awt = bwt; break; + default: nvUnreachable(); + } + + // weigh the components + err *= awt; + } + + return err * err; +} Index: ps/trunk/libraries/source/nvtt/src/src/bc7/bits.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/bc7/bits.h +++ ps/trunk/libraries/source/nvtt/src/src/bc7/bits.h @@ -0,0 +1,76 @@ +/* +Copyright 2007 nVidia, Inc. +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. + +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +See the License for the specific language governing permissions and limitations under the License. 
+*/ + +#ifndef _AVPCL_BITS_H +#define _AVPCL_BITS_H + +// read/write a bitstream + +#include "nvcore/Debug.h" + +namespace AVPCL { + +class Bits +{ +public: + + Bits(char *data, int maxdatabits) { nvAssert (data && maxdatabits > 0); bptr = bend = 0; bits = data; maxbits = maxdatabits; readonly = 0;} + Bits(const char *data, int availdatabits) { nvAssert (data && availdatabits > 0); bptr = 0; bend = availdatabits; cbits = data; maxbits = availdatabits; readonly = 1;} + + void write(int value, int nbits) { + nvAssert (nbits >= 0 && nbits < 32); + nvAssert (sizeof(int)>= 4); + for (int i=0; i<nbits; ++i) + writeone(value>>i); + } + int read(int nbits) { + nvAssert (nbits >= 0 && nbits < 32); + nvAssert (sizeof(int)>= 4); + int out = 0; + for (int i=0; i<nbits; ++i) + out |= readone()<<i; + return out; + } + int getptr() { return bptr; } + void setptr(int ptr) { nvAssert (ptr >= 0 && ptr < maxbits); bptr = ptr; } + int getsize() { return bend; } + +private: + int bptr; // next bit to read + int bend; // last written bit + 1 + char *bits; // ptr to user bit stream + const char *cbits; // ptr to const user bit stream + int maxbits; // max size of user bit stream + char readonly; // 1 if this is a read-only stream + + int readone() { + nvAssert (bptr < bend); + if (bptr >= bend) return 0; + int bit = (readonly ? cbits[bptr>>3] : bits[bptr>>3]) & (1 << (bptr & 7)); + ++bptr; + return bit != 0; + } + void writeone(int bit) { + nvAssert (!readonly); // "Writing a read-only bit stream" + nvAssert (bptr < maxbits); + if (bptr >= maxbits) return; + if (bit&1) + bits[bptr>>3] |= 1 << (bptr & 7); + else + bits[bptr>>3] &= ~(1 << (bptr & 7)); + if (bptr++ >= bend) bend = bptr; + } +}; + +} + +#endif \ No newline at end of file Index: ps/trunk/libraries/source/nvtt/src/src/bc7/endpts.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/bc7/endpts.h +++ ps/trunk/libraries/source/nvtt/src/src/bc7/endpts.h @@ -0,0 +1,81 @@ +/* +Copyright 2007 nVidia, Inc. +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. + +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +See the License for the specific language governing permissions and limitations under the License.
+*/ + +#ifndef _AVPCL_ENDPTS_H +#define _AVPCL_ENDPTS_H + +// endpoint definitions and routines to search through endpoint space + +#include "nvmath/Vector.h" + +namespace AVPCL { + +static const int NCHANNELS_RGB = 3; +static const int NCHANNELS_RGBA = 4; +static const int CHANNEL_R = 0; +static const int CHANNEL_G = 1; +static const int CHANNEL_B = 2; +static const int CHANNEL_A = 3; + +struct FltEndpts +{ + nv::Vector4 A; + nv::Vector4 B; +}; + +struct IntEndptsRGB +{ + int A[NCHANNELS_RGB]; + int B[NCHANNELS_RGB]; +}; + +struct IntEndptsRGB_1 +{ + int A[NCHANNELS_RGB]; + int B[NCHANNELS_RGB]; + int lsb; // shared lsb for A and B +}; + +struct IntEndptsRGB_2 +{ + int A[NCHANNELS_RGB]; + int B[NCHANNELS_RGB]; + int a_lsb; // lsb for A + int b_lsb; // lsb for B +}; + + +struct IntEndptsRGBA +{ + int A[NCHANNELS_RGBA]; + int B[NCHANNELS_RGBA]; +}; + +struct IntEndptsRGBA_2 +{ + int A[NCHANNELS_RGBA]; + int B[NCHANNELS_RGBA]; + int a_lsb; // lsb for A + int b_lsb; // lsb for B +}; + +struct IntEndptsRGBA_2a +{ + int A[NCHANNELS_RGBA]; + int B[NCHANNELS_RGBA]; + int a_lsb; // lsb for RGB channels of A + int b_lsb; // lsb for RGB channels of A +}; + +} + +#endif Index: ps/trunk/libraries/source/nvtt/src/src/bc7/shapes_three.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/bc7/shapes_three.h +++ ps/trunk/libraries/source/nvtt/src/src/bc7/shapes_three.h @@ -0,0 +1,132 @@ +/* +Copyright 2007 nVidia, Inc. +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. + +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +See the License for the specific language governing permissions and limitations under the License. 
+*/ + +#ifndef _AVPCL_SHAPES_THREE_H +#define _AVPCL_SHAPES_THREE_H + +// shapes for 3 regions + +#define NREGIONS 3 +#define NSHAPES 64 +#define SHAPEBITS 6 + +static int shapes[NSHAPES*16] = +{ +0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 2, 2, 2, +0, 0, 1, 1, 0, 0, 1, 1, 2, 0, 0, 1, 0, 0, 2, 2, +0, 2, 2, 1, 2, 2, 1, 1, 2, 2, 1, 1, 0, 0, 1, 1, +2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 1, 1, 0, 1, 1, 1, + +0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 2, 2, 0, 0, 1, 1, +0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 2, 2, 0, 0, 1, 1, +1, 1, 2, 2, 0, 0, 2, 2, 1, 1, 1, 1, 2, 2, 1, 1, +1, 1, 2, 2, 0, 0, 2, 2, 1, 1, 1, 1, 2, 2, 1, 1, + +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, +0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 2, +1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 0, 0, 1, 2, +2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 1, 2, + +0, 1, 1, 2, 0, 1, 2, 2, 0, 0, 1, 1, 0, 0, 1, 1, +0, 1, 1, 2, 0, 1, 2, 2, 0, 1, 1, 2, 2, 0, 0, 1, +0, 1, 1, 2, 0, 1, 2, 2, 1, 1, 2, 2, 2, 2, 0, 0, +0, 1, 1, 2, 0, 1, 2, 2, 1, 2, 2, 2, 2, 2, 2, 0, + +0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 2, 2, +0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 0, 0, 2, 2, +0, 1, 1, 2, 2, 0, 0, 1, 1, 1, 2, 2, 0, 0, 2, 2, +1, 1, 2, 2, 2, 2, 0, 0, 1, 1, 2, 2, 1, 1, 1, 1, + +0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, +0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, +0, 2, 2, 2, 2, 2, 2, 1, 0, 1, 2, 2, 2, 2, 1, 0, +0, 2, 2, 2, 2, 2, 2, 1, 0, 1, 2, 2, 2, 2, 1, 0, + +0, 1, 2, 2, 0, 0, 1, 2, 0, 1, 1, 0, 0, 0, 0, 0, +0, 1, 2, 2, 0, 0, 1, 2, 1, 2, 2, 1, 0, 1, 1, 0, +0, 0, 1, 1, 1, 1, 2, 2, 1, 2, 2, 1, 1, 2, 2, 1, +0, 0, 0, 0, 2, 2, 2, 2, 0, 1, 1, 0, 1, 2, 2, 1, + +0, 0, 2, 2, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, +1, 1, 0, 2, 0, 1, 1, 0, 0, 1, 2, 2, 2, 0, 0, 0, +1, 1, 0, 2, 2, 0, 0, 2, 0, 1, 2, 2, 2, 2, 1, 1, +0, 0, 2, 2, 2, 2, 2, 2, 0, 0, 1, 1, 2, 2, 2, 1, + +0, 0, 0, 0, 0, 2, 2, 2, 0, 0, 1, 1, 0, 1, 2, 0, +0, 0, 0, 2, 0, 0, 2, 2, 0, 0, 1, 2, 0, 1, 2, 0, +1, 1, 2, 2, 0, 0, 1, 2, 0, 0, 2, 2, 0, 1, 2, 0, +1, 2, 2, 2, 0, 0, 1, 1, 0, 2, 2, 2, 0, 1, 2, 0, + +0, 0, 0, 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 0, 1, 1, +1, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 2, 2, 2, 0, 0, +2, 2, 2, 2, 2, 0, 1, 2, 1, 2, 0, 1, 1, 1, 2, 2, +0, 0, 0, 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 0, 1, 1, + +0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 2, 2, +1, 1, 2, 2, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 2, 2, +2, 2, 0, 0, 2, 2, 2, 2, 2, 1, 2, 1, 0, 0, 2, 2, +0, 0, 1, 1, 2, 2, 2, 2, 2, 1, 2, 1, 1, 1, 2, 2, + +0, 0, 2, 2, 0, 2, 2, 0, 0, 1, 0, 1, 0, 0, 0, 0, +0, 0, 1, 1, 1, 2, 2, 1, 2, 2, 2, 2, 2, 1, 2, 1, +0, 0, 2, 2, 0, 2, 2, 0, 2, 2, 2, 2, 2, 1, 2, 1, +0, 0, 1, 1, 1, 2, 2, 1, 0, 1, 0, 1, 2, 1, 2, 1, + +0, 1, 0, 1, 0, 2, 2, 2, 0, 0, 0, 2, 0, 0, 0, 0, +0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2, +0, 1, 0, 1, 0, 2, 2, 2, 0, 0, 0, 2, 2, 1, 1, 2, +2, 2, 2, 2, 0, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2, + +0, 2, 2, 2, 0, 0, 0, 2, 0, 1, 1, 0, 0, 0, 0, 0, +0, 1, 1, 1, 1, 1, 1, 2, 0, 1, 1, 0, 0, 0, 0, 0, +0, 1, 1, 1, 1, 1, 1, 2, 0, 1, 1, 0, 2, 1, 1, 2, +0, 2, 2, 2, 0, 0, 0, 2, 2, 2, 2, 2, 2, 1, 1, 2, + +0, 1, 1, 0, 0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 0, 0, +0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 2, 2, 0, 0, 0, 0, +2, 2, 2, 2, 0, 0, 1, 1, 1, 1, 2, 2, 0, 0, 0, 0, +2, 2, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 2, 1, 1, 2, + +0, 0, 0, 2, 0, 2, 2, 2, 0, 1, 0, 1, 0, 1, 1, 1, +0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 0, 1, 1, +0, 0, 0, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 1, +0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, +}; + +#define REGION(x,y,si) shapes[((si)&3)*4+((si)>>2)*64+(x)+(y)*16] + +static int shapeindex_to_compressed_indices[NSHAPES*3] = +{ + 0, 3,15, 0, 3, 8, 0,15, 8, 0,15, 3, + 0, 8,15, 0, 
3,15, 0,15, 3, 0,15, 8, + 0, 8,15, 0, 8,15, 0, 6,15, 0, 6,15, + 0, 6,15, 0, 5,15, 0, 3,15, 0, 3, 8, + + 0, 3,15, 0, 3, 8, 0, 8,15, 0,15, 3, + 0, 3,15, 0, 3, 8, 0, 6,15, 0,10, 8, + 0, 5, 3, 0, 8,15, 0, 8, 6, 0, 6,10, + 0, 8,15, 0, 5,15, 0,15,10, 0,15, 8, + + 0, 8,15, 0,15, 3, 0, 3,15, 0, 5,10, + 0, 6,10, 0,10, 8, 0, 8, 9, 0,15,10, + 0,15, 6, 0, 3,15, 0,15, 8, 0, 5,15, + 0,15, 3, 0,15, 6, 0,15, 6, 0,15, 8, + + 0, 3,15, 0,15, 3, 0, 5,15, 0, 5,15, + 0, 5,15, 0, 8,15, 0, 5,15, 0,10,15, + 0, 5,15, 0,10,15, 0, 8,15, 0,13,15, + 0,15, 3, 0,12,15, 0, 3,15, 0, 3, 8 + +}; +#define SHAPEINDEX_TO_COMPRESSED_INDICES(si,region) shapeindex_to_compressed_indices[(si)*3+(region)] + +#endif Index: ps/trunk/libraries/source/nvtt/src/src/bc7/shapes_two.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/bc7/shapes_two.h +++ ps/trunk/libraries/source/nvtt/src/src/bc7/shapes_two.h @@ -0,0 +1,133 @@ +/* +Copyright 2007 nVidia, Inc. +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. + +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +See the License for the specific language governing permissions and limitations under the License. +*/ + +#ifndef _AVPCL_SHAPES_TWO_H +#define _AVPCL_SHAPES_TWO_H + +// shapes for two regions + +#define NREGIONS 2 +#define NSHAPES 64 +#define SHAPEBITS 6 + +static int shapes[NSHAPES*16] = +{ +0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, +0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, +0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, +0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, + +0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, +0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, +0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, +0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, + +0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, +0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, +0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, + +0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + +0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, +1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, +1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, +1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, + +0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, +0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, +0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, +0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, + +0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, +0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, +0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, +0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, + +0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, +0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, +1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, +1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, + +0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, +0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, +0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, +0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, + +0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, +1, 1, 0, 0, 0, 1, 
0, 1, 1, 0, 0, 1, 1, 0, 1, 0, +0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, +1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, + +0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, +0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, +1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, +1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, + +0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, +1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, +1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, +0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, + +0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, +1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, +0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, + +0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, +1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, +1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, +0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, + +0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, +1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, +1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, +1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, + +0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, +1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, +0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, +0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, + +}; + +#define REGION(x,y,si) shapes[((si)&3)*4+((si)>>2)*64+(x)+(y)*16] + +static int shapeindex_to_compressed_indices[NSHAPES*2] = +{ + 0,15, 0,15, 0,15, 0,15, + 0,15, 0,15, 0,15, 0,15, + 0,15, 0,15, 0,15, 0,15, + 0,15, 0,15, 0,15, 0,15, + + 0,15, 0, 2, 0, 8, 0, 2, + 0, 2, 0, 8, 0, 8, 0,15, + 0, 2, 0, 8, 0, 2, 0, 2, + 0, 8, 0, 8, 0, 2, 0, 2, + + 0,15, 0,15, 0, 6, 0, 8, + 0, 2, 0, 8, 0,15, 0,15, + 0, 2, 0, 8, 0, 2, 0, 2, + 0, 2, 0,15, 0,15, 0, 6, + + 0, 6, 0, 2, 0, 6, 0, 8, + 0,15, 0,15, 0, 2, 0, 2, + 0,15, 0,15, 0,15, 0,15, + 0,15, 0, 2, 0, 2, 0,15 + +}; +#define SHAPEINDEX_TO_COMPRESSED_INDICES(si,region) shapeindex_to_compressed_indices[(si)*2+(region)] + +#endif Index: ps/trunk/libraries/source/nvtt/src/src/bc7/tile.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/bc7/tile.h +++ ps/trunk/libraries/source/nvtt/src/src/bc7/tile.h @@ -0,0 +1,41 @@ +/* +Copyright 2007 nVidia, Inc. +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. + +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +See the License for the specific language governing permissions and limitations under the License. 
+*/ + +#ifndef _AVPCL_TILE_H +#define _AVPCL_TILE_H + +#include "nvmath/Vector.h" +#include <math.h> +#include "avpcl_utils.h" + +namespace AVPCL { + +// extract a tile of pixels from an array + +class Tile +{ +public: + static const int TILE_H = 4; + static const int TILE_W = 4; + static const int TILE_TOTAL = TILE_H * TILE_W; + nv::Vector4 data[TILE_H][TILE_W]; + float importance_map[TILE_H][TILE_W]; + int size_x, size_y; // actual size of tile + + Tile() {}; + ~Tile(){}; + Tile(int xs, int ys) {size_x = xs; size_y = ys;} +}; + +} + +#endif \ No newline at end of file Index: ps/trunk/libraries/source/nvtt/src/src/nvconfig.h.in =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvconfig.h.in +++ ps/trunk/libraries/source/nvtt/src/src/nvconfig.h.in @@ -7,10 +7,15 @@ #cmakedefine HAVE_EXECINFO_H #cmakedefine HAVE_MALLOC_H -#cmakedefine HAVE_PNG -#cmakedefine HAVE_JPEG -#cmakedefine HAVE_TIFF -#cmakedefine HAVE_OPENEXR +#cmakedefine HAVE_OPENMP +#cmakedefine HAVE_DISPATCH_H + +#define HAVE_STBIMAGE +//#cmakedefine HAVE_PNG +//#cmakedefine HAVE_JPEG +//#cmakedefine HAVE_TIFF +//#cmakedefine HAVE_OPENEXR +//#cmakedefine HAVE_FREEIMAGE #cmakedefine HAVE_MAYA Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/Array.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/Array.h +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/Array.h @@ -0,0 +1,182 @@ +// This code is in the public domain -- Ignacio Castaño + +#pragma once +#ifndef NV_CORE_ARRAY_H +#define NV_CORE_ARRAY_H + +/* +This array class requires the elements to be relocable; it uses memmove and realloc. Ideally I should be +using swap, but I honestly don't care. The only thing that you should be aware of is that internal pointers +are not supported. + +Note also that push_back and resize does not support inserting arguments elements that are in the same +container. This is forbidden to prevent an extra copy. +*/ + + +#include "Memory.h" +#include "Debug.h" +#include "ForEach.h" // PseudoIndex + + +namespace nv +{ + class Stream; + + /** + * Replacement for std::vector that is easier to debug and provides + * some nice foreach enumerators. + */ + template <typename T> + class NVCORE_CLASS Array { + public: + typedef uint size_type; + + // Default constructor. + NV_FORCEINLINE Array() : m_buffer(NULL), m_capacity(0), m_size(0) {} + + // Copy constructor. + NV_FORCEINLINE Array(const Array & a) : m_buffer(NULL), m_capacity(0), m_size(0) { + copy(a.m_buffer, a.m_size); + } + + // Constructor that initializes the vector with the given elements. + NV_FORCEINLINE Array(const T * ptr, uint num) : m_buffer(NULL), m_capacity(0), m_size(0) { + copy(ptr, num); + } + + // Allocate array. + NV_FORCEINLINE explicit Array(uint capacity) : m_buffer(NULL), m_capacity(0), m_size(0) { + setArrayCapacity(capacity); + } + + // Destructor. + NV_FORCEINLINE ~Array() { + clear(); + free(m_buffer); + } + + + /// Const element access. + NV_FORCEINLINE const T & operator[]( uint index ) const + { + nvDebugCheck(index < m_size); + return m_buffer[index]; + } + NV_FORCEINLINE const T & at( uint index ) const + { + nvDebugCheck(index < m_size); + return m_buffer[index]; + } + + /// Element access. + NV_FORCEINLINE T & operator[] ( uint index ) + { + nvDebugCheck(index < m_size); + return m_buffer[index]; + } + NV_FORCEINLINE T & at( uint index ) + { + nvDebugCheck(index < m_size); + return m_buffer[index]; + } + + /// Get vector size.
+ NV_FORCEINLINE uint size() const { return m_size; } + + /// Get vector size. + NV_FORCEINLINE uint count() const { return m_size; } + + /// Get vector capacity. + NV_FORCEINLINE uint capacity() const { return m_capacity; } + + /// Get const vector pointer. + NV_FORCEINLINE const T * buffer() const { return m_buffer; } + + /// Get vector pointer. + NV_FORCEINLINE T * buffer() { return m_buffer; } + + /// Provide begin/end pointers for C++11 range-based for loops. + NV_FORCEINLINE T * begin() { return m_buffer; } + NV_FORCEINLINE T * end() { return m_buffer + m_size; } + NV_FORCEINLINE const T * begin() const { return m_buffer; } + NV_FORCEINLINE const T * end() const { return m_buffer + m_size; } + + /// Is vector empty. + NV_FORCEINLINE bool isEmpty() const { return m_size == 0; } + + /// Is a null vector. + NV_FORCEINLINE bool isNull() const { return m_buffer == NULL; } + + + T & append(); + void push_back( const T & val ); + void pushBack( const T & val ); + Array & append( const T & val ); + Array & operator<< ( T & t ); + void pop_back(); + void popBack(uint count = 1); + void popFront(uint count = 1); + const T & back() const; + T & back(); + const T & front() const; + T & front(); + bool contains(const T & e) const; + bool find(const T & element, uint * indexPtr) const; + bool find(const T & element, uint begin, uint end, uint * indexPtr) const; + void removeAt(uint index); + bool remove(const T & element); + void insertAt(uint index, const T & val = T()); + void append(const Array & other); + void append(const T other[], uint count); + void replaceWithLast(uint index); + void resize(uint new_size); + void resize(uint new_size, const T & elem); + void fill(const T & elem); + void clear(); + void shrink(); + void reserve(uint desired_size); + void copy(const T * data, uint count); + Array & operator=( const Array & a ); + T * release(); + + + // Array enumerator. + typedef uint PseudoIndex; + + NV_FORCEINLINE PseudoIndex start() const { return 0; } + NV_FORCEINLINE bool isDone(const PseudoIndex & i) const { nvDebugCheck(i <= this->m_size); return i == this->m_size; } + NV_FORCEINLINE void advance(PseudoIndex & i) const { nvDebugCheck(i <= this->m_size); i++; } + +#if NV_CC_MSVC + NV_FORCEINLINE T & operator[]( const PseudoIndexWrapper & i ) { + return m_buffer[i(this)]; + } + NV_FORCEINLINE const T & operator[]( const PseudoIndexWrapper & i ) const { + return m_buffer[i(this)]; + } +#endif + + // Friends. 
+ template <typename Typ> + friend Stream & operator<< ( Stream & s, Array<Typ> & p ); + + template <typename Typ> + friend void swap(Array<Typ> & a, Array<Typ> & b); + + + protected: + + void setArraySize(uint new_size); + void setArrayCapacity(uint new_capacity); + + T * m_buffer; + uint m_capacity; + uint m_size; + + }; + + +} // nv namespace + +#endif // NV_CORE_ARRAY_H Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/Array.inl =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/Array.inl +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/Array.inl @@ -0,0 +1,438 @@ +// This code is in the public domain -- Ignacio Castaño + +#pragma once +#ifndef NV_CORE_ARRAY_INL +#define NV_CORE_ARRAY_INL + +#include "Array.h" + +#include "Stream.h" +#include "Utils.h" // swap + +#include <string.h> // memmove +#include <new> // for placement new + + + +namespace nv +{ + template <typename T> + NV_FORCEINLINE T & Array<T>::append() + { + uint old_size = m_size; + uint new_size = m_size + 1; + + setArraySize(new_size); + + construct_range(m_buffer, new_size, old_size); + + return m_buffer[old_size]; // Return reference to last element. + } + + // Push an element at the end of the vector. + template <typename T> + NV_FORCEINLINE void Array<T>::push_back( const T & val ) + { +#if 1 + nvDebugCheck(&val < m_buffer || &val >= m_buffer+m_size); + + uint old_size = m_size; + uint new_size = m_size + 1; + + setArraySize(new_size); + + construct_range(m_buffer, new_size, old_size, val); +#else + uint new_size = m_size + 1; + + if (new_size > m_capacity) + { + // @@ Is there any way to avoid this copy? + // @@ Can we create a copy without side effects? Ie. without calls to constructor/destructor. Use alloca + memcpy? + // @@ Assert instead of copy? + const T copy(val); // create a copy in case value is inside of this array. + + setArraySize(new_size); + + new (m_buffer+new_size-1) T(copy); + } + else + { + m_size = new_size; + new(m_buffer+new_size-1) T(val); + } +#endif // 0/1 + } + template <typename T> + NV_FORCEINLINE void Array<T>::pushBack( const T & val ) + { + push_back(val); + } + template <typename T> + NV_FORCEINLINE Array<T> & Array<T>::append( const T & val ) + { + push_back(val); + return *this; + } + + // Qt like push operator. + template <typename T> + NV_FORCEINLINE Array<T> & Array<T>::operator<< ( T & t ) + { + push_back(t); + return *this; + } + + // Pop the element at the end of the vector. + template <typename T> + NV_FORCEINLINE void Array<T>::pop_back() + { + nvDebugCheck( m_size > 0 ); + resize( m_size - 1 ); + } + template <typename T> + NV_FORCEINLINE void Array<T>::popBack(uint count) + { + nvDebugCheck(m_size >= count); + resize(m_size - count); + } + + template <typename T> + NV_FORCEINLINE void Array<T>::popFront(uint count) + { + nvDebugCheck(m_size >= count); + //resize(m_size - count); + + if (m_size == count) { + clear(); + } + else { + destroy_range(m_buffer, 0, count); + + memmove(m_buffer, m_buffer + count, sizeof(T) * (m_size - count)); + + m_size -= count; + } + + } + + + // Get back element. + template <typename T> + NV_FORCEINLINE const T & Array<T>::back() const + { + nvDebugCheck( m_size > 0 ); + return m_buffer[m_size-1]; + } + + // Get back element. + template <typename T> + NV_FORCEINLINE T & Array<T>::back() + { + nvDebugCheck( m_size > 0 ); + return m_buffer[m_size-1]; + } + + // Get front element. + template <typename T> + NV_FORCEINLINE const T & Array<T>::front() const + { + nvDebugCheck( m_size > 0 ); + return m_buffer[0]; + } + + // Get front element. + template <typename T> + NV_FORCEINLINE T & Array<T>::front() + { + nvDebugCheck( m_size > 0 ); + return m_buffer[0]; + } + + // Check if the given element is contained in the array.
+ template <typename T> + NV_FORCEINLINE bool Array<T>::contains(const T & e) const + { + return find(e, NULL); + } + + // Return true if element found. + template <typename T> + NV_FORCEINLINE bool Array<T>::find(const T & element, uint * indexPtr) const + { + return find(element, 0, m_size, indexPtr); + } + + // Return true if element found within the given range. + template <typename T> + NV_FORCEINLINE bool Array<T>::find(const T & element, uint begin, uint end, uint * indexPtr) const + { + return ::nv::find(element, m_buffer, begin, end, indexPtr); + } + + + // Remove the element at the given index. This is an expensive operation! + template <typename T> + void Array<T>::removeAt(uint index) + { + nvDebugCheck(index >= 0 && index < m_size); + + if (m_size == 1) { + clear(); + } + else { + m_buffer[index].~T(); + + memmove(m_buffer+index, m_buffer+index+1, sizeof(T) * (m_size - 1 - index)); + m_size--; + } + } + + // Remove the first instance of the given element. + template <typename T> + bool Array<T>::remove(const T & element) + { + uint index; + if (find(element, &index)) { + removeAt(index); + return true; + } + return false; + } + + // Insert the given element at the given index shifting all the elements up. + template <typename T> + void Array<T>::insertAt(uint index, const T & val/*=T()*/) + { + nvDebugCheck( index >= 0 && index <= m_size ); + + setArraySize(m_size + 1); + + if (index < m_size - 1) { + memmove(m_buffer+index+1, m_buffer+index, sizeof(T) * (m_size - 1 - index)); + } + + // Copy-construct into the newly opened slot. + new(m_buffer+index) T(val); + } + + // Append the given data to our vector. + template <typename T> + NV_FORCEINLINE void Array<T>::append(const Array<T> & other) + { + append(other.m_buffer, other.m_size); + } + + // Append the given data to our vector. + template <typename T> + void Array<T>::append(const T other[], uint count) + { + if (count > 0) { + const uint old_size = m_size; + + setArraySize(m_size + count); + + for (uint i = 0; i < count; i++ ) { + new(m_buffer + old_size + i) T(other[i]); + } + } + } + + + // Remove the given element by replacing it with the last one. + template <typename T> + void Array<T>::replaceWithLast(uint index) + { + nvDebugCheck( index < m_size ); + nv::swap(m_buffer[index], back()); // @@ Is this OK when index == size-1? + (m_buffer+m_size-1)->~T(); + m_size--; + } + + // Resize the vector preserving existing elements. + template <typename T> + void Array<T>::resize(uint new_size) + { + uint old_size = m_size; + + // Destruct old elements (if we're shrinking). + destroy_range(m_buffer, new_size, old_size); + + setArraySize(new_size); + + // Call default constructors + construct_range(m_buffer, new_size, old_size); + } + + + // Resize the vector preserving existing elements and initializing the + // new ones with the given value. + template <typename T> + void Array<T>::resize(uint new_size, const T & elem) + { + nvDebugCheck(&elem < m_buffer || &elem > m_buffer+m_size); + + uint old_size = m_size; + + // Destruct old elements (if we're shrinking). + destroy_range(m_buffer, new_size, old_size); + + setArraySize(new_size); + + // Call copy constructors + construct_range(m_buffer, new_size, old_size, elem); + } + + // Fill array with the given value. + template <typename T> + void Array<T>::fill(const T & elem) + { + fill(m_buffer, m_size, elem); + } + + // Clear the buffer. + template <typename T> + NV_FORCEINLINE void Array<T>::clear() + { + nvDebugCheck(isValidPtr(m_buffer)); + + // Destruct old elements + destroy_range(m_buffer, 0, m_size); + + m_size = 0; + } + + // Shrink the allocated vector.
+ template <typename T> + NV_FORCEINLINE void Array<T>::shrink() + { + if (m_size < m_capacity) { + setArrayCapacity(m_size); + } + } + + // Preallocate space. + template <typename T> + NV_FORCEINLINE void Array<T>::reserve(uint desired_size) + { + if (desired_size > m_capacity) { + setArrayCapacity(desired_size); + } + } + + // Copy elements to this array. Resizes it if needed. + template <typename T> + NV_FORCEINLINE void Array<T>::copy(const T * data, uint count) + { +#if 1 // More simple, but maybe not be as efficient? + destroy_range(m_buffer, 0, m_size); + + setArraySize(count); + + construct_range(m_buffer, count, 0, data); +#else + const uint old_size = m_size; + + destroy_range(m_buffer, count, old_size); + + setArraySize(count); + + copy_range(m_buffer, data, old_size); + + construct_range(m_buffer, count, old_size, data); +#endif + } + + // Assignment operator. + template <typename T> + NV_FORCEINLINE Array<T> & Array<T>::operator=( const Array<T> & a ) + { + copy(a.m_buffer, a.m_size); + return *this; + } + + // Release ownership of allocated memory and returns pointer to it. + template <typename T> + T * Array<T>::release() { + T * tmp = m_buffer; + m_buffer = NULL; + m_capacity = 0; + m_size = 0; + return tmp; + } + + + + // Change array size. + template <typename T> + inline void Array<T>::setArraySize(uint new_size) { + m_size = new_size; + + if (new_size > m_capacity) { + uint new_buffer_size; + if (m_capacity == 0) { + // first allocation is exact + new_buffer_size = new_size; + } + else { + // following allocations grow array by 25% + new_buffer_size = new_size + (new_size >> 2); + } + + setArrayCapacity( new_buffer_size ); + } + } + + // Change array capacity. + template <typename T> + inline void Array<T>::setArrayCapacity(uint new_capacity) { + nvDebugCheck(new_capacity >= m_size); + + if (new_capacity == 0) { + // free the buffer. + if (m_buffer != NULL) { + free(m_buffer); + m_buffer = NULL; + } + } + else { + // realloc the buffer + m_buffer = realloc(m_buffer, new_capacity); + } + + m_capacity = new_capacity; + } + + // Array serialization. + template <typename T> + inline Stream & operator<< ( Stream & s, Array<T> & p ) + { + if (s.isLoading()) { + uint size; + s << size; + p.resize( size ); + } + else { + s << p.m_size; + } + + for (uint i = 0; i < p.m_size; i++) { + s << p.m_buffer[i]; + } + + return s; + } + + // Swap the members of the two given vectors. + template <typename T> + inline void swap(Array<T> & a, Array<T> & b) + { + nv::swap(a.m_buffer, b.m_buffer); + nv::swap(a.m_capacity, b.m_capacity); + nv::swap(a.m_size, b.m_size); + } + + +} // nv namespace + +#endif // NV_CORE_ARRAY_INL Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/BitArray.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/BitArray.h +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/BitArray.h @@ -1,168 +0,0 @@ -// This code is in the public domain -- castanyo@yahoo.es - -#ifndef NV_CORE_BITARRAY_H -#define NV_CORE_BITARRAY_H - -#include <nvcore/nvcore.h> -#include <nvcore/Containers.h> - -namespace nv -{ - -/// Count the bits of @a x. -inline uint bitsSet(uint8 x) { - uint count = 0; - for(; x != 0; x >>= 1) { - count += (x & 1); - } - return count; -} - - -/// Count the bits of @a x. -inline uint bitsSet(uint32 x, int bits) { - uint count = 0; - for(; x != 0 && bits != 0; x >>= 1, bits--) { - count += (x & 1); - } - return count; -} - - -/// Simple bit array. -class BitArray -{ -public: - - /// Default ctor. - BitArray() {} - - /// Ctor with initial m_size. - BitArray(uint sz) - { - resize(sz); - } - - /// Get array m_size. - uint size() const { return m_size; } - - /// Clear array m_size.
- void clear() { resize(0); } - - /// Set array m_size. - void resize(uint sz) - { - m_size = sz; - m_bitArray.resize( (m_size + 7) >> 3 ); - } - - /// Get bit. - bool bitAt(uint b) const - { - nvDebugCheck( b < m_size ); - return (m_bitArray[b >> 3] & (1 << (b & 7))) != 0; - } - - /// Set a bit. - void setBitAt(uint b) - { - nvDebugCheck( b < m_size ); - m_bitArray[b >> 3] |= (1 << (b & 7)); - } - - /// Clear a bit. - void clearBitAt( uint b ) - { - nvDebugCheck( b < m_size ); - m_bitArray[b >> 3] &= ~(1 << (b & 7)); - } - - /// Clear all the bits. - void clearAll() - { - memset(m_bitArray.unsecureBuffer(), 0, m_bitArray.size()); - } - - /// Set all the bits. - void setAll() - { - memset(m_bitArray.unsecureBuffer(), 0xFF, m_bitArray.size()); - } - - /// Toggle all the bits. - void toggleAll() - { - const uint byte_num = m_bitArray.size(); - for(uint b = 0; b < byte_num; b++) { - m_bitArray[b] ^= 0xFF; - } - } - - /// Get a byte of the bit array. - const uint8 & byteAt(uint index) const - { - return m_bitArray[index]; - } - - /// Set the given byte of the byte array. - void setByteAt(uint index, uint8 b) - { - m_bitArray[index] = b; - } - - /// Count the number of bits set. - uint countSetBits() const - { - const uint num = m_bitArray.size(); - if( num == 0 ) { - return 0; - } - - uint count = 0; - for(uint i = 0; i < num - 1; i++) { - count += bitsSet(m_bitArray[i]); - } - count += bitsSet(m_bitArray[num-1], m_size & 0x7); - - //piDebugCheck(count + countClearBits() == m_size); - return count; - } - - /// Count the number of bits clear. - uint countClearBits() const { - - const uint num = m_bitArray.size(); - if( num == 0 ) { - return 0; - } - - uint count = 0; - for(uint i = 0; i < num - 1; i++) { - count += bitsSet(~m_bitArray[i]); - } - count += bitsSet(~m_bitArray[num-1], m_size & 0x7); - - //piDebugCheck(count + countSetBits() == m_size); - return count; - } - - friend void swap(BitArray & a, BitArray & b) - { - swap(a.m_size, b.m_size); - swap(a.m_bitArray, b.m_bitArray); - } - - -private: - - /// Number of bits stored. - uint m_size; - - /// Array of bits. 
- Array m_bitArray; - -}; - -} // nv namespace - -#endif // _PI_CORE_BITARRAY_H_ Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/CMakeLists.txt =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/CMakeLists.txt +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/CMakeLists.txt @@ -1,27 +1,24 @@ PROJECT(nvcore) -ADD_SUBDIRECTORY(poshlib) SET(CORE_SRCS - nvcore.h - Ptr.h - BitArray.h - Memory.h - Memory.cpp - Debug.h - Debug.cpp - Containers.h - StrLib.h - StrLib.cpp - Stream.h - StdStream.h - TextReader.h - TextReader.cpp - TextWriter.h - TextWriter.cpp - Radix.h - Radix.cpp - Library.h - Library.cpp) + nvcore.h + Array.h + Debug.h Debug.cpp + DefsGnucDarwin.h + DefsGnucLinux.h + DefsGnucWin32.h + DefsVcWin32.h + FileSystem.h FileSystem.cpp + ForEach.h + Memory.h Memory.cpp + Ptr.h + RefCounted.h + StrLib.h StrLib.cpp + Stream.h + StdStream.h + TextWriter.h TextWriter.cpp + Timer.h Timer.cpp + Utils.h) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) @@ -29,19 +26,24 @@ ADD_DEFINITIONS(-DNVCORE_EXPORTS) IF(UNIX) - SET(LIBS ${LIBS} ${CMAKE_DL_LIBS}) + SET(LIBS ${LIBS} ${CMAKE_DL_LIBS}) ENDIF(UNIX) IF(NVCORE_SHARED) - ADD_DEFINITIONS(-DNVCORE_SHARED=1) - ADD_LIBRARY(nvcore SHARED ${CORE_SRCS}) + ADD_DEFINITIONS(-DNVCORE_SHARED=1) + ADD_LIBRARY(nvcore SHARED ${CORE_SRCS}) ELSE(NVCORE_SHARED) - ADD_LIBRARY(nvcore ${CORE_SRCS}) + ADD_LIBRARY(nvcore ${CORE_SRCS}) ENDIF(NVCORE_SHARED) TARGET_LINK_LIBRARIES(nvcore ${LIBS}) +# On NetBSD and FreeBSD backtrace() is provided by libexecinfo, not libc. +if (CMAKE_SYSTEM_NAME MATCHES "NetBSD" OR CMAKE_SYSTEM_NAME MATCHES "FreeBSD") + TARGET_LINK_LIBRARIES(nvcore execinfo) +endif() + INSTALL(TARGETS nvcore - RUNTIME DESTINATION ${BINDIR} - LIBRARY DESTINATION ${LIBDIR} - ARCHIVE DESTINATION ${LIBDIR}) + RUNTIME DESTINATION ${BINDIR} + LIBRARY DESTINATION ${LIBDIR} + ARCHIVE DESTINATION ${LIBDIR}) Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/Containers.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/Containers.h +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/Containers.h @@ -1,1059 +0,0 @@ -// This code is in the public domain -- castanyo@yahoo.es - -#ifndef NV_CORE_CONTAINER_H -#define NV_CORE_CONTAINER_H - -/* -These containers are based on Thatcher Ulrich containers, -donated to the Public Domain. - -I've also borrowed some ideas from the Qt toolkit, specially the cool -foreach iterator. - -TODO -Do not use memmove in insert & remove, use copy ctors instead. -*/ - - -// nvcore -#include "nvcore.h" -#include "Memory.h" -#include "Debug.h" - -#include // memmove -#include // for placement new - - -#if NV_CC_GNUC // If typeof is available: - -#define NV_FOREACH(i, container) \ - typedef typeof(container) NV_STRING_JOIN2(cont,__LINE__); \ - for(NV_STRING_JOIN2(cont,__LINE__)::PseudoIndex i((container).start()); !(container).isDone(i); (container).advance(i)) -/* -#define NV_FOREACH(i, container) \ - for(typename typeof(container)::PseudoIndex i((container).start()); !(container).isDone(i); (container).advance(i)) -*/ - -#else // If typeof not available: - -struct PseudoIndexWrapper { - template - PseudoIndexWrapper(const T & container) { - nvStaticCheck(sizeof(typename T::PseudoIndex) <= sizeof(memory)); - new (memory) typename T::PseudoIndex(container.start()); - } - // PseudoIndex cannot have a dtor! 
- - template typename T::PseudoIndex & operator()(const T * container) { - return *reinterpret_cast(memory); - } - template const typename T::PseudoIndex & operator()(const T * container) const { - return *reinterpret_cast(memory); - } - - uint8 memory[4]; // Increase the size if we have bigger enumerators. -}; - -#define NV_FOREACH(i, container) \ - for(PseudoIndexWrapper i(container); !(container).isDone(i(&(container))); (container).advance(i(&(container)))) - -#endif - -// Declare foreach keyword. -#if !defined NV_NO_USE_KEYWORDS -# define foreach NV_FOREACH -#endif - - - -namespace nv -{ - // Templates - - /// Return the maximum of two values. - template - inline const T & max(const T & a, const T & b) - { - //return std::max(a, b); - if( a < b ) { - return b; - } - return a; - } - - /// Return the minimum of two values. - template - inline const T & min(const T & a, const T & b) - { - //return std::min(a, b); - if( b < a ) { - return b; - } - return a; - } - - /// Clamp between two values. - template - inline const T & clamp(const T & x, const T & a, const T & b) - { - return min(max(x, a), b); - } - - /// Swap two values. - template - inline void swap(T & a, T & b) - { - //return std::swap(a, b); - T temp = a; - a = b; - b = temp; - } - - template struct hash - { - inline uint sdbm_hash(const void * data_in, uint size, uint h = 5381) - { - const uint8 * data = (const uint8 *) data_in; - uint i = 0; - while (i < size) { - h = (h << 16) + (h << 6) - h + (uint) data[i++]; - } - return h; - } - - uint operator()(const Key & k) { - return sdbm_hash(&k, sizeof(Key)); - } - }; - template <> struct hash - { - uint operator()(int x) const { return x; } - }; - template <> struct hash - { - uint operator()(uint x) const { return x; } - }; - - /// Delete all the elements of a container. - template - void deleteAll(T & container) - { - for(typename T::PseudoIndex i = container.start(); !container.isDone(i); container.advance(i)) - { - delete container[i]; - } - } - - - /** Return the next power of two. - * @see http://graphics.stanford.edu/~seander/bithacks.html - * @warning Behaviour for 0 is undefined. - * @note isPowerOfTwo(x) == true -> nextPowerOfTwo(x) == x - * @note nextPowerOfTwo(x) = 2 << log2(x-1) - */ - inline uint nextPowerOfTwo( uint x ) - { - nvDebugCheck( x != 0 ); - #if 1 // On modern CPUs this is as fast as using the bsr instruction. - x--; - x |= x >> 1; - x |= x >> 2; - x |= x >> 4; - x |= x >> 8; - x |= x >> 16; - return x+1; - #else - uint p = 1; - while( x > p ) { - p += p; - } - return p; - #endif - } - - /// Return true if @a n is a power of two. - inline bool isPowerOfTwo( uint n ) - { - return (n & (n-1)) == 0; - } - - /// Simple iterator interface. - template - struct Iterator - { - virtual void advance(); - virtual bool isDone(); - virtual T current(); - }; - - - /** - * Replacement for std::vector that is easier to debug and provides - * some nice foreach enumerators. - */ - template - class NVCORE_CLASS Array { - public: - - /// Ctor. - Array() : m_buffer(NULL), m_size(0), m_buffer_size(0) - { - } - - /// Copy ctor. - Array( const Array & a ) : m_buffer(NULL), m_size(0), m_buffer_size(0) - { - copy(a.m_buffer, a.m_size); - } - - /// Ctor that initializes the vector with the given elements. - Array( const T * ptr, int num ) : m_buffer(NULL), m_size(0), m_buffer_size(0) - { - copy(ptr, num); - } - - /// Allocate array. - explicit Array(uint capacity) : m_buffer(NULL), m_size(0), m_buffer_size(0) - { - allocate(capacity); - } - - - /// Dtor. 
- ~Array() - { - clear(); - allocate(0); - } - - - /// Const and save vector access. - const T & operator[]( uint index ) const - { - nvDebugCheck(index < m_size); - return m_buffer[index]; - } - - /// Safe vector access. - T & operator[] ( uint index ) - { - nvDebugCheck(index < m_size); - return m_buffer[index]; - } - - - /// Get vector size. - uint size() const { return m_size; } - - /// Get vector size. - uint count() const { return m_size; } - - /// Get const vector pointer. - const T * buffer() const { return m_buffer; } - - /// Get vector pointer. - T * unsecureBuffer() { return m_buffer; } - - /// Is vector empty. - bool isEmpty() const { return m_size == 0; } - - /// Is a null vector. - bool isNull() const { return m_buffer == NULL; } - - - /// Push an element at the end of the vector. - void push_back( const T & val ) - { - uint new_size = m_size + 1; - - if (new_size > m_buffer_size) - { - const T copy(val); // create a copy in case value is inside of this array. - resize(new_size); - m_buffer[new_size-1] = copy; - } - else - { - m_size = new_size; - new(m_buffer+new_size-1) T(val); - } - } - void pushBack( const T & val ) - { - push_back(val); - } - void append( const T & val ) - { - push_back(val); - } - - /// Qt like push operator. - Array & operator<< ( T & t ) - { - push_back(t); - return *this; - } - - /// Pop and return element at the end of the vector. - void pop_back() - { - nvDebugCheck( m_size > 0 ); - resize( m_size - 1 ); - } - void popBack() - { - pop_back(); - } - - /// Get back element. - const T & back() const - { - nvDebugCheck( m_size > 0 ); - return m_buffer[m_size-1]; - } - - /// Get back element. - T & back() - { - nvDebugCheck( m_size > 0 ); - return m_buffer[m_size-1]; - } - - /// Get front element. - const T & front() const - { - nvDebugCheck( m_size > 0 ); - return m_buffer[0]; - } - - /// Get front element. - T & front() - { - nvDebugCheck( m_size > 0 ); - return m_buffer[0]; - } - - /// Check if the given element is contained in the array. - bool contains(const T & e) const - { - for (uint i = 0; i < m_size; i++) { - if (m_buffer[i] == e) return true; - } - return false; - } - - /// Remove the element at the given index. This is an expensive operation! - void removeAt( uint index ) - { - nvCheck(index >= 0 && index < m_size); - - if( m_size == 1 ) { - clear(); - } - else { - m_buffer[index].~T(); - - memmove( m_buffer+index, m_buffer+index+1, sizeof(T) * (m_size - 1 - index) ); - m_size--; - } - } - - /// Remove the first instance of the given element. - void remove(const T & element) - { - for(PseudoIndex i = start(); !isDone(i); advance(i)) { - removeAt(i); - break; - } - } - - /// Insert the given element at the given index shifting all the elements up. - void insertAt( uint index, const T & val = T() ) - { - nvCheck( index >= 0 && index <= m_size ); - - resize( m_size + 1 ); - - if( index < m_size - 1 ) { - memmove( m_buffer+index+1, m_buffer+index, sizeof(T) * (m_size - 1 - index) ); - } - - // Copy-construct into the newly opened slot. - new(m_buffer+index) T(val); - } - - /// Append the given data to our vector. - void append(const Array & other) - { - append(other.m_buffer, other.m_size); - } - - /// Append the given data to our vector. - void append(const T other[], uint count) - { - if( count > 0 ) { - const uint old_size = m_size; - resize(m_size + count); - // Must use operator=() to copy elements, in case of side effects (e.g. ref-counting). 
- for( uint i = 0; i < count; i++ ) { - m_buffer[old_size + i] = other[i]; - } - } - } - - - /// Remove the given element by replacing it with the last one. - void replaceWithLast(uint index) - { - nvDebugCheck( index < m_size ); - m_buffer[index] = back(); - (m_buffer+m_size-1)->~T(); - m_size--; - } - - - /// Resize the vector preserving existing elements. - void resize(uint new_size) - { - uint i; - uint old_size = m_size; - m_size = new_size; - - // Destruct old elements (if we're shrinking). - for( i = new_size; i < old_size; i++ ) { - (m_buffer+i)->~T(); // Explicit call to the destructor - } - - if( m_size == 0 ) { - //Allocate(0); // Don't shrink automatically. - } - else if( m_size <= m_buffer_size/* && m_size > m_buffer_size >> 1*/) { - // don't compact yet. - nvDebugCheck(m_buffer != NULL); - } - else { - uint new_buffer_size; - if( m_buffer_size == 0 ) { - // first allocation - new_buffer_size = m_size; - } - else { - // growing - new_buffer_size = m_size + (m_size >> 2); - } - allocate( new_buffer_size ); - } - - // Call default constructors - for( i = old_size; i < new_size; i++ ) { - new(m_buffer+i) T; // placement new - } - } - - - /// Resize the vector preserving existing elements and initializing the - /// new ones with the given value. - void resize( uint new_size, const T &elem ) - { - uint i; - uint old_size = m_size; - m_size = new_size; - - // Destruct old elements (if we're shrinking). - for( i = new_size; i < old_size; i++ ) { - (m_buffer+i)->~T(); // Explicit call to the destructor - } - - if( m_size == 0 ) { - //Allocate(0); // Don't shrink automatically. - } - else if( m_size <= m_buffer_size && m_size > m_buffer_size >> 1 ) { - // don't compact yet. - } - else { - uint new_buffer_size; - if( m_buffer_size == 0 ) { - // first allocation - new_buffer_size = m_size; - } - else { - // growing - new_buffer_size = m_size + (m_size >> 2); - } - allocate( new_buffer_size ); - } - - // Call copy constructors - for( i = old_size; i < new_size; i++ ) { - new(m_buffer+i) T( elem ); // placement new - } - } - - /// Tighten the memory used by the container. - void tighten() - { - // TODO Reallocate only if worth. - } - - /// Clear the buffer. - void clear() - { - resize(0); - } - - /// Shrink the allocated vector. - void shrink() - { - if( m_size < m_buffer_size ) { - allocate(m_size); - } - } - - /// Preallocate space. - void reserve(uint desired_size) - { - if( desired_size > m_buffer_size ) { - allocate( desired_size ); - } - } - - /// Copy memory to our vector. Resizes the vector if needed. - void copy( const T * ptr, uint num ) - { - resize( num ); - for(uint i = 0; i < m_size; i++) { - m_buffer[i] = ptr[i]; - } - } - - /// Assignment operator. - void operator=( const Array & a ) - { - copy( a.m_buffer, a.m_size ); - } - - /* - /// Array serialization. - friend Stream & operator<< ( Stream & s, Array & p ) - { - if( s.isLoading() ) { - uint size; - s << size; - p.resize( size ); - } - else { - s << p.m_size; - } - - for( uint i = 0; i < p.m_size; i++ ) { - s << p.m_buffer[i]; - } - - return s; - } - */ - - // Array enumerator. 
- typedef uint PseudoIndex; - - PseudoIndex start() const { return 0; } - bool isDone(const PseudoIndex & i) const { nvDebugCheck(i <= this->m_size); return i == this->m_size; }; - void advance(PseudoIndex & i) const { nvDebugCheck(i <= this->m_size); i++; } - - #if NV_CC_MSVC - T & operator[]( const PseudoIndexWrapper & i ) { - return m_buffer[i(this)]; - } - const T & operator[]( const PseudoIndexWrapper & i ) const { - return m_buffer[i(this)]; - } - #endif - - - /// Swap the members of this vector and the given vector. - friend void swap(Array & a, Array & b) - { - swap(a.m_buffer, b.m_buffer); - swap(a.m_size, b.m_size); - swap(a.m_buffer_size, b.m_buffer_size); - } - - - private: - - /// Change buffer size. - void allocate( uint rsize ) - { - m_buffer_size = rsize; - - // free the buffer. - if( m_buffer_size == 0 ) { - if( m_buffer ) { - free( m_buffer ); - m_buffer = NULL; - } - } - - // realloc the buffer - else { - if( m_buffer ) m_buffer = (T *) realloc(m_buffer, sizeof(T) * m_buffer_size); - else m_buffer = (T *) ::malloc(sizeof(T) * m_buffer_size); - } - } - - - private: - T * m_buffer; - uint m_size; - uint m_buffer_size; - }; - - - - /** Thatcher Ulrich's hash table. - * - * Hash table, linear probing, internal chaining. One - * interesting/nice thing about this implementation is that the table - * itself is a flat chunk of memory containing no pointers, only - * relative indices. If the key and value types of the hash contain - * no pointers, then the hash can be serialized using raw IO. Could - * come in handy. - * - * Never shrinks, unless you explicitly clear() it. Expands on - * demand, though. For best results, if you know roughly how big your - * table will be, default it to that size when you create it. - */ - template > - class NVCORE_CLASS HashMap - { - NV_FORBID_COPY(HashMap) - public: - - /// Default ctor. - HashMap() : entry_count(0), size_mask(-1), table(NULL) { } - - /// Ctor with size hint. - explicit HashMap(int size_hint) : entry_count(0), size_mask(-1), table(NULL) { setCapacity(size_hint); } - - /// Dtor. - ~HashMap() { clear(); } - - - /// Set a new or existing value under the key, to the value. - void set(const T& key, const U& value) - { - int index = findIndex(key); - if (index >= 0) - { - E(index).value = value; - return; - } - - // Entry under key doesn't exist. - add(key, value); - } - - - /// Add a new value to the hash table, under the specified key. - void add(const T& key, const U& value) - { - nvCheck(findIndex(key) == -1); - - checkExpand(); - nvCheck(table != NULL); - entry_count++; - - const uint hash_value = hash_functor()(key); - const int index = hash_value & size_mask; - - Entry * natural_entry = &(E(index)); - - if (natural_entry->isEmpty()) - { - // Put the new entry in. - new (natural_entry) Entry(key, value, -1, hash_value); - } - else - { - // Find a blank spot. - int blank_index = index; - for (;;) - { - blank_index = (blank_index + 1) & size_mask; - if (E(blank_index).isEmpty()) break; // found it - } - Entry * blank_entry = &E(blank_index); - - if (int(natural_entry->hash_value & size_mask) == index) - { - // Collision. Link into this chain. - - // Move existing list head. - new (blank_entry) Entry(*natural_entry); // placement new, copy ctor - - // Put the new info in the natural entry. - natural_entry->key = key; - natural_entry->value = value; - natural_entry->next_in_chain = blank_index; - natural_entry->hash_value = hash_value; - } - else - { - // Existing entry does not naturally - // belong in this slot. 
Existing - // entry must be moved. - - // Find natural location of collided element (i.e. root of chain) - int collided_index = natural_entry->hash_value & size_mask; - for (;;) - { - Entry * e = &E(collided_index); - if (e->next_in_chain == index) - { - // Here's where we need to splice. - new (blank_entry) Entry(*natural_entry); - e->next_in_chain = blank_index; - break; - } - collided_index = e->next_in_chain; - nvCheck(collided_index >= 0 && collided_index <= size_mask); - } - - // Put the new data in the natural entry. - natural_entry->key = key; - natural_entry->value = value; - natural_entry->hash_value = hash_value; - natural_entry->next_in_chain = -1; - } - } - } - - - /// Remove the first value under the specified key. - bool remove(const T& key) - { - if (table == NULL) - { - return false; - } - - int index = findIndex(key); - if (index < 0) - { - return false; - } - - Entry * entry = &E(index); - - if( entry->isEndOfChain() ) { - entry->clear(); - } - else { - // Get next entry. - Entry & next_entry = E(entry->next_in_chain); - - // Copy next entry in this place. - new (entry) Entry(next_entry); - - next_entry.clear(); - } - - entry_count--; - - return true; - } - - - /// Remove all entries from the hash table. - void clear() - { - if (table != NULL) - { - // Delete the entries. - for (int i = 0, n = size_mask; i <= n; i++) - { - Entry * e = &E(i); - if (e->isEmpty() == false) - { - e->clear(); - } - } - free(table); - table = NULL; - entry_count = 0; - size_mask = -1; - } - } - - - /// Returns true if the hash is empty. - bool isEmpty() const - { - return table == NULL || entry_count == 0; - } - - - /** Retrieve the value under the given key. - * - * If there's no value under the key, then return false and leave - * *value alone. - * - * If there is a value, return true, and set *value to the entry's - * value. - * - * If value == NULL, return true or false according to the - * presence of the key, but don't touch *value. - */ - bool get(const T& key, U* value = NULL) const - { - int index = findIndex(key); - if (index >= 0) - { - if (value) { - *value = E(index).value; // take care with side-effects! - } - return true; - } - return false; - } - - /// Determine if the given key is contained in the hash. - bool contains(const T & key) const - { - return get(key); - } - - /// Number of entries in the hash. - int size() const - { - return entry_count; - } - - /// Number of entries in the hash. - int count() const - { - return size(); - } - - - /** - * Resize the hash table to fit one more entry. Often this - * doesn't involve any action. - */ - void checkExpand() - { - if (table == NULL) { - // Initial creation of table. Make a minimum-sized table. - setRawCapacity(16); - } - else if (entry_count * 3 > (size_mask + 1) * 2) { - // Table is more than 2/3rds full. Expand. - setRawCapacity(entry_count * 2); - } - } - - - /// Hint the bucket count to >= n. - void resize(int n) - { - // Not really sure what this means in relation to - // STLport's hash_map... they say they "increase the - // bucket count to at least n" -- but does that mean - // their real capacity after resize(n) is more like - // n*2 (since they do linked-list chaining within - // buckets?). - setCapacity(n); - } - - /** - * Size the hash so that it can comfortably contain the given - * number of elements. If the hash already contains more - * elements than new_size, then this may be a no-op. 
- */ - void setCapacity(int new_size) - { - int new_raw_size = (new_size * 3) / 2; - if (new_raw_size < size()) { return; } - - setRawCapacity(new_raw_size); - } - - /// Behaves much like std::pair. - struct Entry - { - int next_in_chain; // internal chaining for collisions - uint hash_value; // avoids recomputing. Worthwhile? - T key; - U value; - - Entry() : next_in_chain(-2) {} - Entry(const Entry& e) - : next_in_chain(e.next_in_chain), hash_value(e.hash_value), key(e.key), value(e.value) - { - } - Entry(const T& k, const U& v, int next, int hash) - : next_in_chain(next), hash_value(hash), key(k), value(v) - { - } - bool isEmpty() const { return next_in_chain == -2; } - bool isEndOfChain() const { return next_in_chain == -1; } - - void clear() - { - key.~T(); // placement delete - value.~U(); // placement delete - next_in_chain = -2; - } - }; - - - // HashMap enumerator. - typedef int PseudoIndex; - PseudoIndex start() const { PseudoIndex i = 0; findNext(i); return i; } - bool isDone(const PseudoIndex & i) const { nvDebugCheck(i <= size_mask+1); return i == size_mask+1; }; - void advance(PseudoIndex & i) const { nvDebugCheck(i <= size_mask+1); i++; findNext(i); } - - #if NV_CC_GNUC - Entry & operator[]( const PseudoIndex & i ) { - return E(i); - } - const Entry & operator[]( const PseudoIndex & i ) const { - return E(i); - } - #elif NV_CC_MSVC - Entry & operator[]( const PseudoIndexWrapper & i ) { - return E(i(this)); - } - const Entry & operator[]( const PseudoIndexWrapper & i ) const { - return E(i(this)); - } - #endif - - - - private: - - // Find the index of the matching entry. If no match, then return -1. - int findIndex(const T& key) const - { - if (table == NULL) return -1; - - uint hash_value = hash_functor()(key); - int index = hash_value & size_mask; - - const Entry * e = &E(index); - if (e->isEmpty()) return -1; - if (int(e->hash_value & size_mask) != index) return -1; // occupied by a collider - - for (;;) - { - nvCheck((e->hash_value & size_mask) == (hash_value & size_mask)); - - if (e->hash_value == hash_value && e->key == key) - { - // Found it. - return index; - } - nvDebugCheck(! (e->key == key)); // keys are equal, but hash differs! - - // Keep looking through the chain. - index = e->next_in_chain; - if (index == -1) break; // end of chain - - nvCheck(index >= 0 && index <= size_mask); - e = &E(index); - - nvCheck(e->isEmpty() == false); - } - return -1; - } - - // Helpers. - Entry & E(int index) - { - nvDebugCheck(table != NULL); - nvDebugCheck(index >= 0 && index <= size_mask); - return table[index]; - } - const Entry & E(int index) const - { - nvDebugCheck(table != NULL); - nvDebugCheck(index >= 0 && index <= size_mask); - return table[index]; - } - - - /** - * Resize the hash table to the given size (Rehash the - * contents of the current table). The arg is the number of - * hash table entries, not the number of elements we should - * actually contain (which will be less than this). - */ - void setRawCapacity(int new_size) - { - if (new_size <= 0) { - // Special case. - clear(); - return; - } - - // Force new_size to be a power of two. 
- new_size = nextPowerOfTwo(new_size); - - HashMap new_hash; - new_hash.table = (Entry *) ::malloc(sizeof(Entry) * new_size); - nvDebugCheck(new_hash.table != NULL); - - new_hash.entry_count = 0; - new_hash.size_mask = new_size - 1; - for (int i = 0; i < new_size; i++) - { - new_hash.E(i).next_in_chain = -2; // mark empty - } - - // Copy stuff to new_hash - if (table != NULL) - { - for (int i = 0, n = size_mask; i <= n; i++) - { - Entry * e = &E(i); - if (e->isEmpty() == false) - { - // Insert old entry into new hash. - new_hash.add(e->key, e->value); - e->clear(); // placement delete of old element - } - } - - // Delete our old data buffer. - free(table); - } - - // Steal new_hash's data. - entry_count = new_hash.entry_count; - size_mask = new_hash.size_mask; - table = new_hash.table; - new_hash.entry_count = 0; - new_hash.size_mask = -1; - new_hash.table = NULL; - } - - // Move the enumerator to the next valid element. - void findNext(PseudoIndex & i) const { - while (i <= size_mask && E(i).isEmpty()) { - i++; - } - } - - - int entry_count; - int size_mask; - Entry * table; - - }; - - - -} // nv namespace - -#endif // NV_CORE_CONTAINER_H Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/Debug.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/Debug.h +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/Debug.h @@ -1,131 +1,217 @@ -// This code is in the public domain -- castanyo@yahoo.es +// This code is in the public domain -- Ignacio Castaño +#pragma once #ifndef NV_CORE_DEBUG_H #define NV_CORE_DEBUG_H -#include +#include "nvcore.h" + +#include // va_list -#if defined(HAVE_STDARG_H) -# include // va_list -#endif -#define NV_ABORT_DEBUG 1 -#define NV_ABORT_IGNORE 2 -#define NV_ABORT_EXIT 3 +// Make sure we are using our assert. +#undef assert -#undef assert // avoid conflicts with assert method. +#define NV_ABORT_DEBUG 1 +#define NV_ABORT_IGNORE 2 +#define NV_ABORT_EXIT 3 #define nvNoAssert(exp) \ - do { \ - (void)sizeof(exp); \ - } while(0) + NV_MULTI_LINE_MACRO_BEGIN \ + (void)sizeof(exp); \ + NV_MULTI_LINE_MACRO_END #if NV_NO_ASSERT -# define nvAssert(exp) nvNoAssert(exp) -# define nvCheck(exp) nvNoAssert(exp) -# define nvDebugAssert(exp) nvNoAssert(exp) -# define nvDebugCheck(exp) nvNoAssert(exp) -# define nvDebugBreak() nvNoAssert(0) +# define nvAssert(exp) nvNoAssert(exp) +# define nvCheck(exp) nvNoAssert(exp) +# define nvDebugAssert(exp) nvNoAssert(exp) +# define nvDebugCheck(exp) nvNoAssert(exp) +# define nvDebugBreak() nvNoAssert(0) #else // NV_NO_ASSERT -# if NV_CC_MSVC - // @@ Does this work in msvc-6 and earlier? - // @@ Do I have to include ? 
-# define nvDebugBreak() __debugbreak() - // define nvDebugBreak() __asm int 3 -# elif NV_CC_GNUC && NV_CPU_PPC && NV_OS_DARWIN -# define nvDebugBreak() __asm__ volatile ("trap"); -# elif NV_CC_GNUC && NV_CPU_X86 && NV_OS_DARWIN -# define nvDebugBreak() __asm__ volatile ("int3"); -# elif NV_CC_GNUC && NV_CPU_X86 -# define nvDebugBreak() __asm__ ( "int %0" : :"I"(3) ) -# else -# include -# define nvDebugBreak() raise(SIGTRAP); - // define nvDebugBreak() *((int *)(0)) = 0 -# endif - -# define nvAssertMacro(exp) \ - do { \ - if(!(exp)) { \ - if( nvAbort(#exp, __FILE__, __LINE__, __FUNC__) == NV_ABORT_DEBUG ) { \ - nvDebugBreak(); \ - } \ - } \ - } while(false) - -# define nvAssert(exp) nvAssertMacro(exp) -# define nvCheck(exp) nvAssertMacro(exp) - -# if defined(_DEBUG) -# define nvDebugAssert(exp) nvAssertMacro(exp) -# define nvDebugCheck(exp) nvAssertMacro(exp) -# else // _DEBUG -# define nvDebugAssert(exp) nvNoAssert(exp) -# define nvDebugCheck(exp) nvNoAssert(exp) -# endif // _DEBUG +# if NV_CC_MSVC + // @@ Does this work in msvc-6 and earlier? +# define nvDebugBreak() __debugbreak() +//# define nvDebugBreak() __asm { int 3 } +# elif NV_OS_ORBIS +# define nvDebugBreak() __debugbreak() +# elif NV_CC_GNUC +# define nvDebugBreak() __builtin_trap() +# else +# error "No nvDebugBreak()!" +# endif + +/* +# elif NV_CC_GNUC || NV_CPU_PPC && NV_OS_DARWIN + // @@ Use __builtin_trap() on GCC +# define nvDebugBreak() __asm__ volatile ("trap") +# elif (NV_CC_GNUC || NV_CPU_X86 || NV_CPU_X86_64) && NV_OS_DARWIN +# define nvDebugBreak() __asm__ volatile ("int3") +# elif NV_CC_GNUC || NV_CPU_X86 || NV_CPU_X86_64 +# define nvDebugBreak() __asm__ ( "int %0" : :"I"(3) ) +# else +# include +# define nvDebugBreak() raise(SIGTRAP) +# endif +*/ + +#define nvDebugBreakOnce() \ + NV_MULTI_LINE_MACRO_BEGIN \ + static bool firstTime = true; \ + if (firstTime) { firstTime = false; nvDebugBreak(); } \ + NV_MULTI_LINE_MACRO_END + +#define nvAssertMacro(exp) \ + NV_MULTI_LINE_MACRO_BEGIN \ + if (!(exp)) { \ + if (nvAbort(#exp, __FILE__, __LINE__, __FUNC__) == NV_ABORT_DEBUG) { \ + nvDebugBreak(); \ + } \ + } \ + NV_MULTI_LINE_MACRO_END + +// GCC, LLVM need "##" before the __VA_ARGS__, MSVC doesn't care +#define nvAssertMacroWithIgnoreAll(exp,...) \ + NV_MULTI_LINE_MACRO_BEGIN \ + static bool ignoreAll = false; \ + if (!ignoreAll && !(exp)) { \ + int result = nvAbort(#exp, __FILE__, __LINE__, __FUNC__, ##__VA_ARGS__); \ + if (result == NV_ABORT_DEBUG) { \ + nvDebugBreak(); \ + } else if (result == NV_ABORT_IGNORE) { \ + ignoreAll = true; \ + } \ + } \ + NV_MULTI_LINE_MACRO_END + +// Interesting assert macro from Insomniac: +// http://www.gdcvault.com/play/1015319/Developing-Imperfect-Software-How-to +// Used as follows: +// if (nvCheck(i < count)) { +// normal path +// } else { +// fixup code. +// } +// This style of macro could be combined with __builtin_expect to let the compiler know failure is unlikely. +#define nvCheckMacro(exp) \ + (\ + (exp) ? true : ( \ + (nvAbort(#exp, __FILE__, __LINE__, __FUNC__) == NV_ABORT_DEBUG) ? (nvDebugBreak(), true) : ( false ) \ + ) \ + ) + + +#define nvAssert(exp) nvAssertMacro(exp) +#define nvCheck(exp) nvAssertMacro(exp) + +#if defined(_DEBUG) +# define nvDebugAssert(exp) nvAssertMacro(exp) +# define nvDebugCheck(exp) nvAssertMacro(exp) +#else // _DEBUG +# define nvDebugAssert(exp) nvNoAssert(exp) +# define nvDebugCheck(exp) nvNoAssert(exp) +#endif // _DEBUG #endif // NV_NO_ASSERT // Use nvAssume for very simple expresions only: nvAssume(0), nvAssume(value == true), etc. 
+/*#if !defined(_DEBUG) +# if NV_CC_MSVC +# define nvAssume(exp) __assume(exp) +# else +# define nvAssume(exp) nvCheck(exp) +# endif +#else +# define nvAssume(exp) nvCheck(exp) +#endif*/ + #if defined(_DEBUG) -# if NV_CC_MSVC -# define nvAssume(exp) __assume(exp) -# else -# define nvAssume(exp) nvCheck(exp) -# endif +# if NV_CC_MSVC +# define nvUnreachable() nvAssert(0 && "unreachable"); __assume(0) +# else +# define nvUnreachable() nvAssert(0 && "unreachable"); __builtin_unreachable() +# endif #else -# define nvAssume(exp) nvCheck(exp) +# if NV_CC_MSVC +# define nvUnreachable() __assume(0) +# else +# define nvUnreachable() __builtin_unreachable() +# endif #endif -#define nvError(x) nvAbort(x, __FILE__, __LINE__, __FUNC__) -#define nvWarning(x) nvDebug("*** Warning %s/%d: %s\n", __FILE__, __LINE__, (x)) +#define nvError(x) nvAbort(x, __FILE__, __LINE__, __FUNC__) +#define nvWarning(x) nvDebugPrint("*** Warning %s/%d: %s\n", __FILE__, __LINE__, (x)) +#ifndef NV_DEBUG_PRINT +#define NV_DEBUG_PRINT 1 //defined(_DEBUG) +#endif -#if PI_CC_MSVC -// @@ I'm not sure it's a good idea to use the default static assert. -# define nvStaticCheck(x) _STATIC_ASSERT(x) +#if NV_DEBUG_PRINT +#define nvDebug(...) nvDebugPrint(__VA_ARGS__) +#else +#if NV_CC_MSVC +#define nvDebug(...) __noop(__VA_ARGS__) #else -# define nvStaticCheck(x) typedef char NV_DO_STRING_JOIN2(__static_assert_,__LINE__)[(x)] -// define nvStaticCheck(x) switch(0) { case 0: case x:; } +#define nvDebug(...) ((void)0) // Non-msvc platforms do not evaluate arguments? #endif +#endif + -NVCORE_API int nvAbort(const char *exp, const char *file, int line, const char * func = 0); -NVCORE_API void NV_CDECL nvDebug( const char *msg, ... ) __attribute__((format (printf, 1, 2))); +NVCORE_API int nvAbort(const char *exp, const char *file, int line, const char * func = NULL, const char * msg = NULL, ...) __attribute__((format (printf, 5, 6))); +NVCORE_API void NV_CDECL nvDebugPrint( const char *msg, ... ) __attribute__((format (printf, 1, 2))); namespace nv { - /** Message handler interface. */ - struct MessageHandler { - virtual void log(const char * str, va_list arg) = 0; - virtual ~MessageHandler() {} - }; - - /** Assert handler interface. */ - struct AssertHandler { - virtual int assert(const char *exp, const char *file, int line, const char *func = 0) = 0; - virtual ~AssertHandler() {} - }; - - - namespace debug - { - NVCORE_API void dumpInfo(); - - // These functions are not thread safe. - NVCORE_API void setMessageHandler( MessageHandler * messageHandler ); - NVCORE_API void resetMessageHandler(); - - NVCORE_API void setAssertHandler( AssertHandler * assertHanlder ); - NVCORE_API void resetAssertHandler(); - - NVCORE_API void enableSigHandler(); - NVCORE_API void disableSigHandler(); - } + inline bool isValidPtr(const void * ptr) { + #if NV_CPU_X86_64 || POSH_CPU_PPC64 + if (ptr == NULL) return true; + if (reinterpret_cast(ptr) < 0x10000ULL) return false; + if (reinterpret_cast(ptr) >= 0x000007FFFFFEFFFFULL) return false; + #else + if (reinterpret_cast(ptr) == 0xcccccccc) return false; + if (reinterpret_cast(ptr) == 0xcdcdcdcd) return false; + if (reinterpret_cast(ptr) == 0xdddddddd) return false; + if (reinterpret_cast(ptr) == 0xffffffff) return false; + #endif + return true; + } + + // Message handler interface. + struct MessageHandler { + virtual void log(const char * str, va_list arg) = 0; + virtual ~MessageHandler() {} + }; + + // Assert handler interface. 
+ struct AssertHandler { + virtual int assertion(const char *exp, const char *file, int line, const char *func, const char *msg, va_list arg) = 0; + virtual ~AssertHandler() {} + }; + + + namespace debug + { + NVCORE_API void dumpInfo(); + NVCORE_API void dumpCallstack( MessageHandler *messageHandler, int callstackLevelsToSkip = 0 ); + + NVCORE_API void setMessageHandler( MessageHandler * messageHandler ); + NVCORE_API void resetMessageHandler(); + + NVCORE_API void setAssertHandler( AssertHandler * assertHanlder ); + NVCORE_API void resetAssertHandler(); + + NVCORE_API void enableSigHandler(bool interactive); + NVCORE_API void disableSigHandler(); + + NVCORE_API bool isDebuggerPresent(); + NVCORE_API bool attachToDebugger(); + + NVCORE_API void terminate(int code); + } } // nv namespace -#endif // NV_CORE_DEBUG_H +#endif // NV_CORE_DEBUG_H Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/Debug.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/Debug.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/Debug.cpp @@ -1,489 +1,1030 @@ -// This code is in the public domain -- castanyo@yahoo.es +// This code is in the public domain -- Ignacio Castaño -#include -#include +#include "Debug.h" +#include "Array.inl" +#include "StrLib.h" // StringBuilder + +#include "StdStream.h" // fileOpen + +#include // Extern #if NV_OS_WIN32 //&& NV_CC_MSVC -# define WIN32_LEAN_AND_MEAN -# define VC_EXTRALEAN -# include -# include -# if NV_CC_MSVC -# include -# if _MSC_VER < 1300 -# define DECLSPEC_DEPRECATED - // VC6: change this path to your Platform SDK headers -# include // must be XP version of file -// include "M:\\dev7\\vs\\devtools\\common\\win32sdk\\include\\dbghelp.h" -# else - // VC7: ships with updated headers -# include -# endif -# endif +# define WIN32_LEAN_AND_MEAN +# define VC_EXTRALEAN +# include +# include +# if NV_CC_MSVC +# include +# if _MSC_VER < 1300 +# define DECLSPEC_DEPRECATED +// VC6: change this path to your Platform SDK headers +# include // must be XP version of file +// include "M:\\dev7\\vs\\devtools\\common\\win32sdk\\include\\dbghelp.h" +# else +// VC7: ships with updated headers +# include +# endif +# endif +# pragma comment(lib,"dbghelp.lib") #endif +#if NV_OS_XBOX +# include +# ifdef _DEBUG +# include +# endif //_DEBUG +#endif //NV_OS_XBOX + #if !NV_OS_WIN32 && defined(HAVE_SIGNAL_H) -# include +# include #endif -#if NV_OS_LINUX || NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_OPENBSD -# include // getpid +#if NV_OS_UNIX +# include // getpid #endif #if NV_OS_LINUX && defined(HAVE_EXECINFO_H) -# include // backtrace -# if NV_CC_GNUC // defined(HAVE_CXXABI_H) -# include -# endif -#endif - -#if NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_OPENBSD -# include -# include -# include // sysctl -# if !NV_OS_OPENBSD -# include -# endif -# undef HAVE_EXECINFO_H -# if defined(HAVE_EXECINFO_H) // only after OSX 10.5 -# include // backtrace -# if NV_CC_GNUC // defined(HAVE_CXXABI_H) -# include -# endif -# endif +# include // backtrace +# if NV_CC_GNUC // defined(HAVE_CXXABI_H) +# include +# endif #endif -#include // std::runtime_error -#undef assert // defined on mingw +#if NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_NETBSD || NV_OS_OPENBSD +# include +# include +# include // sysctl +# if !defined(NV_OS_OPENBSD) +# include +# endif +# if defined(HAVE_EXECINFO_H) // only after OSX 10.5 +# include // backtrace +# if NV_CC_GNUC // defined(HAVE_CXXABI_H) +# include +# endif +# endif +#endif + +#if NV_OS_ORBIS +#include +#endif + 
+#define NV_USE_SEPARATE_THREAD 1 + using namespace nv; namespace { - static MessageHandler * s_message_handler = NULL; - static AssertHandler * s_assert_handler = NULL; - - static bool s_sig_handler_enabled = false; + static MessageHandler * s_message_handler = NULL; + static AssertHandler * s_assert_handler = NULL; + + static bool s_sig_handler_enabled = false; + static bool s_interactive = true; #if NV_OS_WIN32 && NV_CC_MSVC - // Old exception filter. - static LPTOP_LEVEL_EXCEPTION_FILTER s_old_exception_filter = NULL; + // Old exception filter. + static LPTOP_LEVEL_EXCEPTION_FILTER s_old_exception_filter = NULL; #elif !NV_OS_WIN32 && defined(HAVE_SIGNAL_H) - // Old signal handlers. - struct sigaction s_old_sigsegv; - struct sigaction s_old_sigtrap; - struct sigaction s_old_sigfpe; - struct sigaction s_old_sigbus; - + // Old signal handlers. + struct sigaction s_old_sigsegv; + struct sigaction s_old_sigtrap; + struct sigaction s_old_sigfpe; + struct sigaction s_old_sigbus; + #endif #if NV_OS_WIN32 && NV_CC_MSVC - // TODO write minidump - - static LONG WINAPI nvTopLevelFilter( struct _EXCEPTION_POINTERS * pExceptionInfo) - { - NV_UNUSED(pExceptionInfo); - /* BOOL (WINAPI * Dump) (HANDLE, DWORD, HANDLE, MINIDUMP_TYPE, PMINIDUMP_EXCEPTION_INFORMATION, PMINIDUMP_USER_STREAM_INFORMATION, PMINIDUMP_CALLBACK_INFORMATION ); - - AutoString dbghelp_path(512); - getcwd(dbghelp_path, 512); - dbghelp_path.Append("\\DbgHelp.dll"); - nvTranslatePath(dbghelp_path); - - PiLibrary DbgHelp_lib(dbghelp_path, true); - - if( !DbgHelp_lib.IsValid() ) { - nvDebug("*** 'DbgHelp.dll' not found.\n"); - return EXCEPTION_CONTINUE_SEARCH; - } - - if( !DbgHelp_lib.BindSymbol( (void **)&Dump, "MiniDumpWriteDump" ) ) { - nvDebug("*** 'DbgHelp.dll' too old.\n"); - return EXCEPTION_CONTINUE_SEARCH; - } - - // create the file - HANDLE hFile = ::CreateFile( "nv.dmp", GENERIC_WRITE, FILE_SHARE_WRITE, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL ); - if( hFile == INVALID_HANDLE_VALUE ) { - nvDebug("*** Failed to create dump file.\n"); - return EXCEPTION_CONTINUE_SEARCH; - } - - - _MINIDUMP_EXCEPTION_INFORMATION ExInfo; - - ExInfo.ThreadId = ::GetCurrentThreadId(); - ExInfo.ExceptionPointers = pExceptionInfo; - ExInfo.ClientPointers = NULL; - - // write the dump - bool ok = Dump( GetCurrentProcess(), GetCurrentProcessId(), hFile, MiniDumpNormal, &ExInfo, NULL, NULL )!=0; - ::CloseHandle(hFile); - - if( !ok ) { - nvDebug("*** Failed to save dump file.\n"); - return EXCEPTION_CONTINUE_SEARCH; - } - - nvDebug("--- Dump file saved.\n"); - */ - return EXCEPTION_CONTINUE_SEARCH; - } + // We should try to simplify the top level filter as much as possible. + // http://www.nynaeve.net/?p=128 + +#if NV_USE_SEPARATE_THREAD + + // The critical section enforcing the requirement that only one exception be + // handled by a handler at a time. + static CRITICAL_SECTION s_handler_critical_section; + + // Semaphores used to move exception handling between the exception thread + // and the handler thread. handler_start_semaphore_ is signalled by the + // exception thread to wake up the handler thread when an exception occurs. + // handler_finish_semaphore_ is signalled by the handler thread to wake up + // the exception thread when handling is complete. + static HANDLE s_handler_start_semaphore = NULL; + static HANDLE s_handler_finish_semaphore = NULL; + + // The exception handler thread. 
+ static HANDLE s_handler_thread = NULL; + + static DWORD s_requesting_thread_id = 0; + static EXCEPTION_POINTERS * s_exception_info = NULL; + +#endif // NV_USE_SEPARATE_THREAD + + + struct MinidumpCallbackContext { + ULONG64 memory_base; + ULONG memory_size; + bool finished; + }; + + // static + static BOOL CALLBACK miniDumpWriteDumpCallback(PVOID context, const PMINIDUMP_CALLBACK_INPUT callback_input, PMINIDUMP_CALLBACK_OUTPUT callback_output) + { + switch (callback_input->CallbackType) + { + case MemoryCallback: { + MinidumpCallbackContext* callback_context = reinterpret_cast(context); + if (callback_context->finished) + return FALSE; + + // Include the specified memory region. + callback_output->MemoryBase = callback_context->memory_base; + callback_output->MemorySize = callback_context->memory_size; + callback_context->finished = true; + return TRUE; + } + + // Include all modules. + case IncludeModuleCallback: + case ModuleCallback: + return TRUE; + + // Include all threads. + case IncludeThreadCallback: + case ThreadCallback: + return TRUE; + + // Stop receiving cancel callbacks. + case CancelCallback: + callback_output->CheckCancel = FALSE; + callback_output->Cancel = FALSE; + return TRUE; + } + + // Ignore other callback types. + return FALSE; + } + + static bool writeMiniDump(EXCEPTION_POINTERS * pExceptionInfo) + { + // create the file + HANDLE hFile = CreateFileA("crash.dmp", GENERIC_WRITE, FILE_SHARE_WRITE, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL); + if (hFile == INVALID_HANDLE_VALUE) { + //nvDebug("*** Failed to create dump file.\n"); + return false; + } + + MINIDUMP_EXCEPTION_INFORMATION * pExInfo = NULL; + MINIDUMP_CALLBACK_INFORMATION * pCallback = NULL; + + if (pExceptionInfo != NULL) { + MINIDUMP_EXCEPTION_INFORMATION ExInfo; + ExInfo.ThreadId = ::GetCurrentThreadId(); + ExInfo.ExceptionPointers = pExceptionInfo; + ExInfo.ClientPointers = NULL; + pExInfo = &ExInfo; + + MINIDUMP_CALLBACK_INFORMATION callback; + MinidumpCallbackContext context; + + // Find a memory region of 256 bytes centered on the + // faulting instruction pointer. + const ULONG64 instruction_pointer = + #if defined(_M_IX86) + pExceptionInfo->ContextRecord->Eip; + #elif defined(_M_AMD64) + pExceptionInfo->ContextRecord->Rip; + #else + #error Unsupported platform + #endif + + MEMORY_BASIC_INFORMATION info; + + if (VirtualQuery(reinterpret_cast(instruction_pointer), &info, sizeof(MEMORY_BASIC_INFORMATION)) != 0 && info.State == MEM_COMMIT) + { + // Attempt to get 128 bytes before and after the instruction + // pointer, but settle for whatever's available up to the + // boundaries of the memory region. 
+ const ULONG64 kIPMemorySize = 256; + context.memory_base = max(reinterpret_cast(info.BaseAddress), instruction_pointer - (kIPMemorySize / 2)); + ULONG64 end_of_range = min(instruction_pointer + (kIPMemorySize / 2), reinterpret_cast(info.BaseAddress) + info.RegionSize); + context.memory_size = static_cast(end_of_range - context.memory_base); + context.finished = false; + + callback.CallbackRoutine = miniDumpWriteDumpCallback; + callback.CallbackParam = reinterpret_cast(&context); + pCallback = &callback; + } + } + + MINIDUMP_TYPE miniDumpType = (MINIDUMP_TYPE)(MiniDumpNormal|MiniDumpWithHandleData|MiniDumpWithThreadInfo); + + // write the dump + BOOL ok = MiniDumpWriteDump(GetCurrentProcess(), GetCurrentProcessId(), hFile, miniDumpType, pExInfo, NULL, pCallback) != 0; + CloseHandle(hFile); + + if (ok == FALSE) { + //nvDebug("*** Failed to save dump file.\n"); + return false; + } + + //nvDebug("\nDump file saved.\n"); + + return true; + } + +#if NV_USE_SEPARATE_THREAD + + static DWORD WINAPI ExceptionHandlerThreadMain(void* lpParameter) { + nvDebugCheck(s_handler_start_semaphore != NULL); + nvDebugCheck(s_handler_finish_semaphore != NULL); + + while (true) { + if (WaitForSingleObject(s_handler_start_semaphore, INFINITE) == WAIT_OBJECT_0) { + writeMiniDump(s_exception_info); + + // Allow the requesting thread to proceed. + ReleaseSemaphore(s_handler_finish_semaphore, 1, NULL); + } + } + + // This statement is not reached when the thread is unconditionally + // terminated by the ExceptionHandler destructor. + return 0; + } + +#endif // NV_USE_SEPARATE_THREAD + + static bool hasStackTrace() { + return true; + } + + /*static NV_NOINLINE int backtrace(void * trace[], int maxcount) { + + // In Windows XP and Windows Server 2003, the sum of the FramesToSkip and FramesToCapture parameters must be less than 63. + int xp_maxcount = min(63-1, maxcount); + + int count = RtlCaptureStackBackTrace(1, xp_maxcount, trace, NULL); + nvDebugCheck(count <= maxcount); + + return count; + }*/ + + static NV_NOINLINE int backtraceWithSymbols(CONTEXT * ctx, void * trace[], int maxcount, int skip = 0) { + + // Init the stack frame for this function + STACKFRAME64 stackFrame = { 0 }; + + #if NV_CPU_X86_64 + DWORD dwMachineType = IMAGE_FILE_MACHINE_AMD64; + stackFrame.AddrPC.Offset = ctx->Rip; + stackFrame.AddrFrame.Offset = ctx->Rbp; + stackFrame.AddrStack.Offset = ctx->Rsp; + #elif NV_CPU_X86 + DWORD dwMachineType = IMAGE_FILE_MACHINE_I386; + stackFrame.AddrPC.Offset = ctx->Eip; + stackFrame.AddrFrame.Offset = ctx->Ebp; + stackFrame.AddrStack.Offset = ctx->Esp; + #else + #error "Platform not supported!" 
+ #endif + stackFrame.AddrPC.Mode = AddrModeFlat; + stackFrame.AddrFrame.Mode = AddrModeFlat; + stackFrame.AddrStack.Mode = AddrModeFlat; + + // Walk up the stack + const HANDLE hThread = GetCurrentThread(); + const HANDLE hProcess = GetCurrentProcess(); + int i; + for (i = 0; i < maxcount; i++) + { + // walking once first makes us skip self + if (!StackWalk64(dwMachineType, hProcess, hThread, &stackFrame, ctx, NULL, &SymFunctionTableAccess64, &SymGetModuleBase64, NULL)) { + break; + } + + /*if (stackFrame.AddrPC.Offset == stackFrame.AddrReturn.Offset || stackFrame.AddrPC.Offset == 0) { + break; + }*/ + + if (i >= skip) { + trace[i - skip] = (PVOID)stackFrame.AddrPC.Offset; + } + } + + return i - skip; + } + +#pragma warning(push) +#pragma warning(disable:4748) + static NV_NOINLINE int backtrace(void * trace[], int maxcount) { + CONTEXT ctx = { 0 }; +#if NV_CPU_X86 && !NV_CPU_X86_64 + ctx.ContextFlags = CONTEXT_CONTROL; + _asm { + call x + x: pop eax + mov ctx.Eip, eax + mov ctx.Ebp, ebp + mov ctx.Esp, esp + } +#else + RtlCaptureContext(&ctx); // Not implemented correctly in x86. +#endif + + return backtraceWithSymbols(&ctx, trace, maxcount, 1); + } +#pragma warning(pop) + + static NV_NOINLINE void writeStackTrace(void * trace[], int size, int start, Array & lines) + { + StringBuilder builder(512); + + HANDLE hProcess = GetCurrentProcess(); + + // Resolve PC to function names + for (int i = start; i < size; i++) + { + // Check for end of stack walk + DWORD64 ip = (DWORD64)trace[i]; + if (ip == NULL) + break; + + // Get function name + #define MAX_STRING_LEN (512) + unsigned char byBuffer[sizeof(IMAGEHLP_SYMBOL64) + MAX_STRING_LEN] = { 0 }; + IMAGEHLP_SYMBOL64 * pSymbol = (IMAGEHLP_SYMBOL64*)byBuffer; + pSymbol->SizeOfStruct = sizeof(IMAGEHLP_SYMBOL64); + pSymbol->MaxNameLength = MAX_STRING_LEN; + + DWORD64 dwDisplacement; + + if (SymGetSymFromAddr64(hProcess, ip, &dwDisplacement, pSymbol)) + { + pSymbol->Name[MAX_STRING_LEN-1] = 0; + + /* + // Make the symbol readable for humans + UnDecorateSymbolName( pSym->Name, lpszNonUnicodeUnDSymbol, BUFFERSIZE, + UNDNAME_COMPLETE | + UNDNAME_NO_THISTYPE | + UNDNAME_NO_SPECIAL_SYMS | + UNDNAME_NO_MEMBER_TYPE | + UNDNAME_NO_MS_KEYWORDS | + UNDNAME_NO_ACCESS_SPECIFIERS ); + */ + + // pSymbol->Name + const char * pFunc = pSymbol->Name; + + // Get file/line number + IMAGEHLP_LINE64 theLine = { 0 }; + theLine.SizeOfStruct = sizeof(theLine); + + DWORD dwDisplacement; + if (!SymGetLineFromAddr64(hProcess, ip, &dwDisplacement, &theLine)) + { + // Do not print unknown symbols anymore. + break; + //builder.format("unknown(%08X) : %s\n", (uint32)ip, pFunc); + } + else + { + /* + const char* pFile = strrchr(theLine.FileName, '\\'); + if ( pFile == NULL ) pFile = theLine.FileName; + else pFile++; + */ + const char * pFile = theLine.FileName; + + int line = theLine.LineNumber; + + builder.format("%s(%d) : %s\n", pFile, line, pFunc); + } + + lines.append(builder.release()); + + if (pFunc != NULL && strcmp(pFunc, "WinMain") == 0) { + break; + } + } + } + } + + + // Write mini dump and print stack trace. + static LONG WINAPI handleException(EXCEPTION_POINTERS * pExceptionInfo) + { + EnterCriticalSection(&s_handler_critical_section); +#if NV_USE_SEPARATE_THREAD + s_requesting_thread_id = GetCurrentThreadId(); + s_exception_info = pExceptionInfo; + + // This causes the handler thread to call writeMiniDump. + ReleaseSemaphore(s_handler_start_semaphore, 1, NULL); + + // Wait until WriteMinidumpWithException is done and collect its return value. 
+ WaitForSingleObject(s_handler_finish_semaphore, INFINITE); + //bool status = s_handler_return_value; + + // Clean up. + s_requesting_thread_id = 0; + s_exception_info = NULL; +#else + // First of all, write mini dump. + writeMiniDump(pExceptionInfo); +#endif + LeaveCriticalSection(&s_handler_critical_section); + + nvDebug("\nDump file saved.\n"); + + // Try to attach to debugger. + if (s_interactive && debug::attachToDebugger()) { + nvDebugBreak(); + return EXCEPTION_CONTINUE_EXECUTION; + } + + // If that fails, then try to pretty print a stack trace and terminate. + void * trace[64]; + + int size = backtraceWithSymbols(pExceptionInfo->ContextRecord, trace, 64); + + // @@ Use win32's CreateFile? + FILE * fp = fileOpen("crash.txt", "wb"); + if (fp != NULL) { + Array lines; + writeStackTrace(trace, size, 0, lines); + + for (uint i = 0; i < lines.count(); i++) { + fputs(lines[i], fp); + delete lines[i]; + } + + // @@ Add more info to crash.txt? + + fclose(fp); + } + + // This should terminate the process and set the error exit code. + TerminateProcess(GetCurrentProcess(), EXIT_FAILURE + 2); + + return EXCEPTION_EXECUTE_HANDLER; // Terminate app. In case terminate process did not succeed. + } + + static void handlePureVirtualCall() { + nvDebugBreak(); + TerminateProcess(GetCurrentProcess(), EXIT_FAILURE + 8); + } + + static void handleInvalidParameter(const wchar_t * wexpresion, const wchar_t * wfunction, const wchar_t * wfile, unsigned int line, uintptr_t reserved) { + + size_t convertedCharCount = 0; + + StringBuilder expresion; + if (wexpresion != NULL) { + uint size = U32(wcslen(wexpresion) + 1); + expresion.reserve(size); + wcstombs_s(&convertedCharCount, expresion.str(), size, wexpresion, _TRUNCATE); + } + + StringBuilder file; + if (wfile != NULL) { + uint size = U32(wcslen(wfile) + 1); + file.reserve(size); + wcstombs_s(&convertedCharCount, file.str(), size, wfile, _TRUNCATE); + } + + StringBuilder function; + if (wfunction != NULL) { + uint size = U32(wcslen(wfunction) + 1); + function.reserve(size); + wcstombs_s(&convertedCharCount, function.str(), size, wfunction, _TRUNCATE); + } + + int result = nvAbort(expresion.str(), file.str(), line, function.str()); + if (result == NV_ABORT_DEBUG) { + nvDebugBreak(); + } + } #elif !NV_OS_WIN32 && defined(HAVE_SIGNAL_H) // NV_OS_LINUX || NV_OS_DARWIN -#if defined(HAVE_EXECINFO_H) // NV_OS_LINUX +#if defined(HAVE_EXECINFO_H) + + static bool hasStackTrace() { + return true; + } + + + static void writeStackTrace(void * trace[], int size, int start, Array & lines) { + StringBuilder builder(512); + char ** string_array = backtrace_symbols(trace, size); + + for(int i = start; i < size-1; i++ ) { +# if NV_CC_GNUC // defined(HAVE_CXXABI_H) + // @@ Write a better parser for the possible formats. + char * begin = strchr(string_array[i], '('); + char * end = strrchr(string_array[i], '+'); + char * module = string_array[i]; + + if (begin == 0 && end != 0) { + *(end - 1) = '\0'; + begin = strrchr(string_array[i], ' '); + module = NULL; // Ignore module. 
+ } + + if (begin != 0 && begin < end) { + int stat; + *end = '\0'; + *begin = '\0'; + char * name = abi::__cxa_demangle(begin+1, 0, 0, &stat); + if (module == NULL) { + if (name == NULL || stat != 0) { + builder.format(" In: '%s'\n", begin+1); + } + else { + builder.format(" In: '%s'\n", name); + } + } + else { + if (name == NULL || stat != 0) { + builder.format(" In: [%s] '%s'\n", module, begin+1); + } + else { + builder.format(" In: [%s] '%s'\n", module, name); + } + } + free(name); + } + else { + builder.format(" In: '%s'\n", string_array[i]); + } +# else + builder.format(" In: '%s'\n", string_array[i]); +# endif + lines.append(builder.release()); + } + + free(string_array); + } + + static void printStackTrace(void * trace[], int size, int start=0) { + nvDebug( "\nDumping stacktrace:\n" ); + + Array lines; + writeStackTrace(trace, size, 1, lines); + + for (uint i = 0; i < lines.count(); i++) { + nvDebug("%s", lines[i]); + delete lines[i]; + } - static bool nvHasStackTrace() { + nvDebug("\n"); + } + +#endif // defined(HAVE_EXECINFO_H) + + static void * callerAddress(void * secret) + { #if NV_OS_DARWIN - return backtrace != NULL; +# if defined(_STRUCT_MCONTEXT) +# if NV_CPU_PPC + ucontext_t * ucp = (ucontext_t *)secret; + return (void *) ucp->uc_mcontext->__ss.__srr0; +# elif NV_CPU_X86_64 + ucontext_t * ucp = (ucontext_t *)secret; + return (void *) ucp->uc_mcontext->__ss.__rip; +# elif NV_CPU_X86 + ucontext_t * ucp = (ucontext_t *)secret; + return (void *) ucp->uc_mcontext->__ss.__eip; +# elif NV_CPU_ARM + ucontext_t * ucp = (ucontext_t *)secret; + return (void *) ucp->uc_mcontext->__ss.__pc; +# else +# error "Unknown CPU" +# endif +# else +# if NV_CPU_PPC + ucontext_t * ucp = (ucontext_t *)secret; + return (void *) ucp->uc_mcontext->ss.srr0; +# elif NV_CPU_X86 + ucontext_t * ucp = (ucontext_t *)secret; + return (void *) ucp->uc_mcontext->ss.eip; +# else +# error "Unknown CPU" +# endif +# endif +#elif NV_OS_FREEBSD +# if NV_CPU_X86_64 + ucontext_t * ucp = (ucontext_t *)secret; + return (void *)ucp->uc_mcontext.mc_rip; +# elif NV_CPU_X86 + ucontext_t * ucp = (ucontext_t *)secret; + return (void *)ucp->uc_mcontext.mc_eip; +# else +# error "Unknown CPU" +# endif +#elif NV_OS_NETBSD +# if NV_CPU_X86_64 + ucontext_t * ucp = (ucontext_t *)secret; + return (void *)ucp->uc_mcontext.__gregs[_REG_RIP]; +# elif NV_CPU_X86 + ucontext_t * ucp = (ucontext_t *)secret; + return (void *)ucp->uc_mcontext.__gregs[_REG_EIP]; +# elif NV_CPU_PPC + ucontext_t * ucp = (ucontext_t *)secret; + return (void *) ucp->uc_mcontext.__gregs[_REG_PC]; +# else +# error "Unknown CPU" +# endif +#elif NV_OS_OPENBSD +# if NV_CPU_X86_64 + ucontext_t * ucp = (ucontext_t *)secret; + return (void *)ucp->sc_rip; +# elif NV_CPU_X86 + ucontext_t * ucp = (ucontext_t *)secret; + return (void *)ucp->sc_eip; +# else +# error "Unknown CPU" +# endif #else - return true; +# if NV_CPU_X86_64 + // #define REG_RIP REG_INDEX(rip) // seems to be 16 + ucontext_t * ucp = (ucontext_t *)secret; + return (void *)ucp->uc_mcontext.gregs[REG_RIP]; +# elif NV_CPU_X86 + ucontext_t * ucp = (ucontext_t *)secret; + return (void *)ucp->uc_mcontext.gregs[14/*REG_EIP*/]; +# elif NV_CPU_PPC + ucontext_t * ucp = (ucontext_t *)secret; + return (void *) ucp->uc_mcontext.regs->nip; +# elif NV_CPU_AARCH64 + ucontext_t * ucp = (ucontext_t *)secret; + return (void *) ucp->uc_mcontext.pc; +# else +# error "Unknown CPU" +# endif #endif - } - static void nvPrintStackTrace(void * trace[], int size, int start=0) { - char ** string_array = backtrace_symbols(trace, size); - 
- nvDebug( "\nDumping stacktrace:\n" ); - for(int i = start; i < size-1; i++ ) { -# if NV_CC_GNUC // defined(HAVE_CXXABI_H) - char * begin = strchr(string_array[i], '('); - char * end = strchr(string_array[i], '+'); - if( begin != 0 && begin < end ) { - int stat; - *end = '\0'; - *begin = '\0'; - char * module = string_array[i]; - char * name = abi::__cxa_demangle(begin+1, 0, 0, &stat); - if( name == NULL || begin[1] != '_' || begin[2] != 'Z' ) { - nvDebug( " In: [%s] '%s'\n", module, begin+1 ); - } - else { - nvDebug( " In: [%s] '%s'\n", module, name ); - } - free(name); - } - else { - nvDebug( " In: '%s'\n", string_array[i] ); - } -# else - nvDebug( " In: '%s'\n", string_array[i] ); -# endif - } - nvDebug("\n"); - - free(string_array); - } + // How to obtain the instruction pointers in different platforms, from mlton's source code. + // http://mlton.org/ + // OpenBSD + // ucp->sc_eip + // FreeBSD: + // ucp->uc_mcontext.mc_eip + // HPUX: + // ucp->uc_link + // Solaris: + // ucp->uc_mcontext.gregs[REG_PC] + // Linux hppa: + // uc->uc_mcontext.sc_iaoq[0] & ~0x3UL + // Linux sparc: + // ((struct sigcontext*) secret)->sigc_regs.tpc + // Linux sparc64: + // ((struct sigcontext*) secret)->si_regs.pc + + // potentially correct for other archs: + // Linux alpha: ucp->m_context.sc_pc + // Linux arm: ucp->m_context.ctx.arm_pc + // Linux ia64: ucp->m_context.sc_ip & ~0x3UL + // Linux mips: ucp->m_context.sc_pc + // Linux s390: ucp->m_context.sregs->regs.psw.addr + } + + static void nvSigHandler(int sig, siginfo_t *info, void *secret) + { + void * pnt = callerAddress(secret); + + // Do something useful with siginfo_t + if (sig == SIGSEGV) { + if (pnt != NULL) nvDebug("Got signal %d, faulty address is %p, from %p\n", sig, info->si_addr, pnt); + else nvDebug("Got signal %d, faulty address is %p\n", sig, info->si_addr); + } + else if(sig == SIGTRAP) { + nvDebug("Breakpoint hit.\n"); + } + else { + nvDebug("Got signal %d\n", sig); + } + +#if defined(HAVE_EXECINFO_H) + if (hasStackTrace()) // in case of weak linking + { + void * trace[64]; + int size = backtrace(trace, 64); + + if (pnt != NULL) { + // Overwrite sigaction with caller's address. 
+ trace[1] = pnt; + } + printStackTrace(trace, size, 1); + } #endif // defined(HAVE_EXECINFO_H) - static void * callerAddress(void * secret) - { -# if NV_OS_DARWIN -# if defined(_STRUCT_MCONTEXT) -# if NV_CPU_PPC - ucontext_t * ucp = (ucontext_t *)secret; - return (void *) ucp->uc_mcontext->__ss.__srr0; -# elif NV_CPU_X86 - ucontext_t * ucp = (ucontext_t *)secret; - return (void *) ucp->uc_mcontext->__ss.__eip; -# endif -# else -# if NV_CPU_PPC - ucontext_t * ucp = (ucontext_t *)secret; - return (void *) ucp->uc_mcontext->ss.srr0; -# elif NV_CPU_X86 - ucontext_t * ucp = (ucontext_t *)secret; - return (void *) ucp->uc_mcontext->ss.eip; -# endif -# endif -# elif NV_OS_FREEBSD -# if NV_CPU_X86_64 - ucontext_t * ucp = (ucontext_t *)secret; - return (void *)ucp->uc_mcontext.mc_rip; -# elif NV_CPU_X86 - ucontext_t * ucp = (ucontext_t *)secret; - return (void *)ucp->uc_mcontext.mc_eip; -# endif -# elif NV_OS_OPENBSD -# if NV_CPU_X86_64 - ucontext_t * ucp = (ucontext_t *)secret; - return (void *)ucp->sc_rip; -# elif NV_CPU_X86 - ucontext_t * ucp = (ucontext_t *)secret; - return (void *)ucp->sc_eip; -# endif -# else -# if NV_CPU_X86_64 - // #define REG_RIP REG_INDEX(rip) // seems to be 16 - ucontext_t * ucp = (ucontext_t *)secret; - return (void *)ucp->uc_mcontext.gregs[REG_RIP]; -# elif NV_CPU_X86 - ucontext_t * ucp = (ucontext_t *)secret; - return (void *)ucp->uc_mcontext.gregs[14/*REG_EIP*/]; -# elif NV_CPU_PPC - ucontext_t * ucp = (ucontext_t *)secret; - return (void *) ucp->uc_mcontext.regs->nip; -# elif NV_CPU_AARCH64 - ucontext_t * ucp = (ucontext_t *)secret; - return (void *) ucp->uc_mcontext.pc; -# endif -# endif - - // How to obtain the instruction pointers in different platforms, from mlton's source code. - // http://mlton.org/ - // OpenBSD && NetBSD - // ucp->sc_eip - // FreeBSD: - // ucp->uc_mcontext.mc_eip - // HPUX: - // ucp->uc_link - // Solaris: - // ucp->uc_mcontext.gregs[REG_PC] - // Linux hppa: - // uc->uc_mcontext.sc_iaoq[0] & ~0x3UL - // Linux sparc: - // ((struct sigcontext*) secret)->sigc_regs.tpc - // Linux sparc64: - // ((struct sigcontext*) secret)->si_regs.pc - - // potentially correct for other archs: - // Linux alpha: ucp->m_context.sc_pc - // Linux arm: ucp->m_context.ctx.arm_pc - // Linux ia64: ucp->m_context.sc_ip & ~0x3UL - // Linux mips: ucp->m_context.sc_pc - // Linux s390: ucp->m_context.sregs->regs.psw.addr - } - - static void nvSigHandler(int sig, siginfo_t *info, void *secret) - { - void * pnt = callerAddress(secret); - - // Do something useful with siginfo_t - if (sig == SIGSEGV) { - if (pnt != NULL) nvDebug("Got signal %d, faulty address is %p, from %p\n", sig, info->si_addr, pnt); - else nvDebug("Got signal %d, faulty address is %p\n", sig, info->si_addr); - } - else if(sig == SIGTRAP) { - nvDebug("Breakpoint hit.\n"); - } - else { - nvDebug("Got signal %d\n", sig); - } - -# if defined(HAVE_EXECINFO_H) - if (nvHasStackTrace()) // in case of weak linking - { - void * trace[64]; - int size = backtrace(trace, 64); - - if (pnt != NULL) { - // Overwrite sigaction with caller's address. - trace[1] = pnt; - } - - nvPrintStackTrace(trace, size, 1); - } -# endif // defined(HAVE_EXECINFO_H) - - exit(0); - } + exit(0); + } #endif // defined(HAVE_SIGNAL_H) #if NV_OS_WIN32 //&& NV_CC_MSVC - - /** Win32 asset handler. */ - struct Win32AssertHandler : public AssertHandler - { - // Code from Daniel Vogel. 
- static bool isDebuggerPresent() - { - bool result = false; - - HINSTANCE kern_lib = LoadLibraryExA( "kernel32.dll", NULL, 0 ); - if( kern_lib ) { - FARPROC lIsDebuggerPresent = GetProcAddress( kern_lib, "IsDebuggerPresent" ); - if( lIsDebuggerPresent && lIsDebuggerPresent() ) { - result = true; - } - - FreeLibrary( kern_lib ); - } - return result; - } - - // Flush the message queue. This is necessary for the message box to show up. - static void flushMessageQueue() - { - MSG msg; - while( PeekMessage( &msg, NULL, 0, 0, PM_REMOVE ) ) { - if( msg.message == WM_QUIT ) break; - TranslateMessage( &msg ); - DispatchMessage( &msg ); - } - } - - // Assert handler method. - virtual int assert( const char * exp, const char * file, int line, const char * func/*=NULL*/ ) - { - int ret = NV_ABORT_EXIT; - - StringBuilder error_string; - if( func != NULL ) { - error_string.format( "*** Assertion failed: %s\n On file: %s\n On function: %s\n On line: %d\n ", exp, file, func, line ); - nvDebug( error_string ); - } - else { - error_string.format( "*** Assertion failed: %s\n On file: %s\n On line: %d\n ", exp, file, line ); - nvDebug( error_string ); - } - - #if _DEBUG - - if( isDebuggerPresent() ) { - return NV_ABORT_DEBUG; - } - - flushMessageQueue(); - int action = MessageBoxA(NULL, error_string, "Assertion failed", MB_ABORTRETRYIGNORE|MB_ICONERROR); - switch( action ) { - case IDRETRY: - ret = NV_ABORT_DEBUG; - break; - case IDIGNORE: - ret = NV_ABORT_IGNORE; - break; - case IDABORT: - default: - ret = NV_ABORT_EXIT; - break; - } - /*if( _CrtDbgReport( _CRT_ASSERT, file, line, module, exp ) == 1 ) { - return NV_ABORT_DEBUG; - }*/ - - #endif - - if( ret == NV_ABORT_EXIT ) { - // Exit cleanly. - throw std::runtime_error("Assertion failed"); - } - - return ret; - } - }; - + + /** Win32 assert handler. */ + struct Win32AssertHandler : public AssertHandler + { + // Flush the message queue. This is necessary for the message box to show up. + static void flushMessageQueue() + { + MSG msg; + while( PeekMessage( &msg, NULL, 0, 0, PM_REMOVE ) ) { + //if( msg.message == WM_QUIT ) break; + TranslateMessage( &msg ); + DispatchMessage( &msg ); + } + } + + // Assert handler method. + virtual int assertion(const char * exp, const char * file, int line, const char * func, const char * msg, va_list arg) + { + int ret = NV_ABORT_EXIT; + + StringBuilder error_string; + error_string.format("*** Assertion failed: %s\n On file: %s\n On line: %d\n", exp, file, line ); + if (func != NULL) { + error_string.appendFormat(" On function: %s\n", func); + } + if (msg != NULL) { + error_string.append(" Message: "); + va_list tmp; + va_copy(tmp, arg); + error_string.appendFormatList(msg, tmp); + va_end(tmp); + error_string.append("\n"); + } + nvDebug( error_string.str() ); + + // Print stack trace: + debug::dumpInfo(); + + if (debug::isDebuggerPresent()) { + return NV_ABORT_DEBUG; + } + + if (s_interactive) { + flushMessageQueue(); + int action = MessageBoxA(NULL, error_string.str(), "Assertion failed", MB_ABORTRETRYIGNORE | MB_ICONERROR | MB_TOPMOST); + switch( action ) { + case IDRETRY: + ret = NV_ABORT_DEBUG; + break; + case IDIGNORE: + ret = NV_ABORT_IGNORE; + break; + case IDABORT: + default: + ret = NV_ABORT_EXIT; + break; + } + /*if( _CrtDbgReport( _CRT_ASSERT, file, line, module, exp ) == 1 ) { + return NV_ABORT_DEBUG; + }*/ + } + + if (ret == NV_ABORT_EXIT) { + // Exit cleanly. + exit(EXIT_FAILURE + 1); + } + + return ret; + } + }; +#elif NV_OS_XBOX + + /** Xbox360 assert handler. 
*/ + struct Xbox360AssertHandler : public AssertHandler + { + // Assert handler method. + virtual int assertion(const char * exp, const char * file, int line, const char * func, const char * msg, va_list arg) + { + int ret = NV_ABORT_EXIT; + + StringBuilder error_string; + if( func != NULL ) { + error_string.format( "*** Assertion failed: %s\n On file: %s\n On function: %s\n On line: %d\n ", exp, file, func, line ); + nvDebug( error_string.str() ); + } + else { + error_string.format( "*** Assertion failed: %s\n On file: %s\n On line: %d\n ", exp, file, line ); + nvDebug( error_string.str() ); + } + + if (debug::isDebuggerPresent()) { + return NV_ABORT_DEBUG; + } + + if( ret == NV_ABORT_EXIT ) { + // Exit cleanly. + exit(EXIT_FAILURE + 1); + } + + return ret; + } + }; +#elif NV_OS_ORBIS + + /** Orbis assert handler. */ + struct OrbisAssertHandler : public AssertHandler + { + // Assert handler method. + virtual int assertion(const char * exp, const char * file, int line, const char * func, const char * msg, va_list arg) + { + if( func != NULL ) { + nvDebug( "*** Assertion failed: %s\n On file: %s\n On function: %s\n On line: %d\n ", exp, file, func, line ); + } + else { + nvDebug( "*** Assertion failed: %s\n On file: %s\n On line: %d\n ", exp, file, line ); + } + + //SBtodoORBIS print stack trace + /*if (hasStackTrace()) + { + void * trace[64]; + int size = backtrace(trace, 64); + printStackTrace(trace, size, 2); + }*/ + + if (debug::isDebuggerPresent()) + return NV_ABORT_DEBUG; + + return NV_ABORT_IGNORE; + } + }; + #else - - /** Unix asset handler. */ - struct UnixAssertHandler : public AssertHandler - { - bool isDebuggerPresent() - { -# if NV_OS_DARWIN - int mib[4]; - struct kinfo_proc info; - size_t size; - mib[0] = CTL_KERN; - mib[1] = KERN_PROC; - mib[2] = KERN_PROC_PID; - mib[3] = getpid(); - size = sizeof(info); - info.kp_proc.p_flag = 0; - sysctl(mib,4,&info,&size,NULL,0); - return ((info.kp_proc.p_flag & P_TRACED) == P_TRACED); -# else - // if ppid != sid, some process spawned our app, probably a debugger. - return getsid(getpid()) != getppid(); -# endif - } - - // Assert handler method. - virtual int assert(const char * exp, const char * file, int line, const char * func) - { - if( func != NULL ) { - nvDebug( "*** Assertion failed: %s\n On file: %s\n On function: %s\n On line: %d\n ", exp, file, func, line ); - } - else { - nvDebug( "*** Assertion failed: %s\n On file: %s\n On line: %d\n ", exp, file, line ); - } - -# if _DEBUG - if( isDebuggerPresent() ) { - return NV_ABORT_DEBUG; - } -# endif - -# if defined(HAVE_EXECINFO_H) - if (nvHasStackTrace()) - { - void * trace[64]; - int size = backtrace(trace, 64); - nvPrintStackTrace(trace, size, 2); - } -# endif - - // Exit cleanly. - throw std::runtime_error("Assertion failed"); - } - }; - + + /** Unix assert handler. */ + struct UnixAssertHandler : public AssertHandler + { + // Assert handler method. 
+ virtual int assertion(const char * exp, const char * file, int line, const char * func, const char * msg, va_list arg) + { + int ret = NV_ABORT_EXIT; + + if( func != NULL ) { + nvDebug( "*** Assertion failed: %s\n On file: %s\n On function: %s\n On line: %d\n ", exp, file, func, line ); + } + else { + nvDebug( "*** Assertion failed: %s\n On file: %s\n On line: %d\n ", exp, file, line ); + } + +#if _DEBUG + if (debug::isDebuggerPresent()) { + return NV_ABORT_DEBUG; + } +#endif + +#if defined(HAVE_EXECINFO_H) + if (hasStackTrace()) + { + void * trace[64]; + int size = backtrace(trace, 64); + printStackTrace(trace, size, 2); + } +#endif + + if( ret == NV_ABORT_EXIT ) { + // Exit cleanly. + exit(EXIT_FAILURE + 1); + } + + return ret; + } + }; + #endif } // namespace -/// Handle assertion through the asset handler. -int nvAbort(const char * exp, const char * file, int line, const char * func) +/// Handle assertion through the assert handler. +int nvAbort(const char * exp, const char * file, int line, const char * func/*=NULL*/, const char * msg/*= NULL*/, ...) { #if NV_OS_WIN32 //&& NV_CC_MSVC - static Win32AssertHandler s_default_assert_handler; + static Win32AssertHandler s_default_assert_handler; +#elif NV_OS_XBOX + static Xbox360AssertHandler s_default_assert_handler; +#elif NV_OS_ORBIS + static OrbisAssertHandler s_default_assert_handler; #else - static UnixAssertHandler s_default_assert_handler; + static UnixAssertHandler s_default_assert_handler; +#endif + + va_list arg; + va_start(arg,msg); + + AssertHandler * handler = s_assert_handler != NULL ? s_assert_handler : &s_default_assert_handler; + int result = handler->assertion(exp, file, line, func, msg, arg); + + va_end(arg); + + return result; +} + +// Abnormal termination. Create mini dump and output call stack. +void debug::terminate(int code) +{ +#if NV_OS_WIN32 + EnterCriticalSection(&s_handler_critical_section); + + writeMiniDump(NULL); + + const int max_stack_size = 64; + void * trace[max_stack_size]; + int size = backtrace(trace, max_stack_size); + + // @@ Use win32's CreateFile? + FILE * fp = fileOpen("crash.txt", "wb"); + if (fp != NULL) { + Array lines; + writeStackTrace(trace, size, 0, lines); + + for (uint i = 0; i < lines.count(); i++) { + fputs(lines[i], fp); + delete lines[i]; + } + + // @@ Add more info to crash.txt? + + fclose(fp); + } + + LeaveCriticalSection(&s_handler_critical_section); #endif - - if( s_assert_handler != NULL ) { - return s_assert_handler->assert( exp, file, line, func ); - } - else { - return s_default_assert_handler.assert( exp, file, line, func ); - } + + exit(code); } /// Shows a message through the message handler. -void NV_CDECL nvDebug(const char *msg, ...) +void NV_CDECL nvDebugPrint(const char *msg, ...) { - va_list arg; - va_start(arg,msg); - if( s_message_handler != NULL ) { - s_message_handler->log( msg, arg ); - } - va_end(arg); + va_list arg; + va_start(arg,msg); + if (s_message_handler != NULL) { + s_message_handler->log( msg, arg ); + } + va_end(arg); } /// Dump debug info. 
void debug::dumpInfo() { -#if !NV_OS_WIN32 && defined(HAVE_SIGNAL_H) && defined(HAVE_EXECINFO_H) - if (nvHasStackTrace()) - { - void * trace[64]; - int size = backtrace(trace, 64); - nvPrintStackTrace(trace, size, 1); - } +#if (NV_OS_WIN32 && NV_CC_MSVC) || (defined(HAVE_SIGNAL_H) && defined(HAVE_EXECINFO_H)) + if (hasStackTrace()) + { + void * trace[64]; + int size = backtrace(trace, 64); + + nvDebug( "\nDumping stacktrace:\n" ); + + Array lines; + writeStackTrace(trace, size, 1, lines); + + for (uint i = 0; i < lines.count(); i++) { + nvDebug("%s", lines[i]); + delete lines[i]; + } + } +#endif +} + +/// Dump callstack using the specified handler. +void debug::dumpCallstack(MessageHandler *messageHandler, int callstackLevelsToSkip /*= 0*/) +{ +#if (NV_OS_WIN32 && NV_CC_MSVC) || (defined(HAVE_SIGNAL_H) && defined(HAVE_EXECINFO_H)) + if (hasStackTrace()) + { + void * trace[64]; + int size = backtrace(trace, 64); + + Array lines; + writeStackTrace(trace, size, callstackLevelsToSkip + 1, lines); // + 1 to skip the call to dumpCallstack + + for (uint i = 0; i < lines.count(); i++) { + messageHandler->log(lines[i], NULL); + delete lines[i]; + } + } #endif } @@ -491,72 +1032,239 @@ /// Set the debug message handler. void debug::setMessageHandler(MessageHandler * message_handler) { - s_message_handler = message_handler; + s_message_handler = message_handler; } /// Reset the debug message handler. void debug::resetMessageHandler() { - s_message_handler = NULL; + s_message_handler = NULL; } /// Set the assert handler. void debug::setAssertHandler(AssertHandler * assert_handler) { - s_assert_handler = assert_handler; + s_assert_handler = assert_handler; } /// Reset the assert handler. void debug::resetAssertHandler() { - s_assert_handler = NULL; + s_assert_handler = NULL; +} + +#if NV_OS_WIN32 +#if NV_USE_SEPARATE_THREAD + +static void initHandlerThread() +{ + static const int kExceptionHandlerThreadInitialStackSize = 64 * 1024; + + // Set synchronization primitives and the handler thread. Each + // ExceptionHandler object gets its own handler thread because that's the + // only way to reliably guarantee sufficient stack space in an exception, + // and it allows an easy way to get a snapshot of the requesting thread's + // context outside of an exception. + InitializeCriticalSection(&s_handler_critical_section); + + s_handler_start_semaphore = CreateSemaphore(NULL, 0, 1, NULL); + nvDebugCheck(s_handler_start_semaphore != NULL); + + s_handler_finish_semaphore = CreateSemaphore(NULL, 0, 1, NULL); + nvDebugCheck(s_handler_finish_semaphore != NULL); + + // Don't attempt to create the thread if we could not create the semaphores. + if (s_handler_finish_semaphore != NULL && s_handler_start_semaphore != NULL) { + DWORD thread_id; + s_handler_thread = CreateThread(NULL, // lpThreadAttributes + kExceptionHandlerThreadInitialStackSize, + ExceptionHandlerThreadMain, + NULL, // lpParameter + 0, // dwCreationFlags + &thread_id); + nvDebugCheck(s_handler_thread != NULL); + } + + /* @@ We should avoid loading modules in the exception handler! + dbghelp_module_ = LoadLibrary(L"dbghelp.dll"); + if (dbghelp_module_) { + minidump_write_dump_ = reinterpret_cast(GetProcAddress(dbghelp_module_, "MiniDumpWriteDump")); + } + */ +} + +static void shutHandlerThread() { + // @@ Free stuff. Terminate thread. } +#endif // NV_USE_SEPARATE_THREAD +#endif // NV_OS_WIN32 + -/// Enable signal handler. -void debug::enableSigHandler() +// Enable signal handler. 
+void debug::enableSigHandler(bool interactive) { - nvCheck(s_sig_handler_enabled != true); - s_sig_handler_enabled = true; - + nvCheck(s_sig_handler_enabled != true); + s_sig_handler_enabled = true; + s_interactive = interactive; + #if NV_OS_WIN32 && NV_CC_MSVC - - s_old_exception_filter = ::SetUnhandledExceptionFilter( nvTopLevelFilter ); - + if (interactive) { + // Do not display message boxes on error. + // http://msdn.microsoft.com/en-us/library/windows/desktop/ms680621(v=vs.85).aspx + SetErrorMode(SEM_FAILCRITICALERRORS|SEM_NOGPFAULTERRORBOX|SEM_NOOPENFILEERRORBOX); + + // CRT reports errors to debug output only. + // http://msdn.microsoft.com/en-us/library/1y71x448(v=vs.80).aspx + _CrtSetReportMode(_CRT_WARN, _CRTDBG_MODE_DEBUG); + _CrtSetReportMode(_CRT_ERROR, _CRTDBG_MODE_DEBUG); + _CrtSetReportMode(_CRT_ASSERT, _CRTDBG_MODE_DEBUG); + } + + +#if NV_USE_SEPARATE_THREAD + initHandlerThread(); +#endif + + s_old_exception_filter = ::SetUnhandledExceptionFilter( handleException ); + +#if _MSC_VER >= 1400 // MSVC 2005/8 + _set_invalid_parameter_handler(handleInvalidParameter); +#endif // _MSC_VER >= 1400 + + _set_purecall_handler(handlePureVirtualCall); + + + // SYMOPT_DEFERRED_LOADS make us not take a ton of time unless we actual log traces + SymSetOptions(SYMOPT_DEFERRED_LOADS|SYMOPT_FAIL_CRITICAL_ERRORS|SYMOPT_LOAD_LINES|SYMOPT_UNDNAME); + + if (!SymInitialize(GetCurrentProcess(), NULL, TRUE)) { + DWORD error = GetLastError(); + nvDebug("SymInitialize returned error : %d\n", error); + } + #elif !NV_OS_WIN32 && defined(HAVE_SIGNAL_H) - - // Install our signal handler - struct sigaction sa; - sa.sa_sigaction = nvSigHandler; - sigemptyset (&sa.sa_mask); - sa.sa_flags = SA_ONSTACK | SA_RESTART | SA_SIGINFO; - - sigaction(SIGSEGV, &sa, &s_old_sigsegv); - sigaction(SIGTRAP, &sa, &s_old_sigtrap); - sigaction(SIGFPE, &sa, &s_old_sigfpe); - sigaction(SIGBUS, &sa, &s_old_sigbus); - + + // Install our signal handler + struct sigaction sa; + sa.sa_sigaction = nvSigHandler; + sigemptyset (&sa.sa_mask); + sa.sa_flags = SA_ONSTACK | SA_RESTART | SA_SIGINFO; + + sigaction(SIGSEGV, &sa, &s_old_sigsegv); + sigaction(SIGTRAP, &sa, &s_old_sigtrap); + sigaction(SIGFPE, &sa, &s_old_sigfpe); + sigaction(SIGBUS, &sa, &s_old_sigbus); + #endif } /// Disable signal handler. 
void debug::disableSigHandler() { - nvCheck(s_sig_handler_enabled == true); - s_sig_handler_enabled = false; + nvCheck(s_sig_handler_enabled == true); + s_sig_handler_enabled = false; #if NV_OS_WIN32 && NV_CC_MSVC - ::SetUnhandledExceptionFilter( s_old_exception_filter ); - s_old_exception_filter = NULL; + ::SetUnhandledExceptionFilter( s_old_exception_filter ); + s_old_exception_filter = NULL; + + SymCleanup(GetCurrentProcess()); #elif !NV_OS_WIN32 && defined(HAVE_SIGNAL_H) - - sigaction(SIGSEGV, &s_old_sigsegv, NULL); - sigaction(SIGTRAP, &s_old_sigtrap, NULL); - sigaction(SIGFPE, &s_old_sigfpe, NULL); - sigaction(SIGBUS, &s_old_sigbus, NULL); - + + sigaction(SIGSEGV, &s_old_sigsegv, NULL); + sigaction(SIGTRAP, &s_old_sigtrap, NULL); + sigaction(SIGFPE, &s_old_sigfpe, NULL); + sigaction(SIGBUS, &s_old_sigbus, NULL); + #endif } + +bool debug::isDebuggerPresent() +{ +#if NV_OS_WIN32 + HINSTANCE kernel32 = GetModuleHandleA("kernel32.dll"); + if (kernel32) { + FARPROC IsDebuggerPresent = GetProcAddress(kernel32, "IsDebuggerPresent"); + if (IsDebuggerPresent != NULL && IsDebuggerPresent()) { + return true; + } + } + return false; +#elif NV_OS_XBOX +#ifdef _DEBUG + return DmIsDebuggerPresent() == TRUE; +#else + return false; +#endif +#elif NV_OS_ORBIS + #if PS4_FINAL_REQUIREMENTS + return false; + #else + return sceDbgIsDebuggerAttached() == 1; + #endif +#elif NV_OS_DARWIN + int mib[4]; + struct kinfo_proc info; + size_t size; + mib[0] = CTL_KERN; + mib[1] = KERN_PROC; + mib[2] = KERN_PROC_PID; + mib[3] = getpid(); + size = sizeof(info); + info.kp_proc.p_flag = 0; + sysctl(mib,4,&info,&size,NULL,0); + return ((info.kp_proc.p_flag & P_TRACED) == P_TRACED); +#else + // if ppid != sid, some process spawned our app, probably a debugger. + return getsid(getpid()) != getppid(); +#endif +} + +bool debug::attachToDebugger() +{ +#if NV_OS_WIN32 + if (isDebuggerPresent() == FALSE) { + Path process(1024); + process.copy("\""); + GetSystemDirectoryA(process.str() + 1, 1024 - 1); + + process.appendSeparator(); + + process.appendFormat("VSJitDebugger.exe\" -p %lu", ::GetCurrentProcessId()); + + STARTUPINFOA sSi; + memset(&sSi, 0, sizeof(sSi)); + + PROCESS_INFORMATION sPi; + memset(&sPi, 0, sizeof(sPi)); + + BOOL b = CreateProcessA(NULL, process.str(), NULL, NULL, FALSE, 0, NULL, NULL, &sSi, &sPi); + if (b != FALSE) { + ::WaitForSingleObject(sPi.hProcess, INFINITE); + + DWORD dwExitCode; + ::GetExitCodeProcess(sPi.hProcess, &dwExitCode); + if (dwExitCode != 0) //if exit code is zero, a debugger was selected + b = FALSE; + } + + if (sPi.hThread != NULL) ::CloseHandle(sPi.hThread); + if (sPi.hProcess != NULL) ::CloseHandle(sPi.hProcess); + + if (b == FALSE) + return false; + + for (int i = 0; i < 5*60; i++) { + if (isDebuggerPresent()) + break; + ::Sleep(200); + } + } +#endif // NV_OS_WIN32 + + return true; +} Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/DefsGnucDarwin.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/DefsGnucDarwin.h +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/DefsGnucDarwin.h @@ -2,7 +2,8 @@ #error "Do not include this file directly." #endif -//#include // uint8_t, int8_t, ... +#include // uint8_t, int8_t, ... 
uintptr_t +#include // operator new, size_t, NULL // Function linkage #define DLL_IMPORT @@ -24,8 +25,9 @@ #endif #define NV_FASTCALL __attribute__((fastcall)) -#define NV_FORCEINLINE __attribute__((always_inline)) +#define NV_FORCEINLINE __attribute__((always_inline)) inline #define NV_DEPRECATED __attribute__((deprecated)) +#define NV_THREAD_LOCAL //ACS: there's no "__thread" or equivalent on iOS/OSX #if __GNUC__ > 2 #define NV_PURE __attribute__((pure)) @@ -35,6 +37,8 @@ #define NV_CONST #endif +#define NV_NOINLINE __attribute__((noinline)) + // Define __FUNC__ properly. #if __STDC_VERSION__ < 199901L # if __GNUC__ >= 2 @@ -47,21 +51,3 @@ #endif #define restrict __restrict__ - -/* -// Type definitions -typedef uint8_t uint8; -typedef int8_t int8; - -typedef uint16_t uint16; -typedef int16_t int16; - -typedef uint32_t uint32; -typedef int32_t int32; - -typedef uint64_t uint64; -typedef int64_t int64; - -// Aliases -typedef uint32 uint; -*/ Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/DefsGnucLinux.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/DefsGnucLinux.h +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/DefsGnucLinux.h @@ -2,29 +2,38 @@ #error "Do not include this file directly." #endif +#include // uint8_t, int8_t, ... uintptr_t +#include // operator new, size_t, NULL + // Function linkage #define DLL_IMPORT #if __GNUC__ >= 4 -# define DLL_EXPORT __attribute__((visibility("default"))) -# define DLL_EXPORT_CLASS DLL_EXPORT +# define DLL_EXPORT __attribute__((visibility("default"))) +# define DLL_EXPORT_CLASS DLL_EXPORT #else -# define DLL_EXPORT -# define DLL_EXPORT_CLASS +# define DLL_EXPORT +# define DLL_EXPORT_CLASS #endif // Function calling modes #if NV_CPU_X86 -# define NV_CDECL __attribute__((cdecl)) -# define NV_STDCALL __attribute__((stdcall)) +# define NV_CDECL __attribute__((cdecl)) +# define NV_STDCALL __attribute__((stdcall)) #else -# define NV_CDECL -# define NV_STDCALL +# define NV_CDECL +# define NV_STDCALL #endif #define NV_FASTCALL __attribute__((fastcall)) -#define NV_FORCEINLINE __attribute__((always_inline)) +//#if __GNUC__ > 3 +// It seems that GCC does not assume always_inline implies inline. I think this depends on the GCC version :( +#define NV_FORCEINLINE inline __attribute__((always_inline)) +//#else +// Some compilers complain that inline and always_inline are redundant. +//#define NV_FORCEINLINE __attribute__((always_inline)) +//#endif #define NV_DEPRECATED __attribute__((deprecated)) - +#define NV_THREAD_LOCAL __thread #if __GNUC__ > 2 #define NV_PURE __attribute__((pure)) @@ -34,33 +43,17 @@ #define NV_CONST #endif +#define NV_NOINLINE __attribute__((noinline)) + // Define __FUNC__ properly. 
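// Illustrative sketch (not part of the patch): the DefsGnucLinux.h comment above is why
// NV_FORCEINLINE now pairs the attribute with the inline keyword; on GCC/Clang
// __attribute__((always_inline)) does not by itself make the function inline.
// Minimal sketch with a made-up macro and function name:
#define EXAMPLE_FORCEINLINE inline __attribute__((always_inline))

EXAMPLE_FORCEINLINE int addOneExample(int x) { return x + 1; }   // expanded at every call site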
#if __STDC_VERSION__ < 199901L -# if __GNUC__ >= 2 -# define __FUNC__ __PRETTY_FUNCTION__ // __FUNCTION__ -# else -# define __FUNC__ "" -# endif +# if __GNUC__ >= 2 +# define __FUNC__ __PRETTY_FUNCTION__ // __FUNCTION__ +# else +# define __FUNC__ "" +# endif #else -# define __FUNC__ __PRETTY_FUNCTION__ +# define __FUNC__ __PRETTY_FUNCTION__ #endif #define restrict __restrict__ - -/* -// Type definitions -typedef unsigned char uint8; -typedef signed char int8; - -typedef unsigned short uint16; -typedef signed short int16; - -typedef unsigned int uint32; -typedef signed int int32; - -typedef unsigned long long uint64; -typedef signed long long int64; - -// Aliases -typedef uint32 uint; -*/ Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/DefsGnucWin32.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/DefsGnucWin32.h +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/DefsGnucWin32.h @@ -2,6 +2,8 @@ #error "Do not include this file directly." #endif +//#include // size_t, NULL + // Function linkage #define DLL_IMPORT __declspec(dllimport) #define DLL_EXPORT __declspec(dllexport) @@ -28,6 +30,8 @@ #define NV_CONST #endif +#define NV_NOINLINE __attribute__((noinline)) + // Define __FUNC__ properly. #if __STDC_VERSION__ < 199901L # if __GNUC__ >= 2 @@ -58,3 +62,4 @@ // Aliases typedef uint32 uint; */ + Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/DefsVcWin32.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/DefsVcWin32.h +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/DefsVcWin32.h @@ -1,3 +1,5 @@ +// This code is in the public domain -- Ignacio Castaño + #ifndef NV_CORE_H #error "Do not include this file directly." #endif @@ -11,22 +13,28 @@ #define NV_CDECL __cdecl #define NV_STDCALL __stdcall #define NV_FASTCALL __fastcall -#define NV_FORCEINLINE __forceinline #define NV_DEPRECATED #define NV_PURE #define NV_CONST // Set standard function names. -#define snprintf _snprintf +#if _MSC_VER < 1900 +# define snprintf _snprintf +#endif #if _MSC_VER < 1500 -# define vsnprintf _vsnprintf +# define vsnprintf _vsnprintf +#endif +#if _MSC_VER < 1700 +# define strtoll _strtoi64 +# define strtoull _strtoui64 #endif -#define vsscanf _vsscanf #define chdir _chdir #define getcwd _getcwd -#define va_copy(a, b) a = b +#if _MSC_VER < 1800 // Not sure what version introduced this. +#define va_copy(a, b) (a) = (b) +#endif #if !defined restrict #define restrict @@ -39,6 +47,13 @@ #define __FUNC__ __FUNCTION__ #endif +#define NV_NOINLINE __declspec(noinline) +#define NV_FORCEINLINE __forceinline + +#define NV_THREAD_LOCAL __declspec(thread) + +#include + /* // Type definitions typedef unsigned char uint8; @@ -59,20 +74,23 @@ // Unwanted VC++ warnings to disable. 
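// Illustrative sketch (not part of the patch): NV_THREAD_LOCAL is defined per platform in
// the headers above (__thread on Linux/GCC, __declspec(thread) on MSVC, empty on Darwin per
// the DefsGnucDarwin.h comment, where the variable silently falls back to being shared).
// Hedged usage sketch with a made-up counter:
static NV_THREAD_LOCAL int s_exampleAllocCount = 0;   // one instance per thread, except on Darwin

static void noteExampleAlloc() { s_exampleAllocCount++; }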
/* -#pragma warning(disable : 4244) // conversion to float, possible loss of data -#pragma warning(disable : 4245) // conversion from 'enum ' to 'unsigned long', signed/unsigned mismatch -#pragma warning(disable : 4100) // unreferenced formal parameter -#pragma warning(disable : 4514) // unreferenced inline function has been removed -#pragma warning(disable : 4710) // inline function not expanded -#pragma warning(disable : 4127) // Conditional expression is constant -#pragma warning(disable : 4305) // truncation from 'const double' to 'float' -#pragma warning(disable : 4505) // unreferenced local function has been removed - -#pragma warning(disable : 4702) // unreachable code in inline expanded function -#pragma warning(disable : 4711) // function selected for automatic inlining -#pragma warning(disable : 4725) // Pentium fdiv bug +#pragma warning(disable : 4244) // conversion to float, possible loss of data +#pragma warning(disable : 4245) // conversion from 'enum ' to 'unsigned long', signed/unsigned mismatch +#pragma warning(disable : 4100) // unreferenced formal parameter +#pragma warning(disable : 4514) // unreferenced inline function has been removed +#pragma warning(disable : 4710) // inline function not expanded +#pragma warning(disable : 4127) // Conditional expression is constant +#pragma warning(disable : 4305) // truncation from 'const double' to 'float' +#pragma warning(disable : 4505) // unreferenced local function has been removed + +#pragma warning(disable : 4702) // unreachable code in inline expanded function +#pragma warning(disable : 4711) // function selected for automatic inlining +#pragma warning(disable : 4725) // Pentium fdiv bug -#pragma warning(disable : 4786) // Identifier was truncated and cannot be debugged. +#pragma warning(disable : 4786) // Identifier was truncated and cannot be debugged. -#pragma warning(disable : 4675) // resolved overload was found by argument-dependent lookup +#pragma warning(disable : 4675) // resolved overload was found by argument-dependent lookup */ + +#pragma warning(1 : 4705) // Report unused local variables. +#pragma warning(1 : 4555) // Expression has no effect. Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/FileSystem.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/FileSystem.h +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/FileSystem.h @@ -0,0 +1,24 @@ +// This code is in the public domain -- castano@gmail.com + +#pragma once +#ifndef NV_CORE_FILESYSTEM_H +#define NV_CORE_FILESYSTEM_H + +#include "nvcore.h" + +namespace nv +{ + + namespace FileSystem + { + NVCORE_API bool exists(const char * path); + NVCORE_API bool createDirectory(const char * path); + NVCORE_API bool changeDirectory(const char * path); + NVCORE_API bool removeFile(const char * path); + + } // FileSystem namespace + +} // nv namespace + + +#endif // NV_CORE_FILESYSTEM_H Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/FileSystem.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/FileSystem.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/FileSystem.cpp @@ -0,0 +1,80 @@ +// This code is in the public domain -- castano@gmail.com + +#include "FileSystem.h" + +#if NV_OS_WIN32 +#define _CRT_NONSTDC_NO_WARNINGS // _chdir is defined deprecated, but that's a bug, chdir is deprecated, _chdir is *not*. 
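// Illustrative sketch (not part of the patch): the new FileSystem.h above declares thin
// portable wrappers over the native file APIs. Hedged usage sketch; the directory name
// and calling code are made up:
#include "FileSystem.h"

static bool ensureExampleOutputDir()
{
    if (nv::FileSystem::exists("output"))
        return true;
    return nv::FileSystem::createDirectory("output");
}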
+//#include // PathFileExists +#include // GetFileAttributes +#include // _mkdir +#elif NV_OS_XBOX +#include +#elif NV_OS_ORBIS +#include +#else +#include +#include +#include +#endif +#include // remove, unlink + +using namespace nv; + + +bool FileSystem::exists(const char * path) +{ +#if NV_OS_UNIX + return access(path, F_OK|R_OK) == 0; + //struct stat buf; + //return stat(path, &buf) == 0; +#elif NV_OS_WIN32 || NV_OS_XBOX + // PathFileExists requires linking to shlwapi.lib + //return PathFileExists(path) != 0; + return GetFileAttributesA(path) != INVALID_FILE_ATTRIBUTES; +#elif NV_OS_ORBIS + const int BUFFER_SIZE = 2048; + char file_fullpath[BUFFER_SIZE]; + snprintf(file_fullpath, BUFFER_SIZE, "/app0/%s", path); + return sceFiosExistsSync(NULL, file_fullpath); +#else + if (FILE * fp = fopen(path, "r")) + { + fclose(fp); + return true; + } + return false; +#endif +} + +bool FileSystem::createDirectory(const char * path) +{ +#if NV_OS_WIN32 || NV_OS_XBOX + return CreateDirectoryA(path, NULL) != 0; +#elif NV_OS_ORBIS + // not implemented + return false; +#else + return mkdir(path, 0777) != -1; +#endif +} + +bool FileSystem::changeDirectory(const char * path) +{ +#if NV_OS_WIN32 + return _chdir(path) != -1; +#elif NV_OS_XBOX + // Xbox doesn't support Current Working Directory! + return false; +#elif NV_OS_ORBIS + // Orbis doesn't support Current Working Directory! + return false; +#else + return chdir(path) != -1; +#endif +} + +bool FileSystem::removeFile(const char * path) +{ + // @@ Use unlink or remove? + return remove(path) == 0; +} Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/ForEach.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/ForEach.h +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/ForEach.h @@ -0,0 +1,68 @@ +// This code is in the public domain -- Ignacio Castaño + +#pragma once +#ifndef NV_CORE_FOREACH_H +#define NV_CORE_FOREACH_H + +/* +These foreach macros are very non-standard and somewhat confusing, but I like them. +*/ + +#include "nvcore.h" + +#if NV_CC_GNUC // If typeof or decltype is available: +#if !NV_CC_CPP11 +# define NV_DECLTYPE typeof // Using a non-standard extension over typeof that behaves as C++11 decltype +#else +# define NV_DECLTYPE decltype +#endif + +/* +Ideally we would like to write this: + +#define NV_FOREACH(i, container) \ + for(NV_DECLTYPE(container)::PseudoIndex i((container).start()); !(container).isDone(i); (container).advance(i)) + +But gcc versions prior to 4.7 required an intermediate type. See: +https://gcc.gnu.org/bugzilla/show_bug.cgi?id=6709 +*/ + +#define NV_FOREACH(i, container) \ + typedef NV_DECLTYPE(container) NV_STRING_JOIN2(cont,__LINE__); \ + for(NV_STRING_JOIN2(cont,__LINE__)::PseudoIndex i((container).start()); !(container).isDone(i); (container).advance(i)) + +#else // If typeof not available: + +#include // placement new + +struct PseudoIndexWrapper { + template + PseudoIndexWrapper(const T & container) { + nvStaticCheck(sizeof(typename T::PseudoIndex) <= sizeof(memory)); + new (memory) typename T::PseudoIndex(container.start()); + } + // PseudoIndex cannot have a dtor! + + template typename T::PseudoIndex & operator()(const T * /*container*/) { + return *reinterpret_cast(memory); + } + template const typename T::PseudoIndex & operator()(const T * /*container*/) const { + return *reinterpret_cast(memory); + } + + uint8 memory[4]; // Increase the size if we have bigger enumerators. 
+}; + +#define NV_FOREACH(i, container) \ + for(PseudoIndexWrapper i(container); !(container).isDone(i(&(container))); (container).advance(i(&(container)))) + +#endif + +// Declare foreach keyword. +#if !defined NV_NO_USE_KEYWORDS +# define foreach NV_FOREACH +# define foreach_index NV_FOREACH +#endif + + +#endif // NV_CORE_FOREACH_H Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/Hash.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/Hash.h +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/Hash.h @@ -0,0 +1,83 @@ +// This code is in the public domain -- Ignacio Castaño + +#pragma once +#ifndef NV_CORE_HASH_H +#define NV_CORE_HASH_H + +#include "nvcore.h" + +namespace nv +{ + inline uint sdbmHash(const void * data_in, uint size, uint h = 5381) + { + const uint8 * data = (const uint8 *) data_in; + uint i = 0; + while (i < size) { + h = (h << 16) + (h << 6) - h + (uint) data[i++]; + } + return h; + } + + // Note that this hash does not handle NaN properly. + inline uint sdbmFloatHash(const float * f, uint count, uint h = 5381) + { + for (uint i = 0; i < count; i++) { + //nvDebugCheck(nv::isFinite(*f)); + union { float f; uint32 i; } x = { f[i] }; + if (x.i == 0x80000000) x.i = 0; + h = sdbmHash(&x, 4, h); + } + return h; + } + + + template + inline uint hash(const T & t, uint h = 5381) + { + return sdbmHash(&t, sizeof(T), h); + } + + template <> + inline uint hash(const float & f, uint h) + { + return sdbmFloatHash(&f, 1, h); + } + + + // Functors for hash table: + template struct Hash + { + uint operator()(const Key & k) const { + return hash(k); + } + }; + + template struct Equal + { + bool operator()(const Key & k0, const Key & k1) const { + return k0 == k1; + } + }; + + + // @@ Move to Utils.h? 
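// Illustrative sketch (not part of the patch): usage of the sdbm hash helpers above;
// ExampleKey is a made-up POD type and no particular hash value is assumed.
struct ExampleKey { int a; int b; };

static unsigned int hashExamples()
{
    ExampleKey k = { 1, 2 };
    unsigned int h1 = nv::hash(k);              // sdbmHash over the raw bytes of k, seed 5381
    float f = -0.0f;
    unsigned int h2 = nv::sdbmFloatHash(&f, 1); // -0.0f is remapped so it hashes like +0.0f
    return h1 ^ h2;
}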
+ template + struct Pair { + T1 first; + T2 second; + }; + + template + bool operator==(const Pair & p0, const Pair & p1) { + return p0.first == p1.first && p0.second == p1.second; + } + + template + uint hash(const Pair & p, uint h = 5381) { + return hash(p.second, hash(p.first)); + } + + +} // nv namespace + +#endif // NV_CORE_HASH_H Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/Library.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/Library.h +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/Library.h @@ -1,50 +0,0 @@ -// This code is in the public domain -- castano@gmail.com - -#ifndef NV_CORE_LIBRARY_H -#define NV_CORE_LIBRARY_H - -#include - -#if NV_OS_WIN32 -#define LIBRARY_NAME(name) #name ".dll" -#elif NV_OS_DARWIN -#define NV_LIBRARY_NAME(name) "lib" #name ".dylib" -#else -#define NV_LIBRARY_NAME(name) "lib" #name ".so" -#endif - -NVCORE_API void * nvLoadLibrary(const char * name); -NVCORE_API void nvUnloadLibrary(void * lib); -NVCORE_API void * nvBindSymbol(void * lib, const char * symbol); - -class NVCORE_CLASS Library -{ -public: - Library(const char * name) - { - handle = nvLoadLibrary(name); - } - ~Library() - { - if (isValid()) - { - nvUnloadLibrary(handle); - } - } - - bool isValid() const - { - return handle != NULL; - } - - void * bindSymbol(const char * symbol) - { - return nvBindSymbol(handle, symbol); - } - -private: - void * handle; -}; - - -#endif // NV_CORE_LIBRARY_H Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/Library.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/Library.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/Library.cpp @@ -1,41 +0,0 @@ - -#include "Library.h" -#include "Debug.h" - -#if NV_OS_WIN32 -#define WIN32_LEAN_AND_MEAN -#define VC_EXTRALEAN -#include -#else -#include -#endif - - - -void * nvLoadLibrary(const char * name) -{ -#if NV_OS_WIN32 - return (void *)LoadLibraryExA( name, NULL, 0 ); -#else - return dlopen(name, RTLD_LAZY); -#endif -} - -void nvUnloadLibrary(void * handle) -{ - nvDebugCheck(handle != NULL); -#if NV_OS_WIN32 - FreeLibrary((HMODULE)handle); -#else - dlclose(handle); -#endif -} - -void * nvBindSymbol(void * handle, const char * symbol) -{ -#if NV_OS_WIN32 - return (void *)GetProcAddress((HMODULE)handle, symbol); -#else - return (void *)dlsym(handle, symbol); -#endif -} Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/Memory.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/Memory.h +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/Memory.h @@ -7,9 +7,17 @@ #include "nvcore.h" #include // malloc(), realloc() and free() -#include // size_t +//#include // size_t + +//#include // new and delete + + +#if NV_CC_GNUC +# define NV_ALIGN_16 __attribute__ ((__aligned__ (16))) +#else +# define NV_ALIGN_16 __declspec(align(16)) +#endif -#include // new and delete #define NV_OVERRIDE_ALLOC 0 @@ -35,18 +43,22 @@ namespace nv { // C++ helpers. 
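// Illustrative sketch (not part of the patch): NV_ALIGN_16, added to Memory.h above,
// expands to __attribute__((__aligned__(16))) on GCC and __declspec(align(16)) otherwise.
// Hedged usage sketch with a made-up buffer, e.g. for SSE-friendly storage:
NV_ALIGN_16 static float s_exampleSimdBuffer[4] = { 0.0f, 0.0f, 0.0f, 0.0f };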
- template T * malloc(size_t count) { + template NV_FORCEINLINE T * malloc(size_t count) { return (T *)::malloc(sizeof(T) * count); } - template T * realloc(T * ptr, size_t count) { + template NV_FORCEINLINE T * realloc(T * ptr, size_t count) { return (T *)::realloc(ptr, sizeof(T) * count); } - template void free(const T * ptr) { + template NV_FORCEINLINE void free(const T * ptr) { ::free((void *)ptr); } + template NV_FORCEINLINE void zero(T & data) { + memset(&data, 0, sizeof(T)); + } + } // nv namespace #endif // NV_CORE_MEMORY_H Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/Memory.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/Memory.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/Memory.cpp @@ -114,5 +114,6 @@ #endif // 0 - #endif // NV_OVERRIDE_ALLOC + + Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/Prefetch.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/Prefetch.h +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/Prefetch.h @@ -1,31 +0,0 @@ -// This code is in the public domain -- castanyo@yahoo.es - -#ifndef NV_CORE_PREFETCH_H -#define NV_CORE_PREFETCH_H - -#include - -// nvPrefetch -#if NV_CC_GNUC - -#define nvPrefetch(ptr) __builtin_prefetch(ptr) - -#elif NV_CC_MSVC - -#if NV_CPU_X86 -__forceinline void nvPrefetch(const void * mem) -{ - __asm mov ecx, mem - __asm prefetcht0 [ecx]; -// __asm prefetchnta [ecx]; -} -#endif // NV_CPU_X86 - -#else // NV_CC_MSVC - -// do nothing in other case. -#define nvPrefetch(ptr) - -#endif // NV_CC_MSVC - -#endif // NV_CORE_PREFETCH_H Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/Ptr.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/Ptr.h +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/Ptr.h @@ -1,363 +1,321 @@ -// This code is in the public domain -- castanyo@yahoo.es +// This code is in the public domain -- Ignacio Castaño #ifndef NV_CORE_PTR_H #define NV_CORE_PTR_H -#include -#include +#include "nvcore.h" +#include "Debug.h" -#include // NULL +#include "RefCounted.h" namespace nv { - -/** Simple auto pointer template class. - * - * This is very similar to the standard auto_ptr class, but with some - * additional limitations to make its use less error prone: - * - Copy constructor and assignment operator are disabled. - * - reset method is removed. - * - * The semantics of the standard auto_ptr are not clear and change depending - * on the std implementation. For a discussion of the problems of auto_ptr read: - * http://www.awprofessional.com/content/images/020163371X/autoptrupdate\auto_ptr_update.html - */ -template -class AutoPtr -{ - NV_FORBID_COPY(AutoPtr); - NV_FORBID_HEAPALLOC(); -public: - - /// Default ctor. - AutoPtr() : m_ptr(NULL) { } - - /// Ctor. - explicit AutoPtr( T * p ) : m_ptr(p) { } - - /** Dtor. Deletes owned pointer. */ - ~AutoPtr() { - delete m_ptr; - m_ptr = NULL; - } - - /** Delete owned pointer and assign new one. */ - void operator=( T * p ) { - if (p != m_ptr) - { - delete m_ptr; - m_ptr = p; - } - } - - /** Member access. */ - T * operator -> () const { - nvDebugCheck(m_ptr != NULL); - return m_ptr; - } - - /** Get reference. */ - T & operator*() const { - nvDebugCheck(m_ptr != NULL); - return *m_ptr; - } - - /** Get pointer. */ - T * ptr() const { return m_ptr; } - - /** Relinquish ownership of the underlying pointer and returns that pointer. 
*/ - T * release() { - T * tmp = m_ptr; - m_ptr = NULL; - return tmp; - } - - /** Const pointer equal comparation. */ - friend bool operator == (const AutoPtr & ap, const T * const p) { - return (ap.ptr() == p); - } - - /** Const pointer nequal comparation. */ - friend bool operator != (const AutoPtr & ap, const T * const p) { - return (ap.ptr() != p); - } - - /** Const pointer equal comparation. */ - friend bool operator == (const T * const p, const AutoPtr & ap) { - return (ap.ptr() == p); - } - - /** Const pointer nequal comparation. */ - friend bool operator != (const T * const p, const AutoPtr & ap) { - return (ap.ptr() != p); - } - -private: - T * m_ptr; -}; - -#if 0 -/** Reference counted base class to be used with Pointer. - * - * The only requirement of the Pointer class is that the RefCounted class implements the - * addRef and release methods. - */ -class RefCounted -{ - NV_FORBID_COPY(RefCounted); -public: + class WeakProxy; + + /** Simple auto pointer template class. + * + * This is very similar to the standard auto_ptr class, but with some + * additional limitations to make its use less error prone: + * - Copy constructor and assignment operator are disabled. + * - reset method is removed. + * + * The semantics of the standard auto_ptr are not clear and change depending + * on the std implementation. For a discussion of the problems of auto_ptr read: + * http://www.awprofessional.com/content/images/020163371X/autoptrupdate\auto_ptr_update.html + */ + template + class AutoPtr + { + NV_FORBID_COPY(AutoPtr); + NV_FORBID_HEAPALLOC(); + public: + + /// Ctor. + AutoPtr(T * p = NULL) : m_ptr(p) { } + + template + AutoPtr(Q * p) : m_ptr(static_cast(p)) { } + + /// Dtor. Deletes owned pointer. + ~AutoPtr() { + delete m_ptr; + m_ptr = NULL; + } + + /// Delete owned pointer and assign new one. + void operator=( T * p ) { + if (p != m_ptr) + { + delete m_ptr; + m_ptr = p; + } + } + + template + void operator=( Q * p ) { + if (p != m_ptr) + { + delete m_ptr; + m_ptr = static_cast(p); + } + } + + /// Member access. + T * operator -> () const { + nvDebugCheck(m_ptr != NULL); + return m_ptr; + } + + /// Get reference. + T & operator*() const { + nvDebugCheck(m_ptr != NULL); + return *m_ptr; + } + + /// Get pointer. + T * ptr() const { return m_ptr; } + + /// Relinquish ownership of the underlying pointer and returns that pointer. + T * release() { + T * tmp = m_ptr; + m_ptr = NULL; + return tmp; + } + + /// Const pointer equal comparation. + friend bool operator == (const AutoPtr & ap, const T * const p) { + return (ap.ptr() == p); + } + + /// Const pointer nequal comparation. + friend bool operator != (const AutoPtr & ap, const T * const p) { + return (ap.ptr() != p); + } + + /// Const pointer equal comparation. + friend bool operator == (const T * const p, const AutoPtr & ap) { + return (ap.ptr() == p); + } + + /// Const pointer nequal comparation. + friend bool operator != (const T * const p, const AutoPtr & ap) { + return (ap.ptr() != p); + } + + private: + T * m_ptr; + }; + + + /// Smart pointer template class. + template + class SmartPtr { + public: + + // BaseClass must implement addRef() and release(). + typedef SmartPtr ThisType; + + /// Default ctor. + SmartPtr() : m_ptr(NULL) + { + } + + /// Other type assignment. + template + SmartPtr( const SmartPtr & tc ) + { + m_ptr = static_cast( tc.ptr() ); + if (m_ptr) { + m_ptr->addRef(); + } + } + + /// Copy ctor. + SmartPtr( const ThisType & bc ) + { + m_ptr = bc.ptr(); + if (m_ptr) { + m_ptr->addRef(); + } + } + + /// Copy cast ctor. 
SmartPtr(NULL) is valid. + explicit SmartPtr( BaseClass * bc ) + { + m_ptr = bc; + if (m_ptr) { + m_ptr->addRef(); + } + } + + /// Dtor. + ~SmartPtr() + { + set(NULL); + } + + + /// -> operator. + BaseClass * operator -> () const + { + nvCheck( m_ptr != NULL ); + return m_ptr; + } + + /// * operator. + BaseClass & operator*() const + { + nvCheck( m_ptr != NULL ); + return *m_ptr; + } + + /// Get pointer. + BaseClass * ptr() const + { + return m_ptr; + } + + /// Other type assignment. + template + void operator = ( const SmartPtr & tc ) + { + set( static_cast(tc.ptr()) ); + } + + /// This type assignment. + void operator = ( const ThisType & bc ) + { + set( bc.ptr() ); + } + + /// Pointer assignment. + void operator = ( BaseClass * bc ) + { + set( bc ); + } + + + /// Other type equal comparation. + template + bool operator == ( const SmartPtr & other ) const + { + return m_ptr == other.ptr(); + } + + /// This type equal comparation. + bool operator == ( const ThisType & bc ) const + { + return m_ptr == bc.ptr(); + } + + /// Const pointer equal comparation. + bool operator == ( const BaseClass * const bc ) const + { + return m_ptr == bc; + } + + /// Other type not equal comparation. + template + bool operator != ( const SmartPtr & other ) const + { + return m_ptr != other.ptr(); + } + + /// Other type not equal comparation. + bool operator != ( const ThisType & bc ) const + { + return m_ptr != bc.ptr(); + } + + /// Const pointer not equal comparation. + bool operator != (const BaseClass * const bc) const + { + return m_ptr != bc; + } + + /// This type lower than comparation. + bool operator < (const ThisType & p) const + { + return m_ptr < p.ptr(); + } + + bool isValid() const { + return isValidPtr(m_ptr); + } + + private: + + // Set this pointer. + void set( BaseClass * p ) + { + if (p) p->addRef(); + if (m_ptr) m_ptr->release(); + m_ptr = p; + } + + private: + + BaseClass * m_ptr; + + }; + + + /// Smart pointer template class. + template + class WeakPtr { + public: + + WeakPtr() {} + + WeakPtr(T * p) { operator=(p); } + WeakPtr(const SmartPtr & p) { operator=(p.ptr()); } + + // Default constructor and assignment from weak_ptr are OK. + + void operator=(T * p) + { + if (p) { + m_proxy = p->getWeakProxy(); + nvDebugCheck(m_proxy != NULL); + nvDebugCheck(m_proxy->ptr() == p); + } + else { + m_proxy = NULL; + } + } + + void operator=(const SmartPtr & ptr) { operator=(ptr.ptr()); } + + bool operator==(const SmartPtr & p) const { return ptr() == p.ptr(); } + bool operator!=(const SmartPtr & p) const { return ptr() != p.ptr(); } + + bool operator==(const WeakPtr & p) const { return ptr() == p.ptr(); } + bool operator!=(const WeakPtr & p) const { return ptr() != p.ptr(); } + + bool operator==(T * p) const { return ptr() == p; } + bool operator!=(T * p) const { return ptr() != p; } + + T * operator->() const + { + T * p = ptr(); + nvDebugCheck(p != NULL); + return p; + } + + T * ptr() const + { + if (m_proxy != NULL) { + return static_cast(m_proxy->ptr()); + } + return NULL; + } - /// Ctor. - RefCounted() : m_count(0), m_weak_proxy(NULL) - { - s_total_obj_count++; - } - - /// Virtual dtor. - virtual ~RefCounted() - { - nvCheck( m_count == 0 ); - nvCheck( s_total_obj_count > 0 ); - s_total_obj_count--; - } - - - /// Increase reference count. - uint addRef() const - { - s_total_ref_count++; - m_count++; - return m_count; - } - - - /// Decrease reference count and remove when 0. 
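// Illustrative sketch (not part of the patch): how the SmartPtr/WeakPtr pair above
// cooperates with RefCounted and its WeakProxy (both defined further down in RefCounted.h);
// ExampleImage is a made-up type.
struct ExampleImage : public nv::RefCounted { int width, height; };

static void exampleOwnership()
{
    nv::SmartPtr<ExampleImage> strong(new ExampleImage);   // addRef -> count == 1
    nv::WeakPtr<ExampleImage>  weak(strong);               // observes it via the weak proxy
    strong = NULL;                                         // count drops to 0, object deleted
    // weak.ptr() now returns NULL: ~RefCounted notified the proxy that the object died.
}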
- uint release() const - { - nvCheck( m_count > 0 ); - - s_total_ref_count--; - m_count--; - if( m_count == 0 ) { - releaseWeakProxy(); - delete this; - return 0; - } - return m_count; - } - - /// Get weak proxy. - WeakProxy * getWeakProxy() const - { - if (m_weak_proxy == NULL) { - m_weak_proxy = new WeakProxy; - m_weak_proxy->AddRef(); - } - return m_weak_proxy; - } - - /// Release the weak proxy. - void releaseWeakProxy() const - { - if (m_weak_proxy != NULL) { - m_weak_proxy->NotifyObjectDied(); - m_weak_proxy->Release(); - m_weak_proxy = NULL; - } - } - - /** @name Debug methods: */ - //@{ - /// Get reference count. - int refCount() const - { - return m_count; - } - - /// Get total number of objects. - static int totalObjectCount() - { - return s_total_obj_count; - } - - /// Get total number of references. - static int totalReferenceCount() - { - return s_total_ref_count; - } - //@} - - -private: - - NVCORE_API static int s_total_ref_count; - NVCORE_API static int s_total_obj_count; - - mutable int m_count; - mutable WeakProxy * weak_proxy; - -}; -#endif - -/// Smart pointer template class. -template -class Pointer { -public: - - // BaseClass must implement addRef() and release(). - typedef Pointer ThisType; - - /// Default ctor. - Pointer() : m_ptr(NULL) - { - } - - /** Other type assignment. */ - template - Pointer( const Pointer & tc ) - { - m_ptr = static_cast( tc.ptr() ); - if( m_ptr ) { - m_ptr->addRef(); - } - } - - /** Copy ctor. */ - Pointer( const ThisType & bc ) - { - m_ptr = bc.ptr(); - if( m_ptr ) { - m_ptr->addRef(); - } - } - - /** Copy cast ctor. Pointer(NULL) is valid. */ - explicit Pointer( BaseClass * bc ) - { - m_ptr = bc; - if( m_ptr ) { - m_ptr->addRef(); - } - } - - /** Dtor. */ - ~Pointer() - { - set(NULL); - } - - - /** @name Accessors: */ - //@{ - /** -> operator. */ - BaseClass * operator -> () const - { - nvCheck( m_ptr != NULL ); - return m_ptr; - } - - /** * operator. */ - BaseClass & operator*() const - { - nvCheck( m_ptr != NULL ); - return *m_ptr; - } - - /** Get pointer. */ - BaseClass * ptr() const - { - return m_ptr; - } - //@} - - - /** @name Mutators: */ - //@{ - /** Other type assignment. */ - template - void operator = ( const Pointer & tc ) - { - set( static_cast(tc.ptr()) ); - } - - /** This type assignment. */ - void operator = ( const ThisType & bc ) - { - set( bc.ptr() ); - } - - /** Pointer assignment. */ - void operator = ( BaseClass * bc ) - { - set( bc ); - } - //@} - - - /** @name Comparators: */ - //@{ - /** Other type equal comparation. */ - template - bool operator == ( const Pointer & other ) const - { - return m_ptr == other.ptr(); - } - - /** This type equal comparation. */ - bool operator == ( const ThisType & bc ) const - { - return m_ptr == bc.ptr(); - } - - /** Const pointer equal comparation. */ - bool operator == ( const BaseClass * const bc ) const - { - return m_ptr == bc; - } - - /** Other type not equal comparation. */ - template - bool operator != ( const Pointer & other ) const - { - return m_ptr != other.ptr(); - } - - /** Other type not equal comparation. */ - bool operator != ( const ThisType & bc ) const - { - return m_ptr != bc.ptr(); - } - - /** Const pointer not equal comparation. */ - bool operator != (const BaseClass * const bc) const - { - return m_ptr != bc; - } - - /** This type lower than comparation. */ - bool operator < (const ThisType & p) const - { - return m_ptr < p.ptr(); - } - //@} - -private: - - /** Set this pointer. 
*/ - void set( BaseClass * p ) - { - if( m_ptr != p ) { - if( m_ptr ) m_ptr->release(); - if( p ) p->addRef(); - m_ptr = p; - } - } + private: -private: + mutable SmartPtr m_proxy; - BaseClass * m_ptr; + }; -}; } // nv namespace Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/Radix.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/Radix.h +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/Radix.h @@ -1,69 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/** - * Contains source code from the article "Radix Sort Revisited". - * \file Radix.h - * \author Pierre Terdiman - * \date April, 4, 2000 - */ -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// Include Guard -#ifndef NV_CORE_RADIXSORT_H -#define NV_CORE_RADIXSORT_H - -#include - - -#define RADIX_LOCAL_RAM - - -class NVCORE_API RadixSort { - NV_FORBID_COPY(RadixSort); -public: - // Constructor/Destructor - RadixSort(); - ~RadixSort(); - - // Sorting methods - RadixSort & sort(const uint32* input, uint32 nb, bool signedvalues=true); - RadixSort & sort(const float* input, uint32 nb); - - //! Access to results. mIndices is a list of indices in sorted order, i.e. in the order you may further process your data - inline uint32 * indices() const { return mIndices; } - - //! mIndices2 gets trashed on calling the sort routine, but otherwise you can recycle it the way you want. - inline uint32 * recyclable() const { return mIndices2; } - - // Stats - uint32 usedRam() const; - - //! Returns the total number of calls to the radix sorter. - inline uint32 totalCalls() const { return mTotalCalls; } - - //! Returns the number of premature exits due to temporal coherence. - inline uint32 hits() const { return mNbHits; } - - - private: -#ifndef RADIX_LOCAL_RAM - uint32* mHistogram; //!< Counters for each byte - uint32* mOffset; //!< Offsets (nearly a cumulative distribution function) -#endif - uint32 mCurrentSize; //!< Current size of the indices list - uint32 mPreviousSize; //!< Size involved in previous call - uint32* mIndices; //!< Two lists, swapped each pass - uint32* mIndices2; - - // Stats - uint32 mTotalCalls; - uint32 mNbHits; - - // Internal methods - bool resize(uint32 nb); - void resetIndices(); - -}; - - -#endif // NV_CORE_RADIXSORT_H Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/Radix.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/Radix.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/Radix.cpp @@ -1,429 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/** - * Contains source code from the article "Radix Sort Revisited". 
- * \file Radix.cpp - * \author Pierre Terdiman - * \date April, 4, 2000 - */ -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/** - * Revisited Radix Sort. - * This is my new radix routine: - * - it uses indices and doesn't recopy the values anymore, hence wasting less ram - * - it creates all the histograms in one run instead of four - * - it sorts words faster than dwords and bytes faster than words - * - it correctly sorts negative floating-point values by patching the offsets - * - it automatically takes advantage of temporal coherence - * - multiple keys support is a side effect of temporal coherence - * - it may be worth recoding in asm... (mainly to use FCOMI, FCMOV, etc) [it's probably memory-bound anyway] - * - * History: - * - 08.15.98: very first version - * - 04.04.00: recoded for the radix article - * - 12.xx.00: code lifting - * - 09.18.01: faster CHECK_PASS_VALIDITY thanks to Mark D. Shattuck (who provided other tips, not included here) - * - 10.11.01: added local ram support - * - 01.20.02: bugfix! In very particular cases the last pass was skipped in the float code-path, leading to incorrect sorting...... - * - * \class RadixSort - * \author Pierre Terdiman - * \version 1.3 - * \date August, 15, 1998 - */ -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -/* -To do: - - add an offset parameter between two input values (avoid some data recopy sometimes) - - unroll ? asm ? -*/ - -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// Header - -#include - -#include // memset - -//using namespace IceCore; - -#define DELETEARRAY(a) { delete [] a; a = NULL; } -#define CHECKALLOC(a) - - - -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/** - * Constructor. - */ -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -RadixSort::RadixSort() : mCurrentSize(0), mPreviousSize(0), mIndices(NULL), mIndices2(NULL), mTotalCalls(0), mNbHits(0) -{ -#ifndef RADIX_LOCAL_RAM - // Allocate input-independent ram - mHistogram = new uint32[256*4]; - mOffset = new uint32[256]; -#endif - // Initialize indices - resetIndices(); -} - -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/** - * Destructor. 
- */ -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -RadixSort::~RadixSort() -{ - // Release everything -#ifndef RADIX_LOCAL_RAM - DELETEARRAY(mOffset); - DELETEARRAY(mHistogram); -#endif - DELETEARRAY(mIndices2); - DELETEARRAY(mIndices); -} - -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/** - * Resizes the inner lists. - * \param nb [in] new size (number of dwords) - * \return true if success - */ -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -bool RadixSort::resize(uint32 nb) -{ - // Free previously used ram - DELETEARRAY(mIndices2); - DELETEARRAY(mIndices); - - // Get some fresh one - mIndices = new uint32[nb]; CHECKALLOC(mIndices); - mIndices2 = new uint32[nb]; CHECKALLOC(mIndices2); - mCurrentSize = nb; - - // Initialize indices so that the input buffer is read in sequential order - resetIndices(); - - return true; -} - -#define CHECK_RESIZE(n) \ - if(n!=mPreviousSize) \ - { \ - if(n>mCurrentSize) resize(n); \ - else resetIndices(); \ - mPreviousSize = n; \ - } - -#define CREATE_HISTOGRAMS(type, buffer) \ - /* Clear counters */ \ - memset(mHistogram, 0, 256*4*sizeof(uint32)); \ - \ - /* Prepare for temporal coherence */ \ - type PrevVal = (type)buffer[mIndices[0]]; \ - bool AlreadySorted = true; /* Optimism... */ \ - uint32* Indices = mIndices; \ - \ - /* Prepare to count */ \ - uint8* p = (uint8*)input; \ - uint8* pe = &p[nb*4]; \ - uint32* h0= &mHistogram[0]; /* Histogram for first pass (LSB) */ \ - uint32* h1= &mHistogram[256]; /* Histogram for second pass */ \ - uint32* h2= &mHistogram[512]; /* Histogram for third pass */ \ - uint32* h3= &mHistogram[768]; /* Histogram for last pass (MSB) */ \ - \ - while(p!=pe) \ - { \ - /* Read input buffer in previous sorted order */ \ - type Val = (type)buffer[*Indices++]; \ - /* Check whether already sorted or not */ \ - if(Val>24; // Radix byte, same as above. AND is useless here (uint32). - // ### cmp to be killed. Not good. Later. - if(Radix<128) mIndices2[mOffset[Radix]++] = mIndices[i]; // Number is positive, same as above - else mIndices2[--mOffset[Radix]] = mIndices[i]; // Number is negative, flip the sorting order - } - // Swap pointers for next pass. Valid indices - the most recent ones - are in mIndices after the swap. - uint32* Tmp = mIndices; mIndices = mIndices2; mIndices2 = Tmp; - } - else - { - // The pass is useless, yet we still have to reverse the order of current list if all values are negative. - if(UniqueVal>=128) - { - for(i=0;i + +#ifndef NV_CORE_REFCOUNTED_H +#define NV_CORE_REFCOUNTED_H + +#include "nvcore.h" +#include "Debug.h" + +#define NV_DECLARE_PTR(Class) \ + template class SmartPtr; \ + typedef SmartPtr Class ## Ptr; \ + typedef SmartPtr Class ## ConstPtr + + +namespace nv +{ + /// Weak proxy. + class WeakProxy + { + NV_FORBID_COPY(WeakProxy); + public: + /// Ctor. + WeakProxy(void * ptr) : m_count(0), m_ptr(ptr) { } + + /// Dtor. + ~WeakProxy() + { + nvCheck( m_count == 0 ); + } + + /// Increase reference count. + uint addRef() const + { + m_count++; + return m_count; + } + + /// Decrease reference count and remove when 0. 
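// Illustrative sketch (not part of the patch): NV_DECLARE_PTR above forward-declares
// SmartPtr and stamps out two smart-pointer typedefs for a class; for a hypothetical
// class Foo the intent is roughly:
//     class Foo;
//     typedef nv::SmartPtr<Foo>       FooPtr;
//     typedef nv::SmartPtr<const Foo> FooConstPtr;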
+ uint release() const + { + nvCheck( m_count > 0 ); + + m_count--; + if( m_count == 0 ) { + delete this; + return 0; + } + return m_count; + } + + /// WeakPtr's call this to determine if their pointer is valid or not. + bool isAlive() const { + return m_ptr != NULL; + } + + /// Only the actual object should call this. + void notifyObjectDied() { + m_ptr = NULL; + } + + /// Return proxy pointer. + void * ptr() const { + return m_ptr; + } + + private: + mutable int m_count; + void * m_ptr; + }; + + + /// Reference counted base class to be used with SmartPtr and WeakPtr. + class RefCounted + { + NV_FORBID_COPY(RefCounted); + public: + + /// Ctor. + RefCounted() : m_count(0), m_weak_proxy(NULL) + { + } + + /// Virtual dtor. + virtual ~RefCounted() + { + nvCheck( m_count == 0 ); + releaseWeakProxy(); + } + + + /// Increase reference count. + uint addRef() const + { + m_count++; + return m_count; + } + + + /// Decrease reference count and remove when 0. + uint release() const + { + nvCheck( m_count > 0 ); + + m_count--; + if( m_count == 0 ) { + delete this; + return 0; + } + return m_count; + } + + /// Get weak proxy. + WeakProxy * getWeakProxy() const + { + if (m_weak_proxy == NULL) { + m_weak_proxy = new WeakProxy((void *)this); + m_weak_proxy->addRef(); + } + return m_weak_proxy; + } + + /// Release the weak proxy. + void releaseWeakProxy() const + { + if (m_weak_proxy != NULL) { + m_weak_proxy->notifyObjectDied(); + m_weak_proxy->release(); + m_weak_proxy = NULL; + } + } + + /// Get reference count. + int refCount() const + { + return m_count; + } + + + private: + + mutable int m_count; + mutable WeakProxy * m_weak_proxy; + + }; + +} // nv namespace + + +#endif // NV_CORE_REFCOUNTED_H Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/StdStream.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/StdStream.h +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/StdStream.h @@ -1,369 +1,463 @@ -#ifndef NV_STDSTREAM_H -#define NV_STDSTREAM_H +// This code is in the public domain -- Ignacio Castaño -#include +//#pragma once +//#ifndef NV_CORE_STDSTREAM_H +//#define NV_CORE_STDSTREAM_H + +#include "nvcore.h" +#include "Stream.h" +#include "Array.h" #include // fopen #include // memcpy -#include // std::exception namespace nv { -// Portable version of fopen. -inline FILE * fileOpen(const char * fileName, const char * mode) -{ - nvCheck(fileName != NULL); + // Portable version of fopen. + inline FILE * fileOpen(const char * fileName, const char * mode) + { + nvCheck(fileName != NULL); #if NV_CC_MSVC && _MSC_VER >= 1400 - FILE * fp; - if (fopen_s(&fp, fileName, mode) == 0) { - return fp; - } - return NULL; + FILE * fp; + if (fopen_s(&fp, fileName, mode) == 0) { + return fp; + } + return NULL; #else - return fopen(fileName, mode); + return fopen(fileName, mode); #endif -} - - -/// Base stdio stream. -class NVCORE_CLASS StdStream : public Stream -{ - NV_FORBID_COPY(StdStream); -public: - - /// Ctor. - StdStream( FILE * fp, bool autoclose=true ) : - m_fp(fp), m_autoclose(autoclose) { } - - /// Dtor. - virtual ~StdStream() - { - if( m_fp != NULL && m_autoclose ) { - fclose( m_fp ); - } - } - - - /** @name Stream implementation. 
*/ - //@{ - virtual void seek( uint pos ) - { - nvDebugCheck(m_fp != NULL); - nvDebugCheck(pos < size()); - fseek(m_fp, pos, SEEK_SET); - } - - virtual uint tell() const - { - nvDebugCheck(m_fp != NULL); - return ftell(m_fp); - } - - virtual uint size() const - { - nvDebugCheck(m_fp != NULL); - uint pos = ftell(m_fp); - fseek(m_fp, 0, SEEK_END); - uint end = ftell(m_fp); - fseek(m_fp, pos, SEEK_SET); - return end; - } - - virtual bool isError() const - { - return m_fp == NULL || ferror( m_fp ) != 0; - } - - virtual void clearError() - { - nvDebugCheck(m_fp != NULL); - clearerr(m_fp); - } - - virtual bool isAtEnd() const - { - nvDebugCheck(m_fp != NULL); - return feof( m_fp ) != 0; - } - - /// Always true. - virtual bool isSeekable() const { return true; } - //@} - -protected: - - FILE * m_fp; - bool m_autoclose; - -}; + } -/// Standard output stream. -class NVCORE_CLASS StdOutputStream : public StdStream -{ - NV_FORBID_COPY(StdOutputStream); -public: - - /// Construct stream by file name. - StdOutputStream( const char * name ) : - StdStream(fileOpen(name, "wb")) { } - - /// Construct stream by file handle. - StdOutputStream( FILE * fp, bool autoclose=true ) : StdStream(fp, autoclose) - { - } - - /** @name Stream implementation. */ - //@{ - /// Write data. - virtual uint serialize( void * data, uint len ) - { - nvDebugCheck(data != NULL); - nvDebugCheck(m_fp != NULL); - return (uint)fwrite(data, 1, len, m_fp); - } - - virtual bool isLoading() const - { - return false; - } - - virtual bool isSaving() const - { - return true; - } - //@} - -}; + /// Base stdio stream. + class NVCORE_CLASS StdStream : public Stream + { + NV_FORBID_COPY(StdStream); + public: + + /// Ctor. + StdStream( FILE * fp, bool autoclose ) : m_fp(fp), m_autoclose(autoclose) { } + + /// Dtor. + virtual ~StdStream() + { + if( m_fp != NULL && m_autoclose ) { +#if NV_OS_WIN32 + _fclose_nolock( m_fp ); +#else + fclose( m_fp ); +#endif + } + } -/// Standard input stream. -class NVCORE_CLASS StdInputStream : public StdStream -{ - NV_FORBID_COPY(StdInputStream); -public: + /** @name Stream implementation. */ + //@{ + virtual void seek( uint pos ) + { + nvDebugCheck(m_fp != NULL); + nvDebugCheck(pos <= size()); +#if NV_OS_WIN32 + _fseek_nolock(m_fp, pos, SEEK_SET); +#else + fseek(m_fp, pos, SEEK_SET); +#endif + } - /// Construct stream by file name. - StdInputStream( const char * name ) : - StdStream(fileOpen(name, "rb")) { } - - /// Construct stream by file handle. - StdInputStream( FILE * fp, bool autoclose=true ) : StdStream(fp, autoclose) - { - } - - /** @name Stream implementation. */ - //@{ - /// Read data. 
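// Illustrative sketch (not part of the patch): the isAtEnd() comment above is the
// behavioural point of the rewrite: feof() only reports EOF after a failed read past the
// end, while comparing ftell() against the stream size reports it as soon as the last
// byte has been consumed. Minimal standalone illustration; the file name is made up:
#include <stdio.h>

static void exampleEofProbe()
{
    FILE * fp = fopen("example.bin", "rb");
    if (fp == NULL) return;
    fseek(fp, 0, SEEK_END);
    long end = ftell(fp);          // size of the file
    fseek(fp, end, SEEK_SET);      // sit exactly at the end without reading past it
    // feof(fp) is still 0 here, but an ftell()-based check already reports end-of-stream.
    fclose(fp);
}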
- virtual uint serialize( void * data, uint len ) - { - nvDebugCheck(data != NULL); - nvDebugCheck(m_fp != NULL); - return (uint)fread(data, 1, len, m_fp); - } - - virtual bool isLoading() const - { - return true; - } - - virtual bool isSaving() const - { - return false; - } - //@} -}; + virtual uint tell() const + { + nvDebugCheck(m_fp != NULL); +#if NV_OS_WIN32 + return _ftell_nolock(m_fp); +#else + return (uint)ftell(m_fp); +#endif + } + virtual uint size() const + { + nvDebugCheck(m_fp != NULL); +#if NV_OS_WIN32 + uint pos = _ftell_nolock(m_fp); + _fseek_nolock(m_fp, 0, SEEK_END); + uint end = _ftell_nolock(m_fp); + _fseek_nolock(m_fp, pos, SEEK_SET); +#else + uint pos = (uint)ftell(m_fp); + fseek(m_fp, 0, SEEK_END); + uint end = (uint)ftell(m_fp); + fseek(m_fp, pos, SEEK_SET); +#endif + return end; + } + virtual bool isError() const + { + return m_fp == NULL || ferror( m_fp ) != 0; + } + + virtual void clearError() + { + nvDebugCheck(m_fp != NULL); + clearerr(m_fp); + } + + // @@ The original implementation uses feof, which only returns true when we attempt to read *past* the end of the stream. + // That is, if we read the last byte of a file, then isAtEnd would still return false, even though the stream pointer is at the file end. This is not the intent and was inconsistent with the implementation of the MemoryStream, a better + // implementation uses use ftell and fseek to determine our location within the file. + virtual bool isAtEnd() const + { + if (m_fp == NULL) return true; + //nvDebugCheck(m_fp != NULL); + //return feof( m_fp ) != 0; +#if NV_OS_WIN32 + uint pos = _ftell_nolock(m_fp); + _fseek_nolock(m_fp, 0, SEEK_END); + uint end = _ftell_nolock(m_fp); + _fseek_nolock(m_fp, pos, SEEK_SET); +#else + uint pos = (uint)ftell(m_fp); + fseek(m_fp, 0, SEEK_END); + uint end = (uint)ftell(m_fp); + fseek(m_fp, pos, SEEK_SET); +#endif + return pos == end; + } -/// Memory input stream. -class NVCORE_CLASS MemoryInputStream : public Stream -{ - NV_FORBID_COPY(MemoryInputStream); -public: + /// Always true. + virtual bool isSeekable() const { return true; } + //@} + + protected: + + FILE * m_fp; + bool m_autoclose; + + }; + + + /// Standard output stream. + class NVCORE_CLASS StdOutputStream : public StdStream + { + NV_FORBID_COPY(StdOutputStream); + public: + + /// Construct stream by file name. + StdOutputStream( const char * name ) : StdStream(fileOpen(name, "wb"), /*autoclose=*/true) { } + + /// Construct stream by file handle. + StdOutputStream( FILE * fp, bool autoclose ) : StdStream(fp, autoclose) + { + } + + /** @name Stream implementation. */ + //@{ + /// Write data. + virtual uint serialize( void * data, uint len ) + { + nvDebugCheck(data != NULL); + nvDebugCheck(m_fp != NULL); +#if NV_OS_WIN32 + return (uint)_fwrite_nolock(data, 1, len, m_fp); +#elif NV_OS_LINUX + return (uint)fwrite_unlocked(data, 1, len, m_fp); +#elif NV_OS_DARWIN + // @@ No error checking, always returns len. + for (uint i = 0; i < len; i++) { + putc_unlocked(((char *)data)[i], m_fp); + } + return len; +#else + return (uint)fwrite(data, 1, len, m_fp); +#endif + } - /// Ctor. - MemoryInputStream( const uint8 * mem, uint size ) : - m_mem(mem), m_ptr(mem), m_size(size) { } - - /** @name Stream implementation. */ - //@{ - /// Read data. 
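// Illustrative sketch (not part of the patch): usage of the rewritten stream classes
// above; the file name and payload are made up. On Win32, serialize() now goes through
// the _fwrite_nolock fast path shown above.
static void exampleWrite()
{
    nv::StdOutputStream out("example.bin");       // fopen(..., "wb"), closed automatically
    if (!out.isError()) {
        int value = 42;
        out.serialize(&value, sizeof(value));     // writes sizeof(int) bytes
    }
}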
- virtual uint serialize( void * data, uint len ) - { - nvDebugCheck(data != NULL); - nvDebugCheck(!isError()); - - uint left = m_size - tell(); - if (len > left) len = left; - - memcpy( data, m_ptr, len ); - m_ptr += len; - - return len; - } - - virtual void seek( uint pos ) - { - nvDebugCheck(!isError()); - m_ptr = m_mem + pos; - nvDebugCheck(!isError()); - } - - virtual uint tell() const - { - nvDebugCheck(m_ptr >= m_mem); - return uint(m_ptr - m_mem); - } - - virtual uint size() const - { - return m_size; - } - - virtual bool isError() const - { - return m_mem == NULL || m_ptr > m_mem + m_size || m_ptr < m_mem; - } - - virtual void clearError() - { - // Nothing to do. - } - - virtual bool isAtEnd() const - { - return m_ptr == m_mem + m_size; - } - - /// Always true. - virtual bool isSeekable() const - { - return true; - } - - virtual bool isLoading() const - { - return true; - } - - virtual bool isSaving() const - { - return false; - } - //@} - - -private: - - const uint8 * m_mem; - const uint8 * m_ptr; - uint m_size; + virtual bool isLoading() const + { + return false; + } + + virtual bool isSaving() const + { + return true; + } + //@} + + }; + + + /// Standard input stream. + class NVCORE_CLASS StdInputStream : public StdStream + { + NV_FORBID_COPY(StdInputStream); + public: + + /// Construct stream by file name. + StdInputStream( const char * name ) : StdStream(fileOpen(name, "rb"), /*autoclose=*/true) { } + + /// Construct stream by file handle. + StdInputStream( FILE * fp, bool autoclose=true ) : StdStream(fp, autoclose) + { + } + + /** @name Stream implementation. */ + //@{ + /// Read data. + virtual uint serialize( void * data, uint len ) + { + nvDebugCheck(data != NULL); + nvDebugCheck(m_fp != NULL); +#if NV_OS_WIN32 + return (uint)_fread_nolock(data, 1, len, m_fp); +#elif NV_OS_LINUX + return (uint)fread_unlocked(data, 1, len, m_fp); +#elif NV_OS_DARWIN + // @@ No error checking, always returns len. + for (uint i = 0; i < len; i++) { + ((char *)data)[i] = getc_unlocked(m_fp); + } + return len; +#else + return (uint)fread(data, 1, len, m_fp); +#endif + + } -}; + virtual bool isLoading() const + { + return true; + } + + virtual bool isSaving() const + { + return false; + } + //@} + }; + + + + /// Memory input stream. + class NVCORE_CLASS MemoryInputStream : public Stream + { + NV_FORBID_COPY(MemoryInputStream); + public: + + /// Ctor. + MemoryInputStream( const uint8 * mem, uint size ) : m_mem(mem), m_ptr(mem), m_size(size) { } + + /** @name Stream implementation. */ + //@{ + /// Read data. + virtual uint serialize( void * data, uint len ) + { + nvDebugCheck(data != NULL); + nvDebugCheck(!isError()); + + uint left = m_size - tell(); + if (len > left) len = left; + + memcpy( data, m_ptr, len ); + m_ptr += len; + + return len; + } + + virtual void seek( uint pos ) + { + nvDebugCheck(!isError()); + m_ptr = m_mem + pos; + nvDebugCheck(!isError()); + } + + virtual uint tell() const + { + nvDebugCheck(m_ptr >= m_mem); + return uint(m_ptr - m_mem); + } + + virtual uint size() const + { + return m_size; + } + + virtual bool isError() const + { + return m_mem == NULL || m_ptr > m_mem + m_size || m_ptr < m_mem; + } + + virtual void clearError() + { + // Nothing to do. + } + + virtual bool isAtEnd() const + { + return m_ptr == m_mem + m_size; + } + + /// Always true. 
+ virtual bool isSeekable() const + { + return true; + } + + virtual bool isLoading() const + { + return true; + } + + virtual bool isSaving() const + { + return false; + } + //@} + + const uint8 * ptr() const { return m_ptr; } + + + private: + + const uint8 * m_mem; + const uint8 * m_ptr; + uint m_size; + + }; + + + /// Buffer output stream. + class NVCORE_CLASS BufferOutputStream : public Stream + { + NV_FORBID_COPY(BufferOutputStream); + public: + + BufferOutputStream(Array & buffer) : m_buffer(buffer) { } + + virtual uint serialize( void * data, uint len ) + { + nvDebugCheck(data != NULL); + m_buffer.append((uint8 *)data, len); + return len; + } + + virtual void seek( uint /*pos*/ ) { /*Not implemented*/ } + virtual uint tell() const { return m_buffer.size(); } + virtual uint size() const { return m_buffer.size(); } + + virtual bool isError() const { return false; } + virtual void clearError() {} + + virtual bool isAtEnd() const { return true; } + virtual bool isSeekable() const { return false; } + virtual bool isLoading() const { return false; } + virtual bool isSaving() const { return true; } + + private: + Array & m_buffer; + }; + + + /// Protected input stream. + class NVCORE_CLASS ProtectedStream : public Stream + { + NV_FORBID_COPY(ProtectedStream); + public: + + /// Ctor. + ProtectedStream( Stream & s ) : m_s(&s), m_autodelete(false) + { + } + + /// Ctor. + ProtectedStream( Stream * s, bool autodelete = true ) : + m_s(s), m_autodelete(autodelete) + { + nvDebugCheck(m_s != NULL); + } + + /// Dtor. + virtual ~ProtectedStream() + { + if( m_autodelete ) { + delete m_s; + } + } + + /** @name Stream implementation. */ + //@{ + /// Read data. + virtual uint serialize( void * data, uint len ) + { + nvDebugCheck(data != NULL); + len = m_s->serialize( data, len ); + + if( m_s->isError() ) { + throw; + } + + return len; + } + + virtual void seek( uint pos ) + { + m_s->seek( pos ); + + if( m_s->isError() ) { + throw; + } + } + + virtual uint tell() const + { + return m_s->tell(); + } + + virtual uint size() const + { + return m_s->size(); + } + + virtual bool isError() const + { + return m_s->isError(); + } + + virtual void clearError() + { + m_s->clearError(); + } + + virtual bool isAtEnd() const + { + return m_s->isAtEnd(); + } + + virtual bool isSeekable() const + { + return m_s->isSeekable(); + } + + virtual bool isLoading() const + { + return m_s->isLoading(); + } + + virtual bool isSaving() const + { + return m_s->isSaving(); + } + //@} -/// Protected input stream. -class NVCORE_CLASS ProtectedStream : public Stream -{ - NV_FORBID_COPY(ProtectedStream); -public: + private: - /// Ctor. - ProtectedStream( Stream & s ) : m_s(&s), m_autodelete(false) - { - } - - /// Ctor. - ProtectedStream( Stream * s, bool autodelete = true ) : - m_s(s), m_autodelete(autodelete) - { - nvDebugCheck(m_s != NULL); - } - - /// Dtor. - virtual ~ProtectedStream() - { - if( m_autodelete ) { - delete m_s; - } - } - - /** @name Stream implementation. */ - //@{ - /// Read data. 
- virtual uint serialize( void * data, uint len ) - { - nvDebugCheck(data != NULL); - len = m_s->serialize( data, len ); - - if( m_s->isError() ) { - throw std::exception(); - } - - return len; - } - - virtual void seek( uint pos ) - { - m_s->seek( pos ); - - if( m_s->isError() ) { - throw std::exception(); - } - } - - virtual uint tell() const - { - return m_s->tell(); - } - - virtual uint size() const - { - return m_s->size(); - } - - virtual bool isError() const - { - return m_s->isError(); - } - - virtual void clearError() - { - m_s->clearError(); - } - - virtual bool isAtEnd() const - { - return m_s->isAtEnd(); - } - - virtual bool isSeekable() const - { - return m_s->isSeekable(); - } - - virtual bool isLoading() const - { - return m_s->isLoading(); - } - - virtual bool isSaving() const - { - return m_s->isSaving(); - } - //@} - - -private: - - Stream * const m_s; - bool const m_autodelete; + Stream * const m_s; + bool const m_autodelete; -}; + }; } // nv namespace -#endif // NV_STDSTREAM_H +//#endif // NV_CORE_STDSTREAM_H Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/StrLib.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/StrLib.h +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/StrLib.h @@ -1,354 +1,429 @@ -// This code is in the public domain -- castanyo@yahoo.es +// This code is in the public domain -- Ignacio Castaño +#pragma once #ifndef NV_CORE_STRING_H #define NV_CORE_STRING_H -#include -#include // swap +#include "Debug.h" +#include "Hash.h" // hash -#include // strlen, strcmp, etc. +//#include // strlen, etc. +#if NV_OS_WIN32 +#define NV_PATH_SEPARATOR '\\' +#else +#define NV_PATH_SEPARATOR '/' +#endif namespace nv { - uint strHash(const char * str, uint h) NV_PURE; + NVCORE_API uint strHash(const char * str, uint h) NV_PURE; - /// String hash based on Bernstein's hash. - inline uint strHash(const char * data, uint h = 5381) - { - uint i = 0; - while(data[i] != 0) { - h = (33 * h) ^ uint(data[i]); - i++; - } - return h; - } - - template <> struct hash { - uint operator()(const char * str) const { return strHash(str); } - }; - - NVCORE_API int strCaseCmp(const char * s1, const char * s2) NV_PURE; - NVCORE_API int strCmp(const char * s1, const char * s2) NV_PURE; - NVCORE_API void strCpy(char * dst, int size, const char * src); - NVCORE_API void strCpy(char * dst, int size, const char * src, int len); - NVCORE_API void strCat(char * dst, int size, const char * src); - - NVCORE_API bool strMatch(const char * str, const char * pat) NV_PURE; - - - /// String builder. - class NVCORE_CLASS StringBuilder - { - public: - - StringBuilder(); - explicit StringBuilder( int size_hint ); - StringBuilder( const char * str ); - StringBuilder( const StringBuilder & ); - - ~StringBuilder(); - - StringBuilder & format( const char * format, ... ) __attribute__((format (printf, 2, 3))); - StringBuilder & format( const char * format, va_list arg ); - - StringBuilder & append( const char * str ); - StringBuilder & appendFormat( const char * format, ... 
) __attribute__((format (printf, 2, 3))); - StringBuilder & appendFormat( const char * format, va_list arg ); - - StringBuilder & number( int i, int base = 10 ); - StringBuilder & number( uint i, int base = 10 ); - - StringBuilder & reserve( uint size_hint ); - StringBuilder & copy( const char * str ); - StringBuilder & copy( const StringBuilder & str ); - - StringBuilder & toLower(); - StringBuilder & toUpper(); - - void reset(); - bool isNull() const { return m_size == 0; } - - // const char * accessors - operator const char * () const { return m_str; } - operator char * () { return m_str; } - const char * str() const { return m_str; } - char * str() { return m_str; } - - /// Implement value semantics. - StringBuilder & operator=( const StringBuilder & s ) { - return copy(s); - } - - /// Implement value semantics. - StringBuilder & operator=( const char * s ) { - return copy(s); - } - - /// Equal operator. - bool operator==( const StringBuilder & s ) const { - if (s.isNull()) return isNull(); - else if (isNull()) return false; - else return strcmp(s.m_str, m_str) != 0; - } - - /// Return the exact length. - uint length() const { return isNull() ? 0 : uint(strlen(m_str)); } - - /// Return the size of the string container. - uint capacity() const { return m_size; } - - /// Return the hash of the string. - uint hash() const { return isNull() ? 0 : strHash(m_str); } - - /// Swap strings. - friend void swap(StringBuilder & a, StringBuilder & b) { - nv::swap(a.m_size, b.m_size); - nv::swap(a.m_str, b.m_str); - } - - protected: - - /// Size of the string container. - uint m_size; - - /// String. - char * m_str; - - }; - - - /// Path string. @@ This should be called PathBuilder. - class NVCORE_CLASS Path : public StringBuilder - { - public: - Path() : StringBuilder() {} - explicit Path(int size_hint) : StringBuilder(size_hint) {} - Path(const char * str) : StringBuilder(str) {} - Path(const Path & path) : StringBuilder(path) {} - - const char * fileName() const; - const char * extension() const; - - void translatePath(); - - void stripFileName(); - void stripExtension(); - - // statics - NVCORE_API static char separator(); - NVCORE_API static const char * fileName(const char *); - NVCORE_API static const char * extension(const char *); - }; - - - /// String class. - class NVCORE_CLASS String - { - public: - - /// Constructs a null string. @sa isNull() - String() - { - data = NULL; - } - - /// Constructs a shared copy of str. - String(const String & str) - { - data = str.data; - if (data != NULL) addRef(); - } - - /// Constructs a shared string from a standard string. - String(const char * str) - { - setString(str); - } - - /// Constructs a shared string from a standard string. - String(const char * str, int length) - { - setString(str, length); - } - - /// Constructs a shared string from a StringBuilder. - String(const StringBuilder & str) - { - setString(str); - } - - /// Dtor. - ~String() - { - release(); - } - - String clone() const; - - /// Release the current string and allocate a new one. - const String & operator=( const char * str ) - { - release(); - setString( str ); - return *this; - } - - /// Release the current string and allocate a new one. - const String & operator=( const StringBuilder & str ) - { - release(); - setString( str ); - return *this; - } - - /// Implement value semantics. - String & operator=( const String & str ) - { - if (str.data != data) - { - release(); - data = str.data; - addRef(); - } - return *this; - } - - /// Equal operator. 
- bool operator==( const String & str ) const - { - if( str.data == data ) { - return true; - } - if ((data == NULL) != (str.data == NULL)) { - return false; - } - return strcmp(data, str.data) == 0; - } - - /// Equal operator. - bool operator==( const char * str ) const - { - nvCheck(str != NULL); // Use isNull! - if (data == NULL) { - return false; - } - return strcmp(data, str) == 0; - } - - /// Not equal operator. - bool operator!=( const String & str ) const - { - if( str.data == data ) { - return false; - } - if ((data == NULL) != (str.data == NULL)) { - return true; - } - return strcmp(data, str.data) != 0; - } - - /// Not equal operator. - bool operator!=( const char * str ) const - { - nvCheck(str != NULL); // Use isNull! - if (data == NULL) { - return false; - } - return strcmp(data, str) != 0; - } - - /// Returns true if this string is the null string. - bool isNull() const { return data == NULL; } - - /// Return the exact length. - uint length() const { nvDebugCheck(data != NULL); return uint(strlen(data)); } - - /// Return the hash of the string. - uint hash() const { nvDebugCheck(data != NULL); return strHash(data); } - - /// const char * cast operator. - operator const char * () const { return data; } - - /// Get string pointer. - const char * str() const { return data; } - - - private: - - // Add reference count. - void addRef() - { - if (data != NULL) - { - setRefCount(getRefCount() + 1); - } - } - - // Decrease reference count. - void release() - { - if (data != NULL) - { - const uint16 count = getRefCount(); - setRefCount(count - 1); - if (count - 1 == 0) { - free(data - 2); - data = NULL; - } - } - } - - uint16 getRefCount() const - { - nvDebugCheck(data != NULL); - return *reinterpret_cast(data - 2); - } - - void setRefCount(uint16 count) { - nvDebugCheck(data != NULL); - nvCheck(count < 0xFFFF); - *reinterpret_cast(const_cast(data - 2)) = uint16(count); - } - - void setData(const char * str) { - data = str + 2; - } - - void allocString(const char * str) - { - allocString(str, (int)strlen(str)); - } - - void allocString(const char * str, int len) - { - const char * ptr = static_cast(::malloc(2 + len + 1)); - - setData( ptr ); - setRefCount( 0 ); - - // Copy string. - strCpy(const_cast(data), len+1, str, len); - - // Add terminating character. - const_cast(data)[len] = '\0'; - } - - void setString(const char * str); - void setString(const char * str, int length); - void setString(const StringBuilder & str); - - /// Swap strings. - friend void swap(String & a, String & b) { - swap(a.data, b.data); - } - - private: - - const char * data; - - }; + /// String hash based on Bernstein's hash. + inline uint strHash(const char * data, uint h = 5381) + { + uint i = 0; + while(data[i] != 0) { + h = (33 * h) ^ uint(data[i]); + i++; + } + return h; + } + + template <> struct Hash { + uint operator()(const char * str) const { return strHash(str); } + }; + + NVCORE_API uint strLen(const char * str) NV_PURE; // Asserts on NULL strings. + + NVCORE_API int strDiff(const char * s1, const char * s2) NV_PURE; // Asserts on NULL strings. + NVCORE_API int strCaseDiff(const char * s1, const char * s2) NV_PURE; // Asserts on NULL strings. + NVCORE_API bool strEqual(const char * s1, const char * s2) NV_PURE; // Accepts NULL strings. + NVCORE_API bool strCaseEqual(const char * s1, const char * s2) NV_PURE; // Accepts NULL strings. 
+ + template <> struct Equal { + bool operator()(const char * a, const char * b) const { return strEqual(a, b); } + }; + + NVCORE_API bool strBeginsWith(const char * dst, const char * prefix) NV_PURE; + NVCORE_API bool strEndsWith(const char * dst, const char * suffix) NV_PURE; + + + NVCORE_API void strCpy(char * dst, uint size, const char * src); + NVCORE_API void strCpy(char * dst, uint size, const char * src, uint len); + NVCORE_API void strCat(char * dst, uint size, const char * src); + + NVCORE_API const char * strSkipWhiteSpace(const char * str); + NVCORE_API char * strSkipWhiteSpace(char * str); + + NVCORE_API bool strMatch(const char * str, const char * pat) NV_PURE; + + NVCORE_API bool isNumber(const char * str) NV_PURE; + + /* @@ Implement these two functions and modify StringBuilder to use them? + NVCORE_API void strFormat(const char * dst, const char * fmt, ...); + NVCORE_API void strFormatList(const char * dst, const char * fmt, va_list arg); + + template void strFormatSafe(char (&buffer)[count], const char *fmt, ...) __attribute__((format (printf, 2, 3))); + template void strFormatSafe(char (&buffer)[count], const char *fmt, ...) { + va_list args; + va_start(args, fmt); + strFormatList(buffer, count, fmt, args); + va_end(args); + } + template void strFormatListSafe(char (&buffer)[count], const char *fmt, va_list arg) { + va_list tmp; + va_copy(tmp, args); + strFormatList(buffer, count, fmt, tmp); + va_end(tmp); + }*/ + + template void strCpySafe(char (&buffer)[count], const char *src) { + strCpy(buffer, count, src); + } + + template void strCatSafe(char (&buffer)[count], const char * src) { + strCat(buffer, count, src); + } + + + + /// String builder. + class NVCORE_CLASS StringBuilder + { + public: + + StringBuilder(); + explicit StringBuilder( uint size_hint ); + StringBuilder(const char * str); + StringBuilder(const char * str, uint len); + StringBuilder(const StringBuilder & other); + + ~StringBuilder(); + + StringBuilder & format( const char * format, ... ) __attribute__((format (printf, 2, 3))); + StringBuilder & formatList( const char * format, va_list arg ); + + StringBuilder & append(const char * str); + StringBuilder & append(const char * str, uint len); + StringBuilder & appendFormat(const char * format, ...) __attribute__((format (printf, 2, 3))); + StringBuilder & appendFormatList(const char * format, va_list arg); + + StringBuilder & appendSpace(uint n); + + StringBuilder & number( int i, int base = 10 ); + StringBuilder & number( uint i, int base = 10 ); + + StringBuilder & reserve(uint size_hint); + StringBuilder & copy(const char * str); + StringBuilder & copy(const char * str, uint len); + StringBuilder & copy(const StringBuilder & str); + + StringBuilder & toLower(); + StringBuilder & toUpper(); + + bool endsWith(const char * str) const; + bool beginsWith(const char * str) const; + + char * reverseFind(char c); + + void reset(); + bool isNull() const { return m_size == 0; } + + // const char * accessors + //operator const char * () const { return m_str; } + //operator char * () { return m_str; } + const char * str() const { return m_str; } + char * str() { return m_str; } + + char * release(); + + /// Implement value semantics. + StringBuilder & operator=( const StringBuilder & s ) { + return copy(s); + } + + /// Implement value semantics. + StringBuilder & operator=( const char * s ) { + return copy(s); + } + + /// Equal operator. + bool operator==( const StringBuilder & s ) const { + return strMatch(s.m_str, m_str); + } + + /// Return the exact length. 
+ uint length() const { return isNull() ? 0 : strLen(m_str); } + + /// Return the size of the string container. + uint capacity() const { return m_size; } + + /// Return the hash of the string. + uint hash() const { return isNull() ? 0 : strHash(m_str); } + + // Swap strings. + friend void swap(StringBuilder & a, StringBuilder & b); + + protected: + + /// Size of the string container. + uint m_size; + + /// String. + char * m_str; + + }; + + + /// Path string. @@ This should be called PathBuilder. + class NVCORE_CLASS Path : public StringBuilder + { + public: + Path() : StringBuilder() {} + explicit Path(int size_hint) : StringBuilder(size_hint) {} + Path(const char * str) : StringBuilder(str) {} + Path(const Path & path) : StringBuilder(path) {} + + const char * fileName() const; + const char * extension() const; + + void translatePath(char pathSeparator = NV_PATH_SEPARATOR); + + void appendSeparator(char pathSeparator = NV_PATH_SEPARATOR); + + void stripFileName(); + void stripExtension(); + + // statics + static char separator(); + static const char * fileName(const char *); + static const char * extension(const char *); + + static void translatePath(char * path, char pathSeparator = NV_PATH_SEPARATOR); + }; + + + /// String class. + class NVCORE_CLASS String + { + public: + + /// Constructs a null string. @sa isNull() + String() + { + data = NULL; + } + + /// Constructs a shared copy of str. + String(const String & str) + { + data = str.data; + if (data != NULL) addRef(); + } + + /// Constructs a shared string from a standard string. + String(const char * str) + { + setString(str); + } + + /// Constructs a shared string from a standard string. + String(const char * str, int length) + { + setString(str, length); + } + + /// Constructs a shared string from a StringBuilder. + String(const StringBuilder & str) + { + setString(str); + } + + /// Dtor. + ~String() + { + release(); + } + + String clone() const; + + /// Release the current string and allocate a new one. + const String & operator=( const char * str ) + { + release(); + setString( str ); + return *this; + } + + /// Release the current string and allocate a new one. + const String & operator=( const StringBuilder & str ) + { + release(); + setString( str ); + return *this; + } + + /// Implement value semantics. + String & operator=( const String & str ) + { + if (str.data != data) + { + release(); + data = str.data; + addRef(); + } + return *this; + } + + /// Equal operator. + bool operator==( const String & str ) const + { + return strMatch(str.data, data); + } + + /// Equal operator. + bool operator==( const char * str ) const + { + return strMatch(str, data); + } + + /// Not equal operator. + bool operator!=( const String & str ) const + { + return !strMatch(str.data, data); + } + + /// Not equal operator. + bool operator!=( const char * str ) const + { + return !strMatch(str, data); + } + + /// Returns true if this string is the null string. + bool isNull() const { return data == NULL; } + + /// Return the exact length. + uint length() const { nvDebugCheck(data != NULL); return strLen(data); } + + /// Return the hash of the string. + uint hash() const { nvDebugCheck(data != NULL); return strHash(data); } + + /// const char * cast operator. + operator const char * () const { return data; } + + /// Get string pointer. + const char * str() const { return data; } + + + private: + + // Add reference count. + void addRef(); + + // Decrease reference count. 
+ void release(); + + uint16 getRefCount() const + { + nvDebugCheck(data != NULL); + return *reinterpret_cast(data - 2); + } + + void setRefCount(uint16 count) { + nvDebugCheck(data != NULL); + nvCheck(count < 0xFFFF); + *reinterpret_cast(const_cast(data - 2)) = uint16(count); + } + + void setData(const char * str) { + data = str + 2; + } + + void allocString(const char * str) + { + allocString(str, strLen(str)); + } + + void allocString(const char * str, uint length); + + void setString(const char * str); + void setString(const char * str, uint length); + void setString(const StringBuilder & str); + + // Swap strings. + friend void swap(String & a, String & b); + + private: + + const char * data; + + }; + + template <> struct Hash { + uint operator()(const String & str) const { return str.hash(); } + }; + + + // Like AutoPtr, but for const char strings. + class AutoString + { + NV_FORBID_COPY(AutoString); + NV_FORBID_HEAPALLOC(); + public: + + // Ctor. + AutoString(const char * p = NULL) : m_ptr(p) { } + +#if NV_CC_CPP11 + // Move ctor. + AutoString(AutoString && ap) : m_ptr(ap.m_ptr) { ap.m_ptr = NULL; } +#endif + + // Dtor. Deletes owned pointer. + ~AutoString() { + delete [] m_ptr; + m_ptr = NULL; + } + + // Delete owned pointer and assign new one. + void operator=(const char * p) { + if (p != m_ptr) + { + delete [] m_ptr; + m_ptr = p; + } + } + + // Get pointer. + const char * ptr() const { return m_ptr; } + operator const char *() const { return m_ptr; } + + // Relinquish ownership of the underlying pointer and returns that pointer. + const char * release() { + const char * tmp = m_ptr; + m_ptr = NULL; + return tmp; + } + + // comparison operators. + friend bool operator == (const AutoString & ap, const char * const p) { + return (ap.ptr() == p); + } + friend bool operator != (const AutoString & ap, const char * const p) { + return (ap.ptr() != p); + } + friend bool operator == (const char * const p, const AutoString & ap) { + return (ap.ptr() == p); + } + friend bool operator != (const char * const p, const AutoString & ap) { + return (ap.ptr() != p); + } + + private: + const char * m_ptr; + }; } // nv namespace Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/StrLib.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/StrLib.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/StrLib.cpp @@ -1,137 +1,185 @@ -// This code is in the public domain -- castanyo@yahoo.es +// This code is in the public domain -- Ignacio Castaño -#include +#include "StrLib.h" -#include // log -#include // vsnprintf +#include "Memory.h" +#include "Utils.h" // swap + +#include // log +#include // vsnprintf +#include // strlen, strcmp, etc. #if NV_CC_MSVC #include // vsnprintf #endif -#if NV_OS_WIN32 -#define NV_PATH_SEPARATOR '\\' -#else -#define NV_PATH_SEPARATOR '/' -#endif - using namespace nv; namespace { - static char * strAlloc(uint size) - { - return static_cast(::malloc(size)); - } - - static char * strReAlloc(char * str, uint size) - { - return static_cast(::realloc(str, size)); - } - - static void strFree(const char * str) - { - return ::free(const_cast(str)); - } - - /*static char * strDup( const char * str ) - { - nvDebugCheck( str != NULL ); - uint len = uint(strlen( str ) + 1); - char * dup = strAlloc( len ); - memcpy( dup, str, len ); - return dup; - }*/ - - // helper function for integer to string conversion. 
- static char * i2a( uint i, char *a, uint r ) - { - if( i / r > 0 ) { - a = i2a( i / r, a, r ); - } - *a = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"[i % r]; - return a + 1; - } - - // Locale independent functions. - static inline char toUpper( char c ) { - return (c<'a' || c>'z') ? (c) : (c+'A'-'a'); - } - static inline char toLower( char c ) { - return (c<'A' || c>'Z') ? (c) : (c+'a'-'A'); - } - static inline bool isAlpha( char c ) { - return (c>='a' && c<='z') || (c>='A' && c<='Z'); - } - static inline bool isDigit( char c ) { - return c>='0' && c<='9'; - } - static inline bool isAlnum( char c ) { - return (c>='a' && c<='z') || (c>='A' && c<='Z') || (c>='0' && c<='9'); - } - -} - -int nv::strCmp(const char * s1, const char * s2) -{ - nvDebugCheck(s1 != NULL); - nvDebugCheck(s2 != NULL); - return strcmp(s1, s2); + static char * strAlloc(uint size) + { + return malloc(size); + } + + static char * strReAlloc(char * str, uint size) + { + return realloc(str, size); + } + + static void strFree(const char * str) + { + return free(str); + } + + /*static char * strDup( const char * str ) + { + nvDebugCheck( str != NULL ); + uint len = uint(strlen( str ) + 1); + char * dup = strAlloc( len ); + memcpy( dup, str, len ); + return dup; + }*/ + + // helper function for integer to string conversion. + static char * i2a( uint i, char *a, uint r ) + { + if( i / r > 0 ) { + a = i2a( i / r, a, r ); + } + *a = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"[i % r]; + return a + 1; + } + + // Locale independent functions. + static inline char toUpper( char c ) { + return (c<'a' || c>'z') ? (c) : (c+'A'-'a'); + } + static inline char toLower( char c ) { + return (c<'A' || c>'Z') ? (c) : (c+'a'-'A'); + } + static inline bool isAlpha( char c ) { + return (c>='a' && c<='z') || (c>='A' && c<='Z'); + } + static inline bool isDigit( char c ) { + return c>='0' && c<='9'; + } + static inline bool isAlnum( char c ) { + return (c>='a' && c<='z') || (c>='A' && c<='Z') || (c>='0' && c<='9'); + } + +} + +uint nv::strLen(const char * str) +{ + nvDebugCheck(str != NULL); + return U32(strlen(str)); } -int nv::strCaseCmp(const char * s1, const char * s2) +int nv::strDiff(const char * s1, const char * s2) { - nvDebugCheck(s1 != NULL); - nvDebugCheck(s1 != NULL); + nvDebugCheck(s1 != NULL); + nvDebugCheck(s2 != NULL); + return strcmp(s1, s2); +} + +int nv::strCaseDiff(const char * s1, const char * s2) +{ + nvDebugCheck(s1 != NULL); + nvDebugCheck(s1 != NULL); #if NV_CC_MSVC - return _stricmp(s1, s2); + return _stricmp(s1, s2); #else - return strcasecmp(s1, s2); + return strcasecmp(s1, s2); #endif } -void nv::strCpy(char * dst, int size, const char * src) +bool nv::strEqual(const char * s1, const char * s2) +{ + if (s1 == s2) return true; + if (s1 == NULL || s2 == NULL) return false; + return strcmp(s1, s2) == 0; +} + +bool nv::strCaseEqual(const char * s1, const char * s2) { - nvDebugCheck(dst != NULL); - nvDebugCheck(src != NULL); + if (s1 == s2) return true; + if (s1 == NULL || s2 == NULL) return false; + return strCaseDiff(s1, s2) == 0; +} + +bool nv::strBeginsWith(const char * str, const char * prefix) +{ + //return strstr(str, prefix) == dst; + return strncmp(str, prefix, strlen(prefix)) == 0; +} + +bool nv::strEndsWith(const char * str, const char * suffix) +{ + uint ml = strLen(str); + uint sl = strLen(suffix); + if (ml < sl) return false; + return strncmp(str + ml - sl, suffix, sl) == 0; +} + +// @@ Add asserts to detect overlap between dst and src? 
+void nv::strCpy(char * dst, uint size, const char * src) +{ + nvDebugCheck(dst != NULL); + nvDebugCheck(src != NULL); #if NV_CC_MSVC && _MSC_VER >= 1400 - strcpy_s(dst, size, src); + strcpy_s(dst, size, src); #else - NV_UNUSED(size); - strcpy(dst, src); + NV_UNUSED(size); + strcpy(dst, src); #endif } -void nv::strCpy(char * dst, int size, const char * src, int len) +void nv::strCpy(char * dst, uint size, const char * src, uint len) { - nvDebugCheck(dst != NULL); - nvDebugCheck(src != NULL); + nvDebugCheck(dst != NULL); + nvDebugCheck(src != NULL); #if NV_CC_MSVC && _MSC_VER >= 1400 - strncpy_s(dst, size, src, len); + strncpy_s(dst, size, src, len); #else - NV_UNUSED(size); - strncpy(dst, src, len); + int n = min(len+1, size); + strncpy(dst, src, n); + dst[n-1] = '\0'; #endif } -void nv::strCat(char * dst, int size, const char * src) +void nv::strCat(char * dst, uint size, const char * src) { - nvDebugCheck(dst != NULL); - nvDebugCheck(src != NULL); + nvDebugCheck(dst != NULL); + nvDebugCheck(src != NULL); #if NV_CC_MSVC && _MSC_VER >= 1400 - strcat_s(dst, size, src); + strcat_s(dst, size, src); #else - NV_UNUSED(size); - strcat(dst, src); + NV_UNUSED(size); + strcat(dst, src); #endif } +NVCORE_API const char * nv::strSkipWhiteSpace(const char * str) +{ + nvDebugCheck(str != NULL); + while (*str == ' ') str++; + return str; +} + +NVCORE_API char * nv::strSkipWhiteSpace(char * str) +{ + nvDebugCheck(str != NULL); + while (*str == ' ') str++; + return str; +} + /** Pattern matching routine. I don't remember where did I get this. */ bool nv::strMatch(const char * str, const char * pat) { - nvDebugCheck(str != NULL); - nvDebugCheck(pat != NULL); + nvDebugCheck(str != NULL); + nvDebugCheck(pat != NULL); char c2; @@ -187,6 +235,13 @@ } } +bool nv::isNumber(const char * str) { + while(*str != '\0') { + if (!isDigit(*str)) return false; + str++; + } + return true; +} /** Empty string. */ @@ -195,313 +250,405 @@ } /** Preallocate space. */ -StringBuilder::StringBuilder( int size_hint ) : m_size(size_hint) +StringBuilder::StringBuilder( uint size_hint ) : m_size(size_hint) { - nvDebugCheck(m_size > 0); - m_str = strAlloc(m_size); - *m_str = '\0'; + nvDebugCheck(m_size > 0); + m_str = strAlloc(m_size); + *m_str = '\0'; } /** Copy ctor. */ StringBuilder::StringBuilder( const StringBuilder & s ) : m_size(0), m_str(NULL) { - copy(s); + copy(s); } /** Copy string. */ -StringBuilder::StringBuilder( const char * s ) : m_size(0), m_str(NULL) +StringBuilder::StringBuilder(const char * s) : m_size(0), m_str(NULL) { - copy(s); + if (s != NULL) { + copy(s); + } +} + +/** Copy string. */ +StringBuilder::StringBuilder(const char * s, uint len) : m_size(0), m_str(NULL) +{ + copy(s, len); } /** Delete the string. */ StringBuilder::~StringBuilder() { - m_size = 0; - strFree(m_str); - m_str = NULL; + strFree(m_str); } /** Format a string safely. */ StringBuilder & StringBuilder::format( const char * fmt, ... ) { - nvDebugCheck(fmt != NULL); - va_list arg; - va_start( arg, fmt ); + nvDebugCheck(fmt != NULL); + va_list arg; + va_start( arg, fmt ); - format( fmt, arg ); + formatList( fmt, arg ); - va_end( arg ); + va_end( arg ); - return *this; + return *this; } /** Format a string safely. 
*/ -StringBuilder & StringBuilder::format( const char * fmt, va_list arg ) +StringBuilder & StringBuilder::formatList( const char * fmt, va_list arg ) { - nvDebugCheck(fmt != NULL); + nvDebugCheck(fmt != NULL); - if( m_size == 0 ) { - m_size = 64; - m_str = strAlloc( m_size ); - } + if (m_size == 0) { + m_size = 64; + m_str = strAlloc( m_size ); + } - va_list tmp; - va_copy(tmp, arg); + va_list tmp; + va_copy(tmp, arg); #if NV_CC_MSVC && _MSC_VER >= 1400 - int n = vsnprintf_s(m_str, m_size, _TRUNCATE, fmt, tmp); + int n = vsnprintf_s(m_str, m_size, _TRUNCATE, fmt, tmp); #else - int n = vsnprintf(m_str, m_size, fmt, tmp); + int n = vsnprintf(m_str, m_size, fmt, tmp); #endif - va_end(tmp); + va_end(tmp); - while( n < 0 || n >= int(m_size) ) { - if( n > -1 ) { - m_size = n + 1; - } - else { - m_size *= 2; - } + while( n < 0 || n >= int(m_size) ) { + if( n > -1 ) { + m_size = n + 1; + } + else { + m_size *= 2; + } - m_str = strReAlloc(m_str, m_size); + m_str = strReAlloc(m_str, m_size); - va_copy(tmp, arg); + va_copy(tmp, arg); #if NV_CC_MSVC && _MSC_VER >= 1400 - n = vsnprintf_s(m_str, m_size, _TRUNCATE, fmt, tmp); + n = vsnprintf_s(m_str, m_size, _TRUNCATE, fmt, tmp); #else - n = vsnprintf(m_str, m_size, fmt, tmp); + n = vsnprintf(m_str, m_size, fmt, tmp); #endif - va_end(tmp); - } - - nvDebugCheck(n < int(m_size)); - - // Make sure it's null terminated. - nvDebugCheck(m_str[n] == '\0'); - //str[n] = '\0'; + va_end(tmp); + } + + nvDebugCheck(n < int(m_size)); + + // Make sure it's null terminated. + nvDebugCheck(m_str[n] == '\0'); + //str[n] = '\0'; - return *this; + return *this; } /** Append a string. */ StringBuilder & StringBuilder::append( const char * s ) { - nvDebugCheck(s != NULL); + return append(s, U32(strlen( s ))); +} - const uint slen = uint(strlen( s )); - if( m_str == NULL ) { - m_size = slen + 1; - m_str = strAlloc(m_size); - strCpy( m_str, m_size, s ); - } - else { - - const uint len = uint(strlen( m_str )); - - if( m_size < len + slen + 1 ) { - m_size = len + slen + 1; - m_str = strReAlloc(m_str, m_size); - } - - strCat( m_str, m_size, s ); - } +/** Append a string. */ +StringBuilder & StringBuilder::append(const char * s, uint len) +{ + nvDebugCheck(s != NULL); + + uint offset = length(); + const uint size = offset + len + 1; + reserve(size); + strCpy(m_str + offset, len + 1, s, len); - return *this; + return *this; } /** Append a formatted string. */ -StringBuilder & StringBuilder::appendFormat( const char * format, ... ) +StringBuilder & StringBuilder::appendFormat( const char * fmt, ... ) { - nvDebugCheck( format != NULL ); + nvDebugCheck( fmt != NULL ); - va_list arg; - va_start( arg, format ); + va_list arg; + va_start( arg, fmt ); - appendFormat( format, arg ); + appendFormatList( fmt, arg ); - va_end( arg ); + va_end( arg ); - return *this; + return *this; } /** Append a formatted string. */ -StringBuilder & StringBuilder::appendFormat( const char * format, va_list arg ) +StringBuilder & StringBuilder::appendFormatList( const char * fmt, va_list arg ) { - nvDebugCheck( format != NULL ); - - va_list tmp; - va_copy(tmp, arg); - - StringBuilder tmp_str; - tmp_str.format( format, tmp ); - append( tmp_str ); - - va_end(tmp); + nvDebugCheck( fmt != NULL ); + + va_list tmp; + va_copy(tmp, arg); + + if (m_size == 0) { + formatList(fmt, arg); + } + else { + StringBuilder tmp_str; + tmp_str.formatList( fmt, tmp ); + append( tmp_str.str() ); + } + + va_end(tmp); + + return *this; +} + +// Append n spaces. 
+StringBuilder & StringBuilder::appendSpace(uint n) +{ + if (m_str == NULL) { + m_size = n + 1; + m_str = strAlloc(m_size); + memset(m_str, ' ', m_size); + m_str[n] = '\0'; + } + else { + const uint len = strLen(m_str); + if (m_size < len + n + 1) { + m_size = len + n + 1; + m_str = strReAlloc(m_str, m_size); + } + memset(m_str + len, ' ', n); + m_str[len+n] = '\0'; + } - return *this; + return *this; } /** Convert number to string in the given base. */ StringBuilder & StringBuilder::number( int i, int base ) { - nvCheck( base >= 2 ); - nvCheck( base <= 36 ); + nvCheck( base >= 2 ); + nvCheck( base <= 36 ); - // @@ This needs to be done correctly. - // length = floor(log(i, base)); - uint len = uint(log(float(i)) / log(float(base)) + 2); // one more if negative - reserve(len); - - if( i < 0 ) { - *m_str = '-'; - *i2a(uint(-i), m_str+1, base) = 0; - } - else { - *i2a(i, m_str, base) = 0; - } + // @@ This needs to be done correctly. + // length = floor(log(i, base)); + uint len = uint(log(float(i)) / log(float(base)) + 2); // one more if negative + reserve(len); + + if( i < 0 ) { + *m_str = '-'; + *i2a(uint(-i), m_str+1, base) = 0; + } + else { + *i2a(i, m_str, base) = 0; + } - return *this; + return *this; } /** Convert number to string in the given base. */ StringBuilder & StringBuilder::number( uint i, int base ) { - nvCheck( base >= 2 ); - nvCheck( base <= 36 ); + nvCheck( base >= 2 ); + nvCheck( base <= 36 ); - // @@ This needs to be done correctly. - // length = floor(log(i, base)); - uint len = uint(log(float(i)) / log(float(base)) - 0.5f + 1); - reserve(len); + // @@ This needs to be done correctly. + // length = floor(log(i, base)); + uint len = uint(log(float(i)) / log(float(base)) - 0.5f + 1); + reserve(len); - *i2a(i, m_str, base) = 0; + *i2a(i, m_str, base) = 0; - return *this; + return *this; } /** Resize the string preserving the contents. */ StringBuilder & StringBuilder::reserve( uint size_hint ) { - nvCheck(size_hint != 0); - if( size_hint > m_size ) { - m_str = strReAlloc(m_str, size_hint); - m_size = size_hint; - } - return *this; + nvCheck(size_hint != 0); + if (size_hint > m_size) { + m_str = strReAlloc(m_str, size_hint); + m_size = size_hint; + } + return *this; } /** Copy a string safely. */ -StringBuilder & StringBuilder::copy( const char * s ) +StringBuilder & StringBuilder::copy(const char * s) +{ + nvCheck( s != NULL ); + const uint str_size = uint(strlen( s )) + 1; + reserve(str_size); + memcpy(m_str, s, str_size); + return *this; +} + +/** Copy a string safely. */ +StringBuilder & StringBuilder::copy(const char * s, uint len) { - nvCheck( s != NULL ); - uint str_size = uint(strlen( s )) + 1; - reserve(str_size); - strCpy( m_str, str_size, s ); - return *this; + nvCheck( s != NULL ); + const uint str_size = len + 1; + reserve(str_size); + strCpy(m_str, str_size, s, len); + return *this; } /** Copy an StringBuilder. 
*/ StringBuilder & StringBuilder::copy( const StringBuilder & s ) { - if( s.m_str == NULL ) { - nvCheck( s.m_size == 0 ); - m_size = 0; - strFree( m_str ); - m_str = NULL; - } - else { - reserve( s.m_size ); - strCpy( m_str, s.m_size, s.m_str ); - } - return *this; + if (s.m_str == NULL) { + nvCheck( s.m_size == 0 ); + reset(); + } + else { + reserve( s.m_size ); + strCpy( m_str, s.m_size, s.m_str ); + } + return *this; +} + +bool StringBuilder::endsWith(const char * str) const +{ + uint l = uint(strlen(str)); + uint ml = uint(strlen(m_str)); + if (ml < l) return false; + return strncmp(m_str + ml - l, str, l) == 0; +} + +bool StringBuilder::beginsWith(const char * str) const +{ + size_t l = strlen(str); + return strncmp(m_str, str, l) == 0; +} + +// Find given char starting from the end. +char * StringBuilder::reverseFind(char c) +{ + int length = (int)strlen(m_str) - 1; + while (length >= 0 && m_str[length] != c) { + length--; + } + if (length >= 0) { + return m_str + length; + } + else { + return NULL; + } } + /** Reset the string. */ void StringBuilder::reset() { - m_size = 0; - strFree( m_str ); - m_str = NULL; + m_size = 0; + strFree( m_str ); + m_str = NULL; +} + +/** Release the allocated string. */ +char * StringBuilder::release() +{ + char * str = m_str; + m_size = 0; + m_str = NULL; + return str; +} + +// Swap strings. +void nv::swap(StringBuilder & a, StringBuilder & b) { + swap(a.m_size, b.m_size); + swap(a.m_str, b.m_str); } /// Get the file name from a path. const char * Path::fileName() const { - return fileName(m_str); + return fileName(m_str); } /// Get the extension from a file path. const char * Path::extension() const { - return extension(m_str); + return extension(m_str); } +/*static */void Path::translatePath(char * path, char pathSeparator/*= NV_PATH_SEPARATOR*/) { + nvCheck(path != NULL); + + for (int i = 0;; i++) { + if (path[i] == '\0') break; + if (path[i] == '\\' || path[i] == '/') path[i] = pathSeparator; + } +} + /// Toggles path separators (ie. \\ into /). -void Path::translatePath() +void Path::translatePath(char pathSeparator/*=NV_PATH_SEPARATOR*/) { - nvCheck( m_str != NULL ); + nvCheck(!isNull()); + translatePath(m_str, pathSeparator); +} - for(int i = 0; ; i++) { - if( m_str[i] == '\0' ) break; -#if NV_PATH_SEPARATOR == '/' - if( m_str[i] == '\\' ) m_str[i] = NV_PATH_SEPARATOR; -#else - if( m_str[i] == '/' ) m_str[i] = NV_PATH_SEPARATOR; -#endif - } +void Path::appendSeparator(char pathSeparator/*=NV_PATH_SEPARATOR*/) +{ + nvCheck(!isNull()); + + const uint l = length(); + + if (m_str[l] != '\\' && m_str[l] != '/') { + char separatorString[] = { pathSeparator, '\0' }; + append(separatorString); + } } /** - * Strip the file name from a path. - * @warning path cannot end with '/' o '\\', can't it? - */ +* Strip the file name from a path. +* @warning path cannot end with '/' o '\\', can't it? +*/ void Path::stripFileName() { - nvCheck( m_str != NULL ); + nvCheck( m_str != NULL ); - int length = (int)strlen(m_str) - 1; - while (length > 0 && m_str[length] != '/' && m_str[length] != '\\'){ - length--; - } - if( length ) { - m_str[length+1] = 0; - } - else { - m_str[0] = 0; - } + int length = (int)strlen(m_str) - 1; + while (length > 0 && m_str[length] != '/' && m_str[length] != '\\'){ + length--; + } + if( length ) { + m_str[length+1] = 0; + } + else { + m_str[0] = 0; + } } /// Strip the extension from a path name. void Path::stripExtension() { - nvCheck( m_str != NULL ); - - int length = (int)strlen(m_str) - 1; - while( length > 0 && m_str[length] != '.' 
) { - length--; - if( m_str[length] == NV_PATH_SEPARATOR ) { - return; // no extension - } - } - if( length ) { - m_str[length] = 0; - } + nvCheck( m_str != NULL ); + + int length = (int)strlen(m_str) - 1; + while (length > 0 && m_str[length] != '.') { + length--; + if( m_str[length] == NV_PATH_SEPARATOR ) { + return; // no extension + } + } + if (length > 0) { + m_str[length] = 0; + } } @@ -509,39 +656,39 @@ // static char Path::separator() { - return NV_PATH_SEPARATOR; + return NV_PATH_SEPARATOR; } // static const char * Path::fileName(const char * str) { - nvCheck( str != NULL ); + nvCheck( str != NULL ); - int length = (int)strlen(str) - 1; - while( length >= 0 && str[length] != separator() ) { - length--; - } + int length = (int)strlen(str) - 1; + while (length >= 0 && str[length] != '\\' && str[length] != '/') { + length--; + } - return &str[length+1]; + return &str[length+1]; } // static const char * Path::extension(const char * str) { - nvCheck( str != NULL ); + nvCheck( str != NULL ); - int length, l; - l = length = (int)strlen( str ); - while( length > 0 && str[length] != '.' ) { - length--; - if( str[length] == separator() ) { - return &str[l]; // no extension - } - } - if( length == 0 ) { - return &str[l]; - } - return &str[length]; + int length, l; + l = length = (int)strlen( str ); + while (length > 0 && str[length] != '.') { + length--; + if (str[length] == '\\' || str[length] == '/') { + return &str[l]; // no extension + } + } + if (length == 0) { + return &str[l]; + } + return &str[length]; } @@ -549,36 +696,77 @@ /// Clone this string String String::clone() const { - String str(data); - return str; + String str(data); + return str; } void String::setString(const char * str) { - if (str == NULL) { - data = NULL; - } - else { - allocString( str ); - addRef(); - } + if (str == NULL) { + data = NULL; + } + else { + allocString( str ); + addRef(); + } } -void String::setString(const char * str, int length) +void String::setString(const char * str, uint length) { - nvDebugCheck(str != NULL); + nvDebugCheck(str != NULL); - allocString(str, length); - addRef(); + allocString(str, length); + addRef(); } void String::setString(const StringBuilder & str) { - if (str.str() == NULL) { - data = NULL; - } - else { - allocString(str); - addRef(); - } + if (str.str() == NULL) { + data = NULL; + } + else { + allocString(str.str()); + addRef(); + } } + +// Add reference count. +void String::addRef() +{ + if (data != NULL) + { + setRefCount(getRefCount() + 1); + } +} + +// Decrease reference count. +void String::release() +{ + if (data != NULL) + { + const uint16 count = getRefCount(); + setRefCount(count - 1); + if (count - 1 == 0) { + free(data - 2); + data = NULL; + } + } +} + +void String::allocString(const char * str, uint len) +{ + const char * ptr = malloc(2 + len + 1); + + setData( ptr ); + setRefCount( 0 ); + + // Copy string. + strCpy(const_cast(data), len+1, str, len); + + // Add terminating character. 
+ const_cast(data)[len] = '\0'; +} + +void nv::swap(String & a, String & b) { + swap(a.data, b.data); +} Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/Stream.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/Stream.h +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/Stream.h @@ -1,160 +1,164 @@ -// This code is in the public domain -- castanyo@yahoo.es +// This code is in the public domain -- Ignacio Castaño -#ifndef NVCORE_STREAM_H -#define NVCORE_STREAM_H +#pragma once +#ifndef NV_CORE_STREAM_H +#define NV_CORE_STREAM_H -#include -#include +#include "nvcore.h" +#include "Debug.h" namespace nv { -/// Base stream class. -class NVCORE_CLASS Stream { -public: - - enum ByteOrder { - LittleEndian = false, - BigEndian = true, - }; - - /// Get the byte order of the system. - static ByteOrder getSystemByteOrder() { -# if NV_LITTLE_ENDIAN - return LittleEndian; -# else - return BigEndian; -# endif - } - - - /// Ctor. - Stream() : m_byteOrder(LittleEndian) { } - - /// Virtual destructor. - virtual ~Stream() {} - - /// Set byte order. - void setByteOrder(ByteOrder bo) { m_byteOrder = bo; } - - /// Get byte order. - ByteOrder byteOrder() const { return m_byteOrder; } - - - /// Serialize the given data. - virtual uint serialize( void * data, uint len ) = 0; - - /// Move to the given position in the archive. - virtual void seek( uint pos ) = 0; - - /// Return the current position in the archive. - virtual uint tell() const = 0; - - /// Return the current size of the archive. - virtual uint size() const = 0; - - /// Determine if there has been any error. - virtual bool isError() const = 0; - - /// Clear errors. - virtual void clearError() = 0; - - /// Return true if the stream is at the end. - virtual bool isAtEnd() const = 0; - - /// Return true if the stream is seekable. - virtual bool isSeekable() const = 0; - - /// Return true if this is an input stream. - virtual bool isLoading() const = 0; - - /// Return true if this is an output stream. - virtual bool isSaving() const = 0; - - - // friends - friend Stream & operator<<( Stream & s, bool & c ) { -# if NV_OS_DARWIN - nvStaticCheck(sizeof(bool) == 4); - uint8 b = c ? 
1 : 0; - s.serialize( &b, 1 ); - c = (b == 1); -# else - nvStaticCheck(sizeof(bool) == 1); - s.serialize( &c, 1 ); -# endif - return s; - } - friend Stream & operator<<( Stream & s, char & c ) { - nvStaticCheck(sizeof(char) == 1); - s.serialize( &c, 1 ); - return s; - } - friend Stream & operator<<( Stream & s, uint8 & c ) { - nvStaticCheck(sizeof(uint8) == 1); - s.serialize( &c, 1 ); - return s; - } - friend Stream & operator<<( Stream & s, int8 & c ) { - nvStaticCheck(sizeof(int8) == 1); - s.serialize( &c, 1 ); - return s; - } - friend Stream & operator<<( Stream & s, uint16 & c ) { - nvStaticCheck(sizeof(uint16) == 2); - return s.byteOrderSerialize( &c, 2 ); - } - friend Stream & operator<<( Stream & s, int16 & c ) { - nvStaticCheck(sizeof(int16) == 2); - return s.byteOrderSerialize( &c, 2 ); - } - friend Stream & operator<<( Stream & s, uint32 & c ) { - nvStaticCheck(sizeof(uint32) == 4); - return s.byteOrderSerialize( &c, 4 ); - } - friend Stream & operator<<( Stream & s, int32 & c ) { - nvStaticCheck(sizeof(int32) == 4); - return s.byteOrderSerialize( &c, 4 ); - } - friend Stream & operator<<( Stream & s, uint64 & c ) { - nvStaticCheck(sizeof(uint64) == 8); - return s.byteOrderSerialize( &c, 8 ); - } - friend Stream & operator<<( Stream & s, int64 & c ) { - nvStaticCheck(sizeof(int64) == 8); - return s.byteOrderSerialize( &c, 8 ); - } - friend Stream & operator<<( Stream & s, float & c ) { - nvStaticCheck(sizeof(float) == 4); - return s.byteOrderSerialize( &c, 4 ); - } - friend Stream & operator<<( Stream & s, double & c ) { - nvStaticCheck(sizeof(double) == 8); - return s.byteOrderSerialize( &c, 8 ); - } - -protected: - - /// Serialize in the stream byte order. - Stream & byteOrderSerialize( void * v, uint len ) { - if( m_byteOrder == getSystemByteOrder() ) { - serialize( v, len ); - } - else { - for( uint i = len; i > 0; i-- ) { - serialize( (uint8 *)v + i - 1, 1 ); - } - } - return *this; - } + /// Base stream class. + class NVCORE_CLASS Stream { + public: + + enum ByteOrder { + LittleEndian = false, + BigEndian = true, + }; + + /// Get the byte order of the system. + static ByteOrder getSystemByteOrder() { +#if NV_LITTLE_ENDIAN + return LittleEndian; +#else + return BigEndian; +#endif + } + + + /// Ctor. + Stream() : m_byteOrder(LittleEndian) { } + + /// Virtual destructor. + virtual ~Stream() {} + + /// Set byte order. + void setByteOrder(ByteOrder bo) { m_byteOrder = bo; } + + /// Get byte order. + ByteOrder byteOrder() const { return m_byteOrder; } + + + /// Serialize the given data. + virtual uint serialize( void * data, uint len ) = 0; + + /// Move to the given position in the archive. + virtual void seek( uint pos ) = 0; + + /// Return the current position in the archive. + virtual uint tell() const = 0; + + /// Return the current size of the archive. + virtual uint size() const = 0; + + /// Determine if there has been any error. + virtual bool isError() const = 0; + + /// Clear errors. + virtual void clearError() = 0; + + /// Return true if the stream is at the end. + virtual bool isAtEnd() const = 0; + + /// Return true if the stream is seekable. + virtual bool isSeekable() const = 0; + + /// Return true if this is an input stream. + virtual bool isLoading() const = 0; + + /// Return true if this is an output stream. + virtual bool isSaving() const = 0; + + + void advance(uint offset) { seek(tell() + offset); } + + + // friends + friend Stream & operator<<( Stream & s, bool & c ) { +#if NV_OS_DARWIN && !NV_CC_CPP11 + nvStaticCheck(sizeof(bool) == 4); + uint8 b = c ? 
1 : 0; + s.serialize( &b, 1 ); + c = (b == 1); +#else + nvStaticCheck(sizeof(bool) == 1); + s.serialize( &c, 1 ); +#endif + return s; + } + friend Stream & operator<<( Stream & s, char & c ) { + nvStaticCheck(sizeof(char) == 1); + s.serialize( &c, 1 ); + return s; + } + friend Stream & operator<<( Stream & s, uint8 & c ) { + nvStaticCheck(sizeof(uint8) == 1); + s.serialize( &c, 1 ); + return s; + } + friend Stream & operator<<( Stream & s, int8 & c ) { + nvStaticCheck(sizeof(int8) == 1); + s.serialize( &c, 1 ); + return s; + } + friend Stream & operator<<( Stream & s, uint16 & c ) { + nvStaticCheck(sizeof(uint16) == 2); + return s.byteOrderSerialize( &c, 2 ); + } + friend Stream & operator<<( Stream & s, int16 & c ) { + nvStaticCheck(sizeof(int16) == 2); + return s.byteOrderSerialize( &c, 2 ); + } + friend Stream & operator<<( Stream & s, uint32 & c ) { + nvStaticCheck(sizeof(uint32) == 4); + return s.byteOrderSerialize( &c, 4 ); + } + friend Stream & operator<<( Stream & s, int32 & c ) { + nvStaticCheck(sizeof(int32) == 4); + return s.byteOrderSerialize( &c, 4 ); + } + friend Stream & operator<<( Stream & s, uint64 & c ) { + nvStaticCheck(sizeof(uint64) == 8); + return s.byteOrderSerialize( &c, 8 ); + } + friend Stream & operator<<( Stream & s, int64 & c ) { + nvStaticCheck(sizeof(int64) == 8); + return s.byteOrderSerialize( &c, 8 ); + } + friend Stream & operator<<( Stream & s, float & c ) { + nvStaticCheck(sizeof(float) == 4); + return s.byteOrderSerialize( &c, 4 ); + } + friend Stream & operator<<( Stream & s, double & c ) { + nvStaticCheck(sizeof(double) == 8); + return s.byteOrderSerialize( &c, 8 ); + } + + protected: + + /// Serialize in the stream byte order. + Stream & byteOrderSerialize( void * v, uint len ) { + if( m_byteOrder == getSystemByteOrder() ) { + serialize( v, len ); + } + else { + for( uint i = len; i > 0; i-- ) { + serialize( (uint8 *)v + i - 1, 1 ); + } + } + return *this; + } -private: + private: - ByteOrder m_byteOrder; + ByteOrder m_byteOrder; -}; + }; } // nv namespace -#endif // NV_STREAM_H +#endif // NV_CORE_STREAM_H Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/TextReader.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/TextReader.h +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/TextReader.h @@ -1,38 +0,0 @@ -// This code is in the public domain -- castanyo@yahoo.es - -#ifndef NVCORE_TEXTREADER_H -#define NVCORE_TEXTREADER_H - -#include -#include -#include - -namespace nv -{ - -/// Text reader. -class NVCORE_CLASS TextReader { -public: - - /// Ctor. - TextReader(Stream * stream) : m_stream(stream), m_text(512) { - nvCheck(stream != NULL); - nvCheck(stream->isLoading()); - } - - char peek(); - char read(); - - const char *readToEnd(); - - // Returns a temporary string. - const char * readLine(); - -private: - Stream * m_stream; - Array m_text; -}; - -} // nv namespace - -#endif // NVCORE_TEXTREADER_H Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/TextReader.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/TextReader.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/TextReader.cpp @@ -1,86 +0,0 @@ -// This code is in the public domain -- castanyo@yahoo.es - -#include - -using namespace nv; - -/// Peek next character. 
-char TextReader::peek() -{ - nvDebugCheck(m_stream != NULL); - nvDebugCheck(m_stream->isSeekable()); - - if (m_stream->isAtEnd()) { - return 0; - } - - uint pos = m_stream->tell(); - - char c; - m_stream->serialize(&c, 1); - m_stream->seek(pos); - return c; -} - -/// Read a single char. -char TextReader::read() -{ - nvDebugCheck(m_stream != NULL); - - char c; - m_stream->serialize(&c, 1); - - if( m_stream->isAtEnd() ) { - return 0; - } - - return c; -} - -/// Read from the current location to the end of the stream. -const char * TextReader::readToEnd() -{ - nvDebugCheck(m_stream != NULL); - const int size = m_stream->size(); - - m_text.clear(); - - m_text.reserve(size + 1); - m_text.resize(size); - - m_stream->serialize(m_text.unsecureBuffer(), size); - m_text.pushBack('\0'); - - return m_text.buffer(); -} - -/// Read from the current location to the end of the line. -const char * TextReader::readLine() -{ - m_text.clear(); - - if (m_stream->isAtEnd()) { - return NULL; - } - - while (true) { - char c = read(); - - if (c == 0 || c == '\n') { - break; - } - else if (c == '\r') { - if( peek() == '\n' ) { - read(); - } - break; - } - - m_text.pushBack(c); - } - - m_text.pushBack('\0'); - return m_text.buffer(); -} - - Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/TextWriter.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/TextWriter.h +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/TextWriter.h @@ -1,65 +1,62 @@ -// This code is in the public domain -- castanyo@yahoo.es +// This code is in the public domain -- Ignacio Castaño +#pragma once #ifndef NVCORE_TEXTWRITER_H #define NVCORE_TEXTWRITER_H -#include -#include -#include +#include "nvcore.h" +#include "Stream.h" +#include "StrLib.h" namespace nv { - /// Text writer. - class NVCORE_CLASS TextWriter - { - public: - - TextWriter(Stream * s); - - void writeString(const char * str); - void writeString(const char * str, uint len); - void write(const char * format, ...) __attribute__((format (printf, 2, 3))); - void write(const char * format, va_list arg); - - private: - - Stream * s; - - // Temporary string. - StringBuilder str; - - }; - - - inline TextWriter & operator<<( TextWriter & tw, int i) - { - tw.write("%d", i); - return tw; - } - - inline TextWriter & operator<<( TextWriter & tw, uint i) - { - tw.write("%u", i); - return tw; - } - - inline TextWriter & operator<<( TextWriter & tw, float f) - { - tw.write("%f", f); - return tw; - } - - inline TextWriter & operator<<( TextWriter & tw, const char * str) - { - tw.writeString(str); - return tw; - } + /// Text writer. + class NVCORE_CLASS TextWriter + { + public: + + TextWriter(Stream * s); + + void writeString(const char * str); + void writeString(const char * str, uint len); + void format(const char * format, ...) __attribute__((format (printf, 2, 3))); + void formatList(const char * format, va_list arg); + + private: + + Stream * s; + + // Temporary string. 
+ StringBuilder str; + + }; + + + inline TextWriter & operator<<( TextWriter & tw, int i) + { + tw.format("%d", i); + return tw; + } + + inline TextWriter & operator<<( TextWriter & tw, uint i) + { + tw.format("%u", i); + return tw; + } + + inline TextWriter & operator<<( TextWriter & tw, float f) + { + tw.format("%f", f); + return tw; + } + + inline TextWriter & operator<<( TextWriter & tw, const char * str) + { + tw.writeString(str); + return tw; + } } // nv namespace - - - - #endif // NVCORE_TEXTWRITER_H Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/TextWriter.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/TextWriter.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/TextWriter.cpp @@ -1,45 +1,45 @@ -// This code is in the public domain -- castanyo@yahoo.es +// This code is in the public domain -- Ignacio Castaño -#include +#include "TextWriter.h" using namespace nv; /// Constructor TextWriter::TextWriter(Stream * s) : - s(s), - str(1024) + s(s), + str(1024) { - nvCheck(s != NULL); - nvCheck(s->isSaving()); + nvCheck(s != NULL); + nvCheck(s->isSaving()); } void TextWriter::writeString(const char * str) { - nvDebugCheck(s != NULL); - s->serialize(const_cast(str), (int)strlen(str)); + nvDebugCheck(s != NULL); + s->serialize(const_cast(str), strLen(str)); } void TextWriter::writeString(const char * str, uint len) { - nvDebugCheck(s != NULL); - s->serialize(const_cast(str), len); + nvDebugCheck(s != NULL); + s->serialize(const_cast(str), len); } -void TextWriter::write(const char * format, ...) +void TextWriter::format(const char * format, ...) { - va_list arg; - va_start(arg,format); - str.format(format, arg); - writeString(str.str(), str.length()); - va_end(arg); + va_list arg; + va_start(arg,format); + str.formatList(format, arg); + writeString(str.str(), str.length()); + va_end(arg); } -void TextWriter::write(const char * format, va_list arg) +void TextWriter::formatList(const char * format, va_list arg) { - va_list tmp; - va_copy(tmp, arg); - str.format(format, arg); - writeString(str.str(), str.length()); - va_end(tmp); + va_list tmp; + va_copy(tmp, arg); + str.formatList(format, arg); + writeString(str.str(), str.length()); + va_end(tmp); } Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/Timer.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/Timer.h +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/Timer.h @@ -0,0 +1,53 @@ +// This code is in the public domain -- castano@gmail.com + +#pragma once +#ifndef NV_CORE_TIMER_H +#define NV_CORE_TIMER_H + +#include "nvcore.h" + +#if NV_CC_MSVC +#include +#endif + +namespace nv { + +#if NV_CC_MSVC + NV_FORCEINLINE uint64 fastCpuClock() { return __rdtsc(); } +#elif NV_CC_GNUC && NV_CPU_X86 + NV_FORCEINLINE uint64 fastCpuClock() { + uint64 val; + __asm__ volatile (".byte 0x0f, 0x31" : "=A" (val)); + return val; + } +#elif NV_CC_GNUC && NV_CPU_X86_64 + NV_FORCEINLINE uint64 fastCpuClock() { + uint hi, lo; + __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi)); + return uint64(lo) | (uint64(hi) << 32); + } +#else + NV_FORCEINLINE uint64 fastCpuClock() { return 0; } +#endif + + uint64 systemClockFrequency(); + uint64 systemClock(); + + class NVCORE_CLASS Timer + { + public: + Timer() {} + + void start() { m_start = systemClock(); } + void stop() { m_stop = systemClock(); } + + float elapsed() const { return float(m_stop - m_start) / systemClockFrequency(); } + + private: + uint64 m_start; + 
uint64 m_stop; + }; + +} // nv namespace + +#endif // NV_CORE_TIMER_H Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/Timer.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/Timer.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/Timer.cpp @@ -0,0 +1,44 @@ +// This code is in the public domain -- castano@gmail.com + +#include "Timer.h" + +using namespace nv; + + +#if NV_OS_WIN32 + +#define WINDOWS_LEAN_AND_MEAN +#define VC_EXTRALEAN +#define NOMINMAX +#include // QueryPerformanceFrequency, QueryPerformanceCounter + + +uint64 nv::systemClockFrequency() +{ + uint64 frequency; + QueryPerformanceFrequency((LARGE_INTEGER*) &frequency); + return frequency; +} + +uint64 nv::systemClock() +{ + uint64 counter; + QueryPerformanceCounter((LARGE_INTEGER*) &counter); + return counter; +} + +#else + +#include // clock + +uint64 nv::systemClockFrequency() +{ + return CLOCKS_PER_SEC; +} + +uint64 nv::systemClock() +{ + return clock(); +} + +#endif Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/Tokenizer.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/Tokenizer.h +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/Tokenizer.h @@ -1,99 +0,0 @@ -// This code is in the public domain -- castanyo@yahoo.es - -#ifndef NV_CORE_TOKENIZER_H -#define NV_CORE_TOKENIZER_H - -#include -#include -#include -#include - -namespace nv -{ - /// A token produced by the Tokenizer. - class NVCORE_CLASS Token - { - public: - Token(); - Token(const Token & token); - Token(const char * str, int len); - - bool operator==(const char * str) const; - bool operator!=(const char * str) const; - - bool isNull(); - - float toFloat() const; - int toInt() const; - uint toUnsignedInt() const; - String toString() const; - - bool parse(const char * format, int count, ...) const __attribute__((format (scanf, 2, 4))); - - private: - const char * m_str; - int m_len; - }; - - /// Exception thrown by the tokenizer. - class TokenizerException - { - public: - TokenizerException(int line, int column) : m_line(line), m_column(column) {} - - int line() const { return m_line; } - int column() const { return m_column; } - - private: - int m_line; - int m_column; - }; - - // @@ Use enums instead of bools for clarity! - //enum SkipEmptyLines { skipEmptyLines, noSkipEmptyLines }; - //enum SkipEndOfLine { skipEndOfLine, noSkipEndOfLine }; - - /// A simple stream tokenizer. 
- class NVCORE_CLASS Tokenizer - { - public: - Tokenizer(Stream * stream); - - bool nextLine(bool skipEmptyLines = true); - bool nextToken(bool skipEndOfLine = false); - - const Token & token() const { return m_token; } - - int lineNumber() const { return m_lineNumber; } - int columnNumber() const { return m_columnNumber; } - - void setDelimiters(const char * str) { m_delimiters = str; } - const char * delimiters() const { return m_delimiters; } - - void setSpaces(const char * str) { m_spaces = str; } - const char * spaces() const { return m_spaces; } - - private: - char readChar(); - bool readLine(); - bool readToken(); - void skipSpaces(); - bool isSpace(char c); - bool isDelimiter(char c); - - private: - TextReader m_reader; - const char * m_line; - Token m_token; - - int m_lineNumber; - int m_columnNumber; - - const char * m_delimiters; - const char * m_spaces; - }; - -} // nv namespace - - -#endif // NV_CORE_TOKENIZER_H Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/Tokenizer.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/Tokenizer.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/Tokenizer.cpp @@ -1,229 +0,0 @@ -// This code is in the public domain -- castanyo@yahoo.es - -#include -#include - -#include // vsscanf -#include // va_list -#include // atof, atoi - -#if NV_CC_MSVC -#if 0 // This doesn't work on MSVC for x64 -/* vsscanf for Win32 - * Written 5/2003 by - * This code is in the Public Domain - */ - -#include // alloca -//#include - -static int vsscanf(const char * buffer, const char * format, va_list argPtr) -{ - // Get an upper bound for the # of args - size_t count = 0; - const char *p = format; - while(1) { - char c = *(p++); - if(c==0) break; - if(c=='%' && (p[0]!='*' && p[0]!='%')) ++count; - } - - // Make a local stack - size_t stackSize = (2+count)*sizeof(void*); - void **newStack = (void**)alloca(stackSize); - - // Fill local stack the way sscanf likes it - newStack[0] = (void*)buffer; - newStack[1] = (void*)format; - memcpy(newStack+2, argPtr, count*sizeof(void*)); - - // @@ Use: CALL DWORD PTR [sscanf] - - // Warp into system sscanf with new stack - int result; - void *savedESP; - __asm - { - mov savedESP, esp - mov esp, newStack -#if _MSC_VER >= 1400 - call DWORD PTR [sscanf_s] -#else - call DWORD PTR [sscanf] -#endif - mov esp, savedESP - mov result, eax - } - return result; -} -#endif -#endif - -using namespace nv; - -Token::Token() : - m_str(""), m_len(0) -{ -} - -Token::Token(const Token & token) : - m_str(token.m_str), m_len(token.m_len) -{ -} - -Token::Token(const char * str, int len) : - m_str(str), m_len(len) -{ -} - -bool Token::operator==(const char * str) const -{ - return strncmp(m_str, str, m_len) == 0; -} -bool Token::operator!=(const char * str) const -{ - return strncmp(m_str, str, m_len) != 0; -} - -bool Token::isNull() -{ - return m_len != 0; -} - -float Token::toFloat() const -{ - return float(atof(m_str)); -} - -int Token::toInt() const -{ - return atoi(m_str); -} - -uint Token::toUnsignedInt() const -{ - // @@ TBD - return uint(atoi(m_str)); -} - -String Token::toString() const -{ - return String(m_str, m_len); -} - -bool Token::parse(const char * format, int count, ...) 
const -{ - va_list arg; - va_start(arg, count); - - int readCount = vsscanf(m_str, format, arg); - - va_end(arg); - - return readCount == count; -} - - -Tokenizer::Tokenizer(Stream * stream) : - m_reader(stream), m_lineNumber(0), m_columnNumber(0), m_delimiters("{}()="), m_spaces(" \t") -{ -} - -bool Tokenizer::nextLine(bool skipEmptyLines /*= true*/) -{ - do { - if (!readLine()) { - return false; - } - } - while (!readToken() && skipEmptyLines); - - return true; -} - -bool Tokenizer::nextToken(bool skipEndOfLine /*= false*/) -{ - if (!readToken()) { - if (!skipEndOfLine) { - return false; - } - else { - return nextLine(true); - } - } - return true; -} - -bool Tokenizer::readToken() -{ - skipSpaces(); - - const char * begin = m_line + m_columnNumber; - - if (*begin == '\0') { - return false; - } - - char c = readChar(); - if (isDelimiter(c)) { - m_token = Token(begin, 1); - return true; - } - - // @@ Add support for quoted tokens "", '' - - int len = 0; - while (!isDelimiter(c) && !isSpace(c) && c != '\0') { - c = readChar(); - len++; - } - m_columnNumber--; - - m_token = Token(begin, len); - - return true; -} - -char Tokenizer::readChar() -{ - return m_line[m_columnNumber++]; -} - -bool Tokenizer::readLine() -{ - m_lineNumber++; - m_columnNumber = 0; - m_line = m_reader.readLine(); - return m_line != NULL; -} - -void Tokenizer::skipSpaces() -{ - while (isSpace(readChar())) {} - m_columnNumber--; -} - -bool Tokenizer::isSpace(char c) -{ - uint i = 0; - while (m_spaces[i] != '\0') { - if (c == m_spaces[i]) { - return true; - } - i++; - } - return false; -} - -bool Tokenizer::isDelimiter(char c) -{ - uint i = 0; - while (m_delimiters[i] != '\0') { - if (c == m_delimiters[i]) { - return true; - } - i++; - } - return false; -} - Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/Utils.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/Utils.h +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/Utils.h @@ -0,0 +1,282 @@ +// This code is in the public domain -- Ignacio Castaño + +#pragma once +#ifndef NV_CORE_UTILS_H +#define NV_CORE_UTILS_H + +#include "Debug.h" // nvDebugCheck + +#include // for placement new + + +// Just in case. Grrr. +#undef min +#undef max + +#define NV_INT8_MIN (-128) +#define NV_INT8_MAX 127 +#define NV_UINT8_MAX 255 +#define NV_INT16_MIN (-32767-1) +#define NV_INT16_MAX 32767 +#define NV_UINT16_MAX 0xffff +#define NV_INT32_MIN (-2147483647-1) +#define NV_INT32_MAX 2147483647 +#define NV_UINT32_MAX 0xffffffff +#define NV_INT64_MAX POSH_I64(9223372036854775807) +#define NV_INT64_MIN (-POSH_I64(9223372036854775807)-1) +#define NV_UINT64_MAX POSH_U64(0xffffffffffffffff) + +#define NV_HALF_MAX 65504.0F +#define NV_FLOAT_MAX 3.402823466e+38F + +#define NV_INTEGER_TO_FLOAT_MAX 16777217 // Largest integer such that it and all smaller integers can be stored in a 32bit float. + + +namespace nv +{ + // Less error prone than casting. From CB: + // http://cbloomrants.blogspot.com/2011/06/06-17-11-c-casting-is-devil.html + + // These intentionally look like casts. 
+ + // uint32 casts: + template inline uint32 U32(T x) { return x; } + template <> inline uint32 U32(uint64 x) { nvDebugCheck(x <= NV_UINT32_MAX); return (uint32)x; } + template <> inline uint32 U32(int64 x) { nvDebugCheck(x >= 0 && x <= NV_UINT32_MAX); return (uint32)x; } + //template <> inline uint32 U32(uint32 x) { return x; } + template <> inline uint32 U32(int32 x) { nvDebugCheck(x >= 0); return (uint32)x; } + //template <> inline uint32 U32(uint16 x) { return x; } + template <> inline uint32 U32(int16 x) { nvDebugCheck(x >= 0); return (uint32)x; } + //template <> inline uint32 U32(uint8 x) { return x; } + template <> inline uint32 U32(int8 x) { nvDebugCheck(x >= 0); return (uint32)x; } + + // int32 casts: + template inline int32 I32(T x) { return x; } + template <> inline int32 I32(uint64 x) { nvDebugCheck(x <= NV_INT32_MAX); return (int32)x; } + template <> inline int32 I32(int64 x) { nvDebugCheck(x >= NV_INT32_MIN && x <= NV_UINT32_MAX); return (int32)x; } + template <> inline int32 I32(uint32 x) { nvDebugCheck(x <= NV_INT32_MAX); return (int32)x; } + //template <> inline int32 I32(int32 x) { return x; } + //template <> inline int32 I32(uint16 x) { return x; } + //template <> inline int32 I32(int16 x) { return x; } + //template <> inline int32 I32(uint8 x) { return x; } + //template <> inline int32 I32(int8 x) { return x; } + + // uint16 casts: + template inline uint16 U16(T x) { return x; } + template <> inline uint16 U16(uint64 x) { nvDebugCheck(x <= NV_UINT16_MAX); return (uint16)x; } + template <> inline uint16 U16(int64 x) { nvDebugCheck(x >= 0 && x <= NV_UINT16_MAX); return (uint16)x; } + template <> inline uint16 U16(uint32 x) { nvDebugCheck(x <= NV_UINT16_MAX); return (uint16)x; } + template <> inline uint16 U16(int32 x) { nvDebugCheck(x >= 0 && x <= NV_UINT16_MAX); return (uint16)x; } + //template <> inline uint16 U16(uint16 x) { return x; } + template <> inline uint16 U16(int16 x) { nvDebugCheck(x >= 0); return (uint16)x; } + //template <> inline uint16 U16(uint8 x) { return x; } + template <> inline uint16 U16(int8 x) { nvDebugCheck(x >= 0); return (uint16)x; } + + // int16 casts: + template inline int16 I16(T x) { return x; } + template <> inline int16 I16(uint64 x) { nvDebugCheck(x <= NV_INT16_MAX); return (int16)x; } + template <> inline int16 I16(int64 x) { nvDebugCheck(x >= NV_INT16_MIN && x <= NV_UINT16_MAX); return (int16)x; } + template <> inline int16 I16(uint32 x) { nvDebugCheck(x <= NV_INT16_MAX); return (int16)x; } + template <> inline int16 I16(int32 x) { nvDebugCheck(x >= NV_INT16_MIN && x <= NV_UINT16_MAX); return (int16)x; } + template <> inline int16 I16(uint16 x) { nvDebugCheck(x <= NV_INT16_MAX); return (int16)x; } + //template <> inline int16 I16(int16 x) { return x; } + //template <> inline int16 I16(uint8 x) { return x; } + //template <> inline int16 I16(int8 x) { return x; } + + // uint8 casts: + template inline uint8 U8(T x) { return x; } + template <> inline uint8 U8(uint64 x) { nvDebugCheck(x <= NV_UINT8_MAX); return (uint8)x; } + template <> inline uint8 U8(int64 x) { nvDebugCheck(x >= 0 && x <= NV_UINT8_MAX); return (uint8)x; } + template <> inline uint8 U8(uint32 x) { nvDebugCheck(x <= NV_UINT8_MAX); return (uint8)x; } + template <> inline uint8 U8(int32 x) { nvDebugCheck(x >= 0 && x <= NV_UINT8_MAX); return (uint8)x; } + template <> inline uint8 U8(uint16 x) { nvDebugCheck(x <= NV_UINT8_MAX); return (uint8)x; } + template <> inline uint8 U8(int16 x) { nvDebugCheck(x >= 0 && x <= NV_UINT8_MAX); return (uint8)x; } + //template <> inline uint8 
U8(uint8 x) { return x; } + template <> inline uint8 U8(int8 x) { nvDebugCheck(x >= 0); return (uint8)x; } + //template <> inline uint8 U8(int8 x) { nvDebugCheck(x >= 0.0f && x <= 255.0f); return (uint8)x; } + + // int8 casts: + template inline int8 I8(T x) { return x; } + template <> inline int8 I8(uint64 x) { nvDebugCheck(x <= NV_INT8_MAX); return (int8)x; } + template <> inline int8 I8(int64 x) { nvDebugCheck(x >= NV_INT8_MIN && x <= NV_UINT8_MAX); return (int8)x; } + template <> inline int8 I8(uint32 x) { nvDebugCheck(x <= NV_INT8_MAX); return (int8)x; } + template <> inline int8 I8(int32 x) { nvDebugCheck(x >= NV_INT8_MIN && x <= NV_UINT8_MAX); return (int8)x; } + template <> inline int8 I8(uint16 x) { nvDebugCheck(x <= NV_INT8_MAX); return (int8)x; } + template <> inline int8 I8(int16 x) { nvDebugCheck(x >= NV_INT8_MIN && x <= NV_UINT8_MAX); return (int8)x; } + template <> inline int8 I8(uint8 x) { nvDebugCheck(x <= NV_INT8_MAX); return (int8)x; } + //template <> inline int8 I8(int8 x) { return x; } + + // float casts: + template inline float F32(T x) { return x; } + template <> inline float F32(uint64 x) { nvDebugCheck(x <= NV_INTEGER_TO_FLOAT_MAX); return (float)x; } + template <> inline float F32(int64 x) { nvDebugCheck(x >= -NV_INTEGER_TO_FLOAT_MAX && x <= NV_INTEGER_TO_FLOAT_MAX); return (float)x; } + template <> inline float F32(uint32 x) { nvDebugCheck(x <= NV_INTEGER_TO_FLOAT_MAX); return (float)x; } + template <> inline float F32(int32 x) { nvDebugCheck(x >= -NV_INTEGER_TO_FLOAT_MAX && x <= NV_INTEGER_TO_FLOAT_MAX); return (float)x; } + // The compiler should not complain about these conversions: + //template <> inline float F32(uint16 x) { nvDebugCheck(return (float)x; } + //template <> inline float F32(int16 x) { nvDebugCheck(return (float)x; } + //template <> inline float F32(uint8 x) { nvDebugCheck(return (float)x; } + //template <> inline float F32(int8 x) { nvDebugCheck(return (float)x; } + + + /// Swap two values. + template + inline void swap(T & a, T & b) + { + T temp(a); + a = b; + b = temp; + } + + /// Return the maximum of the two arguments. For floating point values, it returns the second value if the first is NaN. + template + //inline const T & max(const T & a, const T & b) + inline T max(const T & a, const T & b) + { + return (b < a) ? a : b; + } + + /// Return the maximum of the four arguments. + template + //inline const T & max4(const T & a, const T & b, const T & c) + inline T max4(const T & a, const T & b, const T & c, const T & d) + { + return max(max(a, b), max(c, d)); + } + + /// Return the maximum of the three arguments. + template + //inline const T & max3(const T & a, const T & b, const T & c) + inline T max3(const T & a, const T & b, const T & c) + { + return max(a, max(b, c)); + } + + /// Return the minimum of two values. + template + //inline const T & min(const T & a, const T & b) + inline T min(const T & a, const T & b) + { + return (a < b) ? a : b; + } + + /// Return the maximum of the three arguments. + template + //inline const T & min3(const T & a, const T & b, const T & c) + inline T min3(const T & a, const T & b, const T & c) + { + return min(a, min(b, c)); + } + + /// Clamp between two values. + template + //inline const T & clamp(const T & x, const T & a, const T & b) + inline T clamp(const T & x, const T & a, const T & b) + { + return min(max(x, a), b); + } + + /** Return the next power of two. + * @see http://graphics.stanford.edu/~seander/bithacks.html + * @warning Behaviour for 0 is undefined. 
+ * @note isPowerOfTwo(x) == true -> nextPowerOfTwo(x) == x + * @note nextPowerOfTwo(x) = 2 << log2(x-1) + */ + inline uint nextPowerOfTwo( uint x ) + { + nvDebugCheck( x != 0 ); +#if 1 // On modern CPUs this is supposed to be as fast as using the bsr instruction. + x--; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + return x+1; +#else + uint p = 1; + while( x > p ) { + p += p; + } + return p; +#endif + } + + /// Return true if @a n is a power of two. + inline bool isPowerOfTwo( uint n ) + { + return (n & (n-1)) == 0; + } + + + // @@ Move this to utils? + /// Delete all the elements of a container. + template + void deleteAll(T & container) + { + for (typename T::PseudoIndex i = container.start(); !container.isDone(i); container.advance(i)) + { + delete container[i]; + } + } + + + + // @@ Specialize these methods for numeric, pointer, and pod types. + + template + void construct_range(T * restrict ptr, uint new_size, uint old_size) { + for (uint i = old_size; i < new_size; i++) { + new(ptr+i) T; // placement new + } + } + + template + void construct_range(T * restrict ptr, uint new_size, uint old_size, const T & elem) { + for (uint i = old_size; i < new_size; i++) { + new(ptr+i) T(elem); // placement new + } + } + + template + void construct_range(T * restrict ptr, uint new_size, uint old_size, const T * src) { + for (uint i = old_size; i < new_size; i++) { + new(ptr+i) T(src[i]); // placement new + } + } + + template + void destroy_range(T * restrict ptr, uint new_size, uint old_size) { + for (uint i = new_size; i < old_size; i++) { + (ptr+i)->~T(); // Explicit call to the destructor + } + } + + template + void fill(T * restrict dst, uint count, const T & value) { + for (uint i = 0; i < count; i++) { + dst[i] = value; + } + } + + template + void copy_range(T * restrict dst, const T * restrict src, uint count) { + for (uint i = 0; i < count; i++) { + dst[i] = src[i]; + } + } + + template + bool find(const T & element, const T * restrict ptr, uint begin, uint end, uint * index) { + for (uint i = begin; i < end; i++) { + if (ptr[i] == element) { + if (index != NULL) *index = i; + return true; + } + } + return false; + } + +} // nv namespace + +#endif // NV_CORE_UTILS_H Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/nvcore.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/nvcore.h +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/nvcore.h @@ -1,11 +1,9 @@ -// This code is in the public domain -- castanyo@yahoo.es +// This code is in the public domain -- Ignacio Castaño +#pragma once #ifndef NV_CORE_H #define NV_CORE_H -// cmake config -#include - // Function linkage #if NVCORE_SHARED #ifdef NVCORE_EXPORTS @@ -22,7 +20,7 @@ // Platform definitions -#include "poshlib/posh.h" +#include // OS: // NV_OS_WIN32 @@ -32,34 +30,64 @@ // NV_OS_LINUX // NV_OS_UNIX // NV_OS_DARWIN +// NV_OS_XBOX +// NV_OS_ORBIS +// NV_OS_IOS -#define NV_OS_STRING POSH_OS_STRING +#define NV_OS_STRING POSH_OS_STRING #if defined POSH_OS_LINUX -# define NV_OS_LINUX 1 -# define NV_OS_UNIX 1 +# define NV_OS_LINUX 1 +# define NV_OS_UNIX 1 +#elif defined POSH_OS_ORBIS +# define NV_OS_ORBIS 1 #elif defined POSH_OS_FREEBSD -# define NV_OS_FREEBSD 1 -# define NV_OS_UNIX 1 +# define NV_OS_FREEBSD 1 +# define NV_OS_UNIX 1 +#elif defined POSH_OS_NETBSD +# define NV_OS_NETBSD 1 +# define NV_OS_UNIX 1 #elif defined POSH_OS_OPENBSD -# define NV_OS_OPENBSD 1 -# define NV_OS_UNIX 1 +# define NV_OS_OPENBSD 1 +# define NV_OS_UNIX 1 #elif 
defined POSH_OS_CYGWIN32 -# define NV_OS_CYGWIN 1 +# define NV_OS_CYGWIN 1 #elif defined POSH_OS_MINGW -# define NV_OS_MINGW 1 -# define NV_OS_WIN32 1 +# define NV_OS_MINGW 1 +# define NV_OS_WIN32 1 #elif defined POSH_OS_OSX -# define NV_OS_DARWIN 1 -# define NV_OS_UNIX 1 +# define NV_OS_DARWIN 1 +# define NV_OS_UNIX 1 +#elif defined POSH_OS_IOS +# define NV_OS_DARWIN 1 //ACS should we keep this on IOS? +# define NV_OS_UNIX 1 +# define NV_OS_IOS 1 #elif defined POSH_OS_UNIX -# define NV_OS_UNIX 1 -#elif defined POSH_OS_WIN32 -# define NV_OS_WIN32 1 +# define NV_OS_UNIX 1 #elif defined POSH_OS_WIN64 -# define NV_OS_WIN64 1 +# define NV_OS_WIN32 1 +# define NV_OS_WIN64 1 +#elif defined POSH_OS_WIN32 +# define NV_OS_WIN32 1 +#elif defined POSH_OS_XBOX +# define NV_OS_XBOX 1 #else -# error "Unsupported OS" +# error "Unsupported OS" +#endif + + +// Threading: +// some platforms don't implement __thread or similar for thread-local-storage +#if NV_OS_UNIX || NV_OS_ORBIS || NV_OS_IOS //ACStodoIOS darwin instead of ios? +# define NV_OS_USE_PTHREAD 1 +# if NV_OS_DARWIN || NV_OS_IOS +# define NV_OS_HAS_TLS_QUALIFIER 0 +# else +# define NV_OS_HAS_TLS_QUALIFIER 1 +# endif +#else +# define NV_OS_USE_PTHREAD 0 +# define NV_OS_HAS_TLS_QUALIFIER 1 #endif @@ -70,45 +98,71 @@ // NV_CPU_ARM // NV_CPU_AARCH64 -#define NV_CPU_STRING POSH_CPU_STRING +#define NV_CPU_STRING POSH_CPU_STRING #if defined POSH_CPU_X86_64 -# define NV_CPU_X86_64 1 +//# define NV_CPU_X86 1 +# define NV_CPU_X86_64 1 #elif defined POSH_CPU_X86 -# define NV_CPU_X86 1 +# define NV_CPU_X86 1 #elif defined POSH_CPU_PPC -# define NV_CPU_PPC 1 +# define NV_CPU_PPC 1 #elif defined POSH_CPU_STRONGARM -# define NV_CPU_ARM 1 +# define NV_CPU_ARM 1 #elif defined POSH_CPU_AARCH64 -# define NV_CPU_AARCH64 1 +# define NV_CPU_AARCH64 1 #else -# error "Unsupported CPU" +# error "Unsupported CPU" #endif // Compiler: // NV_CC_GNUC // NV_CC_MSVC -// @@ NV_CC_MSVC6 -// @@ NV_CC_MSVC7 -// @@ NV_CC_MSVC8 - -#if defined POSH_COMPILER_GCC -# define NV_CC_GNUC 1 -# define NV_CC_STRING "gcc" +// NV_CC_CLANG + +#if defined POSH_COMPILER_CLANG +# define NV_CC_CLANG 1 +# define NV_CC_GNUC 1 // Clang is compatible with GCC. +# define NV_CC_STRING "clang" +#elif defined POSH_COMPILER_GCC +# define NV_CC_GNUC 1 +# define NV_CC_STRING "gcc" #elif defined POSH_COMPILER_MSVC -# define NV_CC_MSVC 1 -# define NV_CC_STRING "msvc" +# define NV_CC_MSVC 1 +# define NV_CC_STRING "msvc" #else -# error "Unsupported compiler" +# error "Unsupported compiler" #endif +#if NV_CC_MSVC +#define NV_CC_CPP11 (__cplusplus > 199711L || _MSC_VER >= 1800) // Visual Studio 2013 has all the features we use, but doesn't advertise full C++11 support yet. +#else +// @@ IC: This works in CLANG, about GCC? +// @@ ES: Doesn't work in gcc. These 3 features are available in GCC >= 4.4. 
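[Editor's illustration, not part of the patch: NV_CC_CPP11, defined just below, is what lets macros such as NV_FORBID_COPY (further down in this hunk) expand to "= delete" on C++11 compilers and to private declarations otherwise. A minimal usage sketch, with an illustrative class name:]

    class MipmapCache
    {
        NV_FORBID_COPY(MipmapCache);    // deleted copy ctor/assignment under NV_CC_CPP11,
                                        // private undefined declarations otherwise
    public:
        MipmapCache() {}
    };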
+#ifdef __clang__ +#define NV_CC_CPP11 (__has_feature(cxx_deleted_functions) && __has_feature(cxx_rvalue_references) && __has_feature(cxx_static_assert)) +#elif defined __GNUC__ +#define NV_CC_CPP11 ( __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)) +#endif +#endif // Endiannes: -#define NV_LITTLE_ENDIAN POSH_LITTLE_ENDIAN -#define NV_BIG_ENDIAN POSH_BIG_ENDIAN -#define NV_ENDIAN_STRING POSH_ENDIAN_STRING +#define NV_LITTLE_ENDIAN POSH_LITTLE_ENDIAN +#define NV_BIG_ENDIAN POSH_BIG_ENDIAN +#define NV_ENDIAN_STRING POSH_ENDIAN_STRING + + +// Define the right printf prefix for size_t arguments: +#if POSH_64BIT_POINTER +# define NV_SIZET_PRINTF_PREFIX POSH_I64_PRINTF_PREFIX +#else +# define NV_SIZET_PRINTF_PREFIX +#endif + + +// cmake config +#include "nvconfig.h" // Type definitions: @@ -130,72 +184,124 @@ // Version string: #define NV_VERSION_STRING \ - NV_OS_STRING "/" NV_CC_STRING "/" NV_CPU_STRING"/" \ - NV_ENDIAN_STRING"-endian - " __DATE__ "-" __TIME__ + NV_OS_STRING "/" NV_CC_STRING "/" NV_CPU_STRING"/" \ + NV_ENDIAN_STRING"-endian - " __DATE__ "-" __TIME__ -/// Disable copy constructor and assignment operator. -/// @hideinitializer +// Disable copy constructor and assignment operator. +#if NV_CC_CPP11 +#define NV_FORBID_COPY(C) \ + C( const C & ) = delete; \ + C &operator=( const C & ) = delete +#else #define NV_FORBID_COPY(C) \ private: \ C( const C & ); \ - C &operator=( const C & ); - + C &operator=( const C & ) +#endif -/// Disable dynamic allocation on the heap. -/// See Prohibiting Heap-Based Objects in More Effective C++. -/// @hideinitializer +// Disable dynamic allocation on the heap. +// See Prohibiting Heap-Based Objects in More Effective C++. #define NV_FORBID_HEAPALLOC() \ - private: \ - static void *operator new(size_t size); \ - static void *operator new[](size_t size); + private: \ + void *operator new(size_t size); \ + void *operator new[](size_t size) + //static void *operator new(size_t size); \ + //static void *operator new[](size_t size); // String concatenation macros. #define NV_STRING_JOIN2(arg1, arg2) NV_DO_STRING_JOIN2(arg1, arg2) #define NV_DO_STRING_JOIN2(arg1, arg2) arg1 ## arg2 #define NV_STRING_JOIN3(arg1, arg2, arg3) NV_DO_STRING_JOIN3(arg1, arg2, arg3) #define NV_DO_STRING_JOIN3(arg1, arg2, arg3) arg1 ## arg2 ## arg3 +#define NV_STRING2(x) #x +#define NV_STRING(x) NV_STRING2(x) + +#if NV_CC_MSVC +#define NV_MULTI_LINE_MACRO_BEGIN do { +#define NV_MULTI_LINE_MACRO_END \ + __pragma(warning(push)) \ + __pragma(warning(disable:4127)) \ + } while(false) \ + __pragma(warning(pop)) +#else +#define NV_MULTI_LINE_MACRO_BEGIN do { +#define NV_MULTI_LINE_MACRO_END } while(false) +#endif + +#if NV_CC_CPP11 +#define nvStaticCheck(x) static_assert((x), "Static assert "#x" failed") +#else +#define nvStaticCheck(x) typedef char NV_STRING_JOIN2(__static_assert_,__LINE__)[(x)] +#endif +#define NV_COMPILER_CHECK(x) nvStaticCheck(x) // I like this name best. + +// Make sure type definitions are fine. +NV_COMPILER_CHECK(sizeof(int8) == 1); +NV_COMPILER_CHECK(sizeof(uint8) == 1); +NV_COMPILER_CHECK(sizeof(int16) == 2); +NV_COMPILER_CHECK(sizeof(uint16) == 2); +NV_COMPILER_CHECK(sizeof(int32) == 4); +NV_COMPILER_CHECK(sizeof(uint32) == 4); +NV_COMPILER_CHECK(sizeof(int32) == 4); +NV_COMPILER_CHECK(sizeof(uint32) == 4); + + +#define NV_ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0])) + +#if 0 // Disabled in The Witness. 
+#if NV_CC_MSVC +#define NV_MESSAGE(x) message(__FILE__ "(" NV_STRING(__LINE__) ") : " x) +#else +#define NV_MESSAGE(x) message(x) +#endif +#else +#define NV_MESSAGE(x) +#endif + // Startup initialization macro. #define NV_AT_STARTUP(some_code) \ - namespace { \ - static struct NV_STRING_JOIN2(AtStartup_, __LINE__) { \ - NV_STRING_JOIN2(AtStartup_, __LINE__)() { some_code; } \ - } \ - NV_STRING_JOIN3(AtStartup_, __LINE__, Instance); \ - }; + namespace { \ + static struct NV_STRING_JOIN2(AtStartup_, __LINE__) { \ + NV_STRING_JOIN2(AtStartup_, __LINE__)() { some_code; } \ + } \ + NV_STRING_JOIN3(AtStartup_, __LINE__, Instance); \ + } -/// Indicate the compiler that the parameter is not used to suppress compier warnings. -/// @hideinitializer +// Indicate the compiler that the parameter is not used to suppress compier warnings. #define NV_UNUSED(a) ((a)=(a)) -/// Null index. @@ Move this somewhere else... This could have collisions with other definitions! -#define NIL uint(~0) +// Null index. @@ Move this somewhere else... it's only used by nvmesh. +//const unsigned int NIL = unsigned int(~0); +//#define NIL uint(~0) -/// Null pointer. +// Null pointer. #ifndef NULL #define NULL 0 #endif // Platform includes #if NV_CC_MSVC -# if NV_OS_WIN32 -# include "DefsVcWin32.h" -# else -# error "MSVC: Platform not supported" -# endif +# if NV_OS_WIN32 +# include "DefsVcWin32.h" +# elif NV_OS_XBOX +# include "DefsVcXBox.h" +# else +# error "MSVC: Platform not supported" +# endif #elif NV_CC_GNUC -# if NV_OS_LINUX -# include "DefsGnucLinux.h" -# elif NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_OPENBSD -# include "DefsGnucDarwin.h" -# elif NV_OS_MINGW -# include "DefsGnucWin32.h" -# elif NV_OS_CYGWIN -# error "GCC: Cygwin not supported" -# else -# error "GCC: Platform not supported" -# endif +# if NV_OS_LINUX +# include "DefsGnucLinux.h" +# elif NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_NETBSD || NV_OS_OPENBSD +# include "DefsGnucDarwin.h" +# elif NV_OS_MINGW +# include "DefsGnucWin32.h" +# elif NV_OS_CYGWIN +# error "GCC: Cygwin not supported" +# else +# error "GCC: Platform not supported" +# endif #endif #endif // NV_CORE_H Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/poshlib/CMakeLists.txt =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/poshlib/CMakeLists.txt +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/poshlib/CMakeLists.txt @@ -1,7 +0,0 @@ - -SET(POSHLIB_SRCS - posh.c - posh.h) - -ADD_LIBRARY(posh STATIC ${POSHLIB_SRCS}) - Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/poshlib/posh.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/poshlib/posh.h +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/poshlib/posh.h @@ -1,1022 +0,0 @@ -/** -@file posh.h -@author Brian Hook -@version 1.3.001 - -Header file for POSH, the Portable Open Source Harness project. - -NOTE: Unlike most header files, this one is designed to be included -multiple times, which is why it does not have the @#ifndef/@#define -preamble. - -POSH relies on environment specified preprocessor symbols in order -to infer as much as possible about the target OS/architecture and -the host compiler capabilities. - -NOTE: POSH is simple and focused. It attempts to provide basic -functionality and information, but it does NOT attempt to emulate -missing functionality. 
I am also not willing to make POSH dirty -and hackish to support truly ancient and/or outmoded and/or bizarre -technologies such as non-ANSI compilers, systems with non-IEEE -floating point formats, segmented 16-bit operating systems, etc. - -Please refer to the accompanying HTML documentation or visit -http://www.poshlib.org for more information on how to use POSH. - -LICENSE: - -Copyright (c) 2004, Brian Hook -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials provided - with the distribution. - - * The names of this package'ss contributors contributors may not - be used to endorse or promote products derived from this - software without specific prior written permission. - - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -REVISION: - -I've been lax about revision histories, so this starts at, um, 1.3.001. -Sorry for any inconveniences. - -1.3.001 - 2/23/2006 - Incorporated fix for bug reported by Bill Cary, - where I was not detecting Visual Studio - compilation on x86-64 systems. Added check for - _M_X64 which should fix that. 
- -*/ -/* -I have yet to find an authoritative reference on preprocessor -symbols, but so far this is what I've gleaned: - -GNU GCC/G++: - - __GNUC__: GNU C version - - __GNUG__: GNU C++ compiler - - __sun__ : on Sun platforms - - __svr4__: on Solaris and other SysV R4 platforms - - __mips__: on MIPS processor platforms - - __sparc_v9__: on Sparc 64-bit CPUs - - __sparcv9: 64-bit Solaris - - __MIPSEL__: mips processor, compiled for little endian - - __MIPSEB__: mips processor, compiled for big endian - - _R5900: MIPS/Sony/Toshiba R5900 (PS2) - - mc68000: 68K - - m68000: 68K - - m68k: 68K - - __palmos__: PalmOS - -Intel C/C++ Compiler: - - __ECC : compiler version, IA64 only - - __EDG__ - - __ELF__ - - __GXX_ABI_VERSION - - __i386 : IA-32 only - - __i386__ : IA-32 only - - i386 : IA-32 only - - __ia64 : IA-64 only - - __ia64__ : IA-64 only - - ia64 : IA-64 only - - __ICC : IA-32 only - - __INTEL_COMPILER : IA-32 or IA-64, newer versions only - -Apple's C/C++ Compiler for OS X: - - __APPLE_CC__ - - __APPLE__ - - __BIG_ENDIAN__ - - __APPLE__ - - __ppc__ - - __MACH__ - -DJGPP: - - __MSDOS__ - - __unix__ - - __unix - - __GNUC__ - - __GO32 - - DJGPP - - __i386, __i386, i386 - -Cray's C compiler: - - _ADDR64: if 64-bit pointers - - _UNICOS: - - __unix: - -SGI's CC compiler predefines the following (and more) with -ansi: - - __sgi - - __unix - - __host_mips - - _SYSTYPE_SVR4 - - __mips - - _MIPSEB - - anyone know if there is a predefined symbol for the compiler?! - -MinGW: - - as GnuC but also defines _WIN32, __WIN32, WIN32, _X86_, __i386, __i386__, and several others - - __MINGW32__ - -Cygwin: - - as Gnu C, but also - - __unix__ - - __CYGWIN32__ - -Microsoft Visual Studio predefines the following: - - _MSC_VER - - _WIN32: on Win32 - - _M_IX6 (on x86 systems) - - _M_X64: on x86-64 systems - - _M_ALPHA (on DEC AXP systems) - - _SH3: WinCE, Hitachi SH-3 - - _MIPS: WinCE, MIPS - - _ARM: WinCE, ARM - -Sun's C Compiler: - - sun and _sun - - unix and _unix - - sparc and _sparc (SPARC systems only) - - i386 and _i386 (x86 systems only) - - __SVR4 (Solaris only) - - __sparcv9: 64-bit solaris - - __SUNPRO_C - - _LP64: defined in 64-bit LP64 mode, but only if is included - -Borland C/C++ predefines the following: - - __BORLANDC__: - -DEC/Compaq C/C++ on Alpha: - - __alpha - - __arch64__ - - __unix__ (on Tru64 Unix) - - __osf__ - - __DECC - - __DECCXX (C++ compilation) - - __DECC_VER - - __DECCXX_VER - -IBM's AIX compiler: - - __64BIT__ if 64-bit mode - - _AIX - - __IBMC__: C compiler version - - __IBMCPP__: C++ compiler version - - _LONG_LONG: compiler allows long long - -Watcom: - - __WATCOMC__ - - __DOS__ : if targeting DOS - - __386__ : if 32-bit support - - __WIN32__ : if targetin 32-bit Windows - -HP-UX C/C++ Compiler: - - __hpux - - __unix - - __hppa (on PA-RISC) - - __LP64__: if compiled in 64-bit mode - -Metrowerks: - - __MWERKS__ - - __powerpc__ - - _powerc - - __MC68K__ - - macintosh when compiling for MacOS - - __INTEL__ for x86 targets - - __POWERPC__ - -*/ - -/* -** ---------------------------------------------------------------------------- -** Include optionally -** ---------------------------------------------------------------------------- -*/ -#ifdef POSH_USE_LIMITS_H -# include -#endif - -/* -** ---------------------------------------------------------------------------- -** Determine compilation environment -** ---------------------------------------------------------------------------- -*/ -#if defined __ECC || defined __ICC || defined __INTEL_COMPILER -# define POSH_COMPILER_STRING 
"Intel C/C++" -# define POSH_COMPILER_INTEL 1 -#endif - -#if ( defined __host_mips || defined __sgi ) && !defined __GNUC__ -# define POSH_COMPILER_STRING "MIPSpro C/C++" -# define POSH_COMPILER_MIPSPRO 1 -#endif - -#if defined __hpux && !defined __GNUC__ -# define POSH_COMPILER_STRING "HP-UX CC" -# define POSH_COMPILER_HPCC 1 -#endif - -#if defined __GNUC__ -# define POSH_COMPILER_STRING "Gnu GCC" -# define POSH_COMPILER_GCC 1 -#endif - -#if defined __APPLE_CC__ - /* we don't define the compiler string here, let it be GNU */ -# define POSH_COMPILER_APPLECC 1 -#endif - -#if defined __IBMC__ || defined __IBMCPP__ -# define POSH_COMPILER_STRING "IBM C/C++" -# define POSH_COMPILER_IBM 1 -#endif - -#if defined _MSC_VER -# define POSH_COMPILER_STRING "Microsoft Visual C++" -# define POSH_COMPILER_MSVC 1 -#endif - -#if defined __SUNPRO_C -# define POSH_COMPILER_STRING "Sun Pro" -# define POSH_COMPILER_SUN 1 -#endif - -#if defined __BORLANDC__ -# define POSH_COMPILER_STRING "Borland C/C++" -# define POSH_COMPILER_BORLAND 1 -#endif - -#if defined __MWERKS__ -# define POSH_COMPILER_STRING "MetroWerks CodeWarrior" -# define POSH_COMPILER_METROWERKS 1 -#endif - -#if defined __DECC || defined __DECCXX -# define POSH_COMPILER_STRING "Compaq/DEC C/C++" -# define POSH_COMPILER_DEC 1 -#endif - -#if defined __WATCOMC__ -# define POSH_COMPILER_STRING "Watcom C/C++" -# define POSH_COMPILER_WATCOM 1 -#endif - -#if !defined POSH_COMPILER_STRING -# define POSH_COMPILER_STRING "Unknown compiler" -#endif - -/* -** ---------------------------------------------------------------------------- -** Determine target operating system -** ---------------------------------------------------------------------------- -*/ -#if defined linux || defined __linux__ -# define POSH_OS_LINUX 1 -# define POSH_OS_STRING "Linux" -#endif - -#if defined __FreeBSD__ -# define POSH_OS_FREEBSD 1 -# define POSH_OS_STRING "FreeBSD" -#endif - -#if defined __OpenBSD__ -# define POSH_OS_OPENBSD 1 -# define POSH_OS_STRING "OpenBSD" -#endif - -#if defined __CYGWIN32__ -# define POSH_OS_CYGWIN32 1 -# define POSH_OS_STRING "Cygwin" -#endif - -#if defined GEKKO -# define POSH_OS_GAMECUBE -# define __powerpc__ -# define POSH_OS_STRING "GameCube" -#endif - -#if defined __MINGW32__ -# define POSH_OS_MINGW 1 -# define POSH_OS_STRING "MinGW" -#endif - -#if defined GO32 && defined DJGPP && defined __MSDOS__ -# define POSH_OS_GO32 1 -# define POSH_OS_STRING "GO32/MS-DOS" -#endif - -/* NOTE: make sure you use /bt=DOS if compiling for 32-bit DOS, - otherwise Watcom assumes host=target */ -#if defined __WATCOMC__ && defined __386__ && defined __DOS__ -# define POSH_OS_DOS32 1 -# define POSH_OS_STRING "DOS/32-bit" -#endif - -#if defined _UNICOS -# define POSH_OS_UNICOS 1 -# define POSH_OS_STRING "UNICOS" -#endif - -#if ( defined __MWERKS__ && defined __powerc && !defined macintosh ) || defined __APPLE_CC__ || defined macosx -# define POSH_OS_OSX 1 -# define POSH_OS_STRING "MacOS X" -#endif - -#if defined __sun__ || defined sun || defined __sun || defined __solaris__ -# if defined __SVR4 || defined __svr4__ || defined __solaris__ -# define POSH_OS_STRING "Solaris" -# define POSH_OS_SOLARIS 1 -# endif -# if !defined POSH_OS_STRING -# define POSH_OS_STRING "SunOS" -# define POSH_OS_SUNOS 1 -# endif -#endif - -#if defined __sgi__ || defined sgi || defined __sgi -# define POSH_OS_IRIX 1 -# define POSH_OS_STRING "Irix" -#endif - -#if defined __hpux__ || defined __hpux -# define POSH_OS_HPUX 1 -# define POSH_OS_STRING "HP-UX" -#endif - -#if defined _AIX -# define 
POSH_OS_AIX 1 -# define POSH_OS_STRING "AIX" -#endif - -#if ( defined __alpha && defined __osf__ ) -# define POSH_OS_TRU64 1 -# define POSH_OS_STRING "Tru64" -#endif - -#if defined __BEOS__ || defined __beos__ -# define POSH_OS_BEOS 1 -# define POSH_OS_STRING "BeOS" -#endif - -#if defined amiga || defined amigados || defined AMIGA || defined _AMIGA -# define POSH_OS_AMIGA 1 -# define POSH_OS_STRING "Amiga" -#endif - -#if defined __unix__ -# define POSH_OS_UNIX 1 -# if !defined POSH_OS_STRING -# define POSH_OS_STRING "Unix-like(generic)" -# endif -#endif - -#if defined _WIN32_WCE -# define POSH_OS_WINCE 1 -# define POSH_OS_STRING "Windows CE" -#endif - -#if defined _XBOX -# define POSH_OS_XBOX 1 -# define POSH_OS_STRING "XBOX" -#endif - -#if defined _WIN32 || defined WIN32 || defined __NT__ || defined __WIN32__ -# define POSH_OS_WIN32 1 -# if !defined POSH_OS_XBOX -# if defined _WIN64 -# define POSH_OS_WIN64 1 -# define POSH_OS_STRING "Win64" -# else -# if !defined POSH_OS_STRING -# define POSH_OS_STRING "Win32" -# endif -# endif -# endif -#endif - -#if defined __palmos__ -# define POSH_OS_PALM 1 -# define POSH_OS_STRING "PalmOS" -#endif - -#if defined THINK_C || defined macintosh -# define POSH_OS_MACOS 1 -# define POSH_OS_STRING "MacOS" -#endif - -/* -** ----------------------------------------------------------------------------- -** Determine target CPU -** ----------------------------------------------------------------------------- -*/ - -#if defined GEKKO -# define POSH_CPU_PPC750 1 -# define POSH_CPU_STRING "IBM PowerPC 750 (NGC)" -#endif - -#if defined mc68000 || defined m68k || defined __MC68K__ || defined m68000 -# define POSH_CPU_68K 1 -# define POSH_CPU_STRING "MC68000" -#endif - -#if defined __PPC__ || defined __POWERPC__ || defined powerpc || defined _POWER || defined __ppc__ || defined __powerpc__ -# define POSH_CPU_PPC 1 -# if !defined POSH_CPU_STRING -# if defined __powerpc64__ -# define POSH_CPU_STRING "PowerPC64" -# else -# define POSH_CPU_STRING "PowerPC" -# endif -# endif -#endif - -#if defined _CRAYT3E || defined _CRAYMPP -# define POSH_CPU_CRAYT3E 1 /* target processor is a DEC Alpha 21164 used in a Cray T3E*/ -# define POSH_CPU_STRING "Cray T3E (Alpha 21164)" -#endif - -#if defined CRAY || defined _CRAY && !defined _CRAYT3E -# error Non-AXP Cray systems not supported -#endif - -#if defined _SH3 -# define POSH_CPU_SH3 1 -# define POSH_CPU_STRING "Hitachi SH-3" -#endif - -#if defined __sh4__ || defined __SH4__ -# define POSH_CPU_SH3 1 -# define POSH_CPU_SH4 1 -# define POSH_CPU_STRING "Hitachi SH-4" -#endif - -#if defined __sparc__ || defined __sparc -# if defined __arch64__ || defined __sparcv9 || defined __sparc_v9__ -# define POSH_CPU_SPARC64 1 -# define POSH_CPU_STRING "Sparc/64" -# else -# define POSH_CPU_STRING "Sparc/32" -# endif -# define POSH_CPU_SPARC 1 -#endif - -#if defined ARM || defined __arm__ || defined _ARM -# define POSH_CPU_STRONGARM 1 -# define POSH_CPU_STRING "ARM" -#endif - -#if defined __aarch64__ -# define POSH_CPU_AARCH64 1 -# define POSH_CPU_STRING "ARM64" -#endif - -#if defined mips || defined __mips__ || defined __MIPS__ || defined _MIPS -# define POSH_CPU_MIPS 1 -# if defined _R5900 -# define POSH_CPU_STRING "MIPS R5900 (PS2)" -# else -# define POSH_CPU_STRING "MIPS" -# endif -#endif - -#if defined __ia64 || defined _M_IA64 || defined __ia64__ -# define POSH_CPU_IA64 1 -# define POSH_CPU_STRING "IA64" -#endif - -#if defined __X86__ || defined __i386__ || defined i386 || defined _M_IX86 || defined __386__ || defined __x86_64__ || defined 
_M_X64 -# define POSH_CPU_X86 1 -# if defined __x86_64__ || defined _M_X64 -# define POSH_CPU_X86_64 1 -# endif -# if defined POSH_CPU_X86_64 -# define POSH_CPU_STRING "AMD x86-64" -# else -# define POSH_CPU_STRING "Intel 386+" -# endif -#endif - -#if defined __alpha || defined alpha || defined _M_ALPHA || defined __alpha__ -# define POSH_CPU_AXP 1 -# define POSH_CPU_STRING "AXP" -#endif - -#if defined __hppa || defined hppa -# define POSH_CPU_HPPA 1 -# define POSH_CPU_STRING "PA-RISC" -#endif - -#if !defined POSH_CPU_STRING -# error POSH cannot determine target CPU -# define POSH_CPU_STRING "Unknown" /* this is here for Doxygen's benefit */ -#endif - -/* -** ----------------------------------------------------------------------------- -** Attempt to autodetect building for embedded on Sony PS2 -** ----------------------------------------------------------------------------- -*/ -#if !defined POSH_OS_STRING -# if !defined FORCE_DOXYGEN -# define POSH_OS_EMBEDDED 1 -# endif -# if defined _R5900 -# define POSH_OS_STRING "Sony PS2(embedded)" -# else -# define POSH_OS_STRING "Embedded/Unknown" -# endif -#endif - -/* -** --------------------------------------------------------------------------- -** Handle cdecl, stdcall, fastcall, etc. -** --------------------------------------------------------------------------- -*/ -#if defined POSH_CPU_X86 && !defined POSH_CPU_X86_64 -# if defined __GNUC__ -# define POSH_CDECL __attribute__((cdecl)) -# define POSH_STDCALL __attribute__((stdcall)) -# define POSH_FASTCALL __attribute__((fastcall)) -# elif ( defined _MSC_VER || defined __WATCOMC__ || defined __BORLANDC__ || defined __MWERKS__ ) -# define POSH_CDECL __cdecl -# define POSH_STDCALL __stdcall -# define POSH_FASTCALL __fastcall -# endif -#else -# define POSH_CDECL -# define POSH_STDCALL -# define POSH_FASTCALL -#endif - -/* -** --------------------------------------------------------------------------- -** Define POSH_IMPORTEXPORT signature based on POSH_DLL and POSH_BUILDING_LIB -** --------------------------------------------------------------------------- -*/ - -/* -** We undefine this so that multiple inclusions will work -*/ -#if defined POSH_IMPORTEXPORT -# undef POSH_IMPORTEXPORT -#endif - -#if defined POSH_DLL -# if defined POSH_OS_WIN32 -# if defined _MSC_VER -# if ( _MSC_VER >= 800 ) -# if defined POSH_BUILDING_LIB -# define POSH_IMPORTEXPORT __declspec( dllexport ) -# else -# define POSH_IMPORTEXPORT __declspec( dllimport ) -# endif -# else -# if defined POSH_BUILDING_LIB -# define POSH_IMPORTEXPORT __export -# else -# define POSH_IMPORTEXPORT -# endif -# endif -# endif /* defined _MSC_VER */ -# if defined __BORLANDC__ -# if ( __BORLANDC__ >= 0x500 ) -# if defined POSH_BUILDING_LIB -# define POSH_IMPORTEXPORT __declspec( dllexport ) -# else -# define POSH_IMPORTEXPORT __declspec( dllimport ) -# endif -# else -# if defined POSH_BUILDING_LIB -# define POSH_IMPORTEXPORT __export -# else -# define POSH_IMPORTEXPORT -# endif -# endif -# endif /* defined __BORLANDC__ */ - /* for all other compilers, we're just making a blanket assumption */ -# if defined __GNUC__ || defined __WATCOMC__ || defined __MWERKS__ -# if defined POSH_BUILDING_LIB -# define POSH_IMPORTEXPORT __declspec( dllexport ) -# else -# define POSH_IMPORTEXPORT __declspec( dllimport ) -# endif -# endif /* all other compilers */ -# if !defined POSH_IMPORTEXPORT -# error Building DLLs not supported on this compiler (poshlib@poshlib.org if you know how) -# endif -# endif /* defined POSH_OS_WIN32 */ -#endif - -/* On pretty much 
everything else, we can thankfully just ignore this */ -#if !defined POSH_IMPORTEXPORT -# define POSH_IMPORTEXPORT -#endif - -#if defined FORCE_DOXYGEN -# define POSH_DLL -# define POSH_BUILDING_LIB -# undef POSH_DLL -# undef POSH_BUILDING_LIB -#endif - -/* -** ---------------------------------------------------------------------------- -** (Re)define POSH_PUBLIC_API export signature -** ---------------------------------------------------------------------------- -*/ -#ifdef POSH_PUBLIC_API -# undef POSH_PUBLIC_API -#endif - -#if ( ( defined _MSC_VER ) && ( _MSC_VER < 800 ) ) || ( defined __BORLANDC__ && ( __BORLANDC__ < 0x500 ) ) -# define POSH_PUBLIC_API(rtype) extern rtype POSH_IMPORTEXPORT -#else -# define POSH_PUBLIC_API(rtype) extern POSH_IMPORTEXPORT rtype -#endif - -/* -** ---------------------------------------------------------------------------- -** Try to infer endianess. Basically we just go through the CPUs we know are -** little endian, and assume anything that isn't one of those is big endian. -** As a sanity check, we also do this with operating systems we know are -** little endian, such as Windows. Some processors are bi-endian, such as -** the MIPS series, so we have to be careful about those. -** ---------------------------------------------------------------------------- -*/ -#if defined POSH_CPU_X86 || defined POSH_CPU_AXP || defined POSH_CPU_STRONGARM || defined POSH_CPU_AARCH64 || defined POSH_OS_WIN32 || defined POSH_OS_WINCE || defined __MIPSEL__ -# define POSH_ENDIAN_STRING "little" -# define POSH_LITTLE_ENDIAN 1 -#else -# define POSH_ENDIAN_STRING "big" -# define POSH_BIG_ENDIAN 1 -#endif - -#if defined FORCE_DOXYGEN -# define POSH_LITTLE_ENDIAN -#endif - -/* -** ---------------------------------------------------------------------------- -** Cross-platform compile time assertion macro -** ---------------------------------------------------------------------------- -*/ -#define POSH_COMPILE_TIME_ASSERT(name, x) typedef int _POSH_dummy_ ## name[(x) ? 1 : -1 ] - -/* -** ---------------------------------------------------------------------------- -** 64-bit Integer -** -** We don't require 64-bit support, nor do we emulate its functionality, we -** simply export it if it's available. Since we can't count on -** for 64-bit support, we ignore the POSH_USE_LIMITS_H directive. 
-** ---------------------------------------------------------------------------- -*/ -#if defined ( __LP64__ ) || defined ( __powerpc64__ ) || defined POSH_CPU_SPARC64 -# define POSH_64BIT_INTEGER 1 -typedef long posh_i64_t; -typedef unsigned long posh_u64_t; -# define POSH_I64( x ) ((posh_i64_t)x) -# define POSH_U64( x ) ((posh_u64_t)x) -# define POSH_I64_PRINTF_PREFIX "l" -#elif defined _MSC_VER || defined __BORLANDC__ || defined __WATCOMC__ || ( defined __alpha && defined __DECC ) -# define POSH_64BIT_INTEGER 1 -typedef __int64 posh_i64_t; -typedef unsigned __int64 posh_u64_t; -# define POSH_I64( x ) ((posh_i64_t)x) -# define POSH_U64( x ) ((posh_u64_t)x) -# define POSH_I64_PRINTF_PREFIX "I64" -#elif defined __GNUC__ || defined __MWERKS__ || defined __SUNPRO_C || defined __SUNPRO_CC || defined __APPLE_CC__ || defined POSH_OS_IRIX || defined _LONG_LONG || defined _CRAYC -# define POSH_64BIT_INTEGER 1 -typedef long long posh_i64_t; -typedef unsigned long long posh_u64_t; -# define POSH_U64( x ) ((posh_u64_t)(x##LL)) -# define POSH_I64( x ) ((posh_i64_t)(x##LL)) -# define POSH_I64_PRINTF_PREFIX "ll" -#endif - -/* hack */ -/*#ifdef __MINGW32__ -#undef POSH_I64 -#undef POSH_U64 -#undef POSH_I64_PRINTF_PREFIX -#define POSH_I64( x ) ((posh_i64_t)x) -#define POSH_U64( x ) ((posh_u64_t)x) -#define POSH_I64_PRINTF_PREFIX "I64" -#endif*/ - -#ifdef FORCE_DOXYGEN -typedef long long posh_i64_t; -typedef unsigned long posh_u64_t; -# define POSH_64BIT_INTEGER -# define POSH_I64_PRINTF_PREFIX -# define POSH_I64(x) -# define POSH_U64(x) -#endif - -/** Minimum value for a 64-bit signed integer */ -#define POSH_I64_MIN POSH_I64(0x8000000000000000) -/** Maximum value for a 64-bit signed integer */ -#define POSH_I64_MAX POSH_I64(0x7FFFFFFFFFFFFFFF) -/** Minimum value for a 64-bit unsigned integer */ -#define POSH_U64_MIN POSH_U64(0) -/** Maximum value for a 64-bit unsigned integer */ -#define POSH_U64_MAX POSH_U64(0xFFFFFFFFFFFFFFFF) - -/* ---------------------------------------------------------------------------- -** Basic Sized Types -** -** These types are expected to be EXACTLY sized so you can use them for -** serialization. -** ---------------------------------------------------------------------------- -*/ -#define POSH_FALSE 0 -#define POSH_TRUE 1 - -typedef int posh_bool_t; -typedef unsigned char posh_byte_t; - -/* NOTE: These assume that CHAR_BIT is 8!! */ -typedef unsigned char posh_u8_t; -typedef signed char posh_i8_t; - -#if defined POSH_USE_LIMITS_H -# if CHAR_BITS > 8 -# error This machine uses 9-bit characters. This is a warning, you can comment this out now. 
-# endif /* CHAR_BITS > 8 */ - -/* 16-bit */ -# if ( USHRT_MAX == 65535 ) - typedef unsigned short posh_u16_t; - typedef short posh_i16_t; -# else - /* Yes, in theory there could still be a 16-bit character type and shorts are - 32-bits in size...if you find such an architecture, let me know =P */ -# error No 16-bit type found -# endif - -/* 32-bit */ -# if ( INT_MAX == 2147483647 ) - typedef unsigned posh_u32_t; - typedef int posh_i32_t; -# elif ( LONG_MAX == 2147483647 ) - typedef unsigned long posh_u32_t; - typedef long posh_i32_t; -# else - error No 32-bit type found -# endif - -#else /* POSH_USE_LIMITS_H */ - - typedef unsigned short posh_u16_t; - typedef short posh_i16_t; - -# if !defined POSH_OS_PALM - typedef unsigned posh_u32_t; - typedef int posh_i32_t; -# else - typedef unsigned long posh_u32_t; - typedef long posh_i32_t; -# endif -#endif - -/** Minimum value for a byte */ -#define POSH_BYTE_MIN 0 -/** Maximum value for an 8-bit unsigned value */ -#define POSH_BYTE_MAX 255 -/** Minimum value for a byte */ -#define POSH_I16_MIN ( ( posh_i16_t ) 0x8000 ) -/** Maximum value for a 16-bit signed value */ -#define POSH_I16_MAX ( ( posh_i16_t ) 0x7FFF ) -/** Minimum value for a 16-bit unsigned value */ -#define POSH_U16_MIN 0 -/** Maximum value for a 16-bit unsigned value */ -#define POSH_U16_MAX ( ( posh_u16_t ) 0xFFFF ) -/** Minimum value for a 32-bit signed value */ -#define POSH_I32_MIN ( ( posh_i32_t ) 0x80000000 ) -/** Maximum value for a 32-bit signed value */ -#define POSH_I32_MAX ( ( posh_i32_t ) 0x7FFFFFFF ) -/** Minimum value for a 32-bit unsigned value */ -#define POSH_U32_MIN 0 -/** Maximum value for a 32-bit unsigned value */ -#define POSH_U32_MAX ( ( posh_u32_t ) 0xFFFFFFFF ) - -/* -** ---------------------------------------------------------------------------- -** Sanity checks on expected sizes -** ---------------------------------------------------------------------------- -*/ -#if !defined FORCE_DOXYGEN - -POSH_COMPILE_TIME_ASSERT(posh_byte_t, sizeof(posh_byte_t) == 1); -POSH_COMPILE_TIME_ASSERT(posh_u8_t, sizeof(posh_u8_t) == 1); -POSH_COMPILE_TIME_ASSERT(posh_i8_t, sizeof(posh_i8_t) == 1); -POSH_COMPILE_TIME_ASSERT(posh_u16_t, sizeof(posh_u16_t) == 2); -POSH_COMPILE_TIME_ASSERT(posh_i16_t, sizeof(posh_i16_t) == 2); -POSH_COMPILE_TIME_ASSERT(posh_u32_t, sizeof(posh_u32_t) == 4); -POSH_COMPILE_TIME_ASSERT(posh_i32_t, sizeof(posh_i32_t) == 4); - -#if !defined POSH_NO_FLOAT - POSH_COMPILE_TIME_ASSERT(posh_testfloat_t, sizeof(float)==4 ); - POSH_COMPILE_TIME_ASSERT(posh_testdouble_t, sizeof(double)==8); -#endif - -#if defined POSH_64BIT_INTEGER - POSH_COMPILE_TIME_ASSERT(posh_u64_t, sizeof(posh_u64_t) == 8); - POSH_COMPILE_TIME_ASSERT(posh_i64_t, sizeof(posh_i64_t) == 8); -#endif - -#endif - -/* -** ---------------------------------------------------------------------------- -** 64-bit pointer support -** ---------------------------------------------------------------------------- -*/ -#if defined POSH_CPU_AXP && ( defined POSH_OS_TRU64 || defined POSH_OS_LINUX ) -# define POSH_64BIT_POINTER 1 -#endif - -#if defined POSH_CPU_X86_64 && defined POSH_OS_LINUX -# define POSH_64BIT_POINTER 1 -#endif - -#if defined POSH_CPU_SPARC64 || defined POSH_OS_WIN64 || defined __64BIT__ || defined __LP64 || defined _LP64 || defined __LP64__ || defined _ADDR64 || defined _CRAYC -# define POSH_64BIT_POINTER 1 -#endif - -#if defined POSH_64BIT_POINTER - POSH_COMPILE_TIME_ASSERT( posh_64bit_pointer, sizeof( void * ) == 8 ); -#elif !defined FORCE_DOXYGEN -/* if this assertion is hit then 
you're on a system that either has 64-bit - addressing and we didn't catch it, or you're on a system with 16-bit - pointers. In the latter case, POSH doesn't actually care, we're just - triggering this assertion to make sure you're aware of the situation, - so feel free to delete it. - - If this assertion is triggered on a known 32 or 64-bit platform, - please let us know (poshlib@poshlib.org) */ - POSH_COMPILE_TIME_ASSERT( posh_32bit_pointer, sizeof( void * ) == 4 ); -#endif - -#if defined FORCE_DOXYGEN -# define POSH_64BIT_POINTER -#endif - -/* -** ---------------------------------------------------------------------------- -** POSH Utility Functions -** -** These are optional POSH utility functions that are not required if you don't -** need anything except static checking of your host and target environment. -** -** These functions are NOT wrapped with POSH_PUBLIC_API because I didn't want -** to enforce their export if your own library is only using them internally. -** ---------------------------------------------------------------------------- -*/ -#ifdef __cplusplus -extern "C" { -#endif - -const char *POSH_GetArchString( void ); - -#if !defined POSH_NO_FLOAT - -posh_u32_t POSH_LittleFloatBits( float f ); -posh_u32_t POSH_BigFloatBits( float f ); -float POSH_FloatFromLittleBits( posh_u32_t bits ); -float POSH_FloatFromBigBits( posh_u32_t bits ); - -void POSH_DoubleBits( double d, posh_byte_t dst[ 8 ] ); -double POSH_DoubleFromBits( const posh_byte_t src[ 8 ] ); - -/* unimplemented -float *POSH_WriteFloatToLittle( void *dst, float f ); -float *POSH_WriteFloatToBig( void *dst, float f ); -float POSH_ReadFloatFromLittle( const void *src ); -float POSH_ReadFloatFromBig( const void *src ); - -double *POSH_WriteDoubleToLittle( void *dst, double d ); -double *POSH_WriteDoubleToBig( void *dst, double d ); -double POSH_ReadDoubleFromLittle( const void *src ); -double POSH_ReadDoubleFromBig( const void *src ); -*/ -#endif /* !defined POSH_NO_FLOAT */ - -#if defined FORCE_DOXYGEN -# define POSH_NO_FLOAT -# undef POSH_NO_FLOAT -#endif - -extern posh_u16_t POSH_SwapU16( posh_u16_t u ); -extern posh_i16_t POSH_SwapI16( posh_i16_t u ); -extern posh_u32_t POSH_SwapU32( posh_u32_t u ); -extern posh_i32_t POSH_SwapI32( posh_i32_t u ); - -#if defined POSH_64BIT_INTEGER - -extern posh_u64_t POSH_SwapU64( posh_u64_t u ); -extern posh_i64_t POSH_SwapI64( posh_i64_t u ); - -#endif /*POSH_64BIT_INTEGER */ - -extern posh_u16_t *POSH_WriteU16ToLittle( void *dst, posh_u16_t value ); -extern posh_i16_t *POSH_WriteI16ToLittle( void *dst, posh_i16_t value ); -extern posh_u32_t *POSH_WriteU32ToLittle( void *dst, posh_u32_t value ); -extern posh_i32_t *POSH_WriteI32ToLittle( void *dst, posh_i32_t value ); - -extern posh_u16_t *POSH_WriteU16ToBig( void *dst, posh_u16_t value ); -extern posh_i16_t *POSH_WriteI16ToBig( void *dst, posh_i16_t value ); -extern posh_u32_t *POSH_WriteU32ToBig( void *dst, posh_u32_t value ); -extern posh_i32_t *POSH_WriteI32ToBig( void *dst, posh_i32_t value ); - -extern posh_u16_t POSH_ReadU16FromLittle( const void *src ); -extern posh_i16_t POSH_ReadI16FromLittle( const void *src ); -extern posh_u32_t POSH_ReadU32FromLittle( const void *src ); -extern posh_i32_t POSH_ReadI32FromLittle( const void *src ); - -extern posh_u16_t POSH_ReadU16FromBig( const void *src ); -extern posh_i16_t POSH_ReadI16FromBig( const void *src ); -extern posh_u32_t POSH_ReadU32FromBig( const void *src ); -extern posh_i32_t POSH_ReadI32FromBig( const void *src ); - -#if defined POSH_64BIT_INTEGER -extern 
posh_u64_t *POSH_WriteU64ToLittle( void *dst, posh_u64_t value ); -extern posh_i64_t *POSH_WriteI64ToLittle( void *dst, posh_i64_t value ); -extern posh_u64_t *POSH_WriteU64ToBig( void *dst, posh_u64_t value ); -extern posh_i64_t *POSH_WriteI64ToBig( void *dst, posh_i64_t value ); - -extern posh_u64_t POSH_ReadU64FromLittle( const void *src ); -extern posh_i64_t POSH_ReadI64FromLittle( const void *src ); -extern posh_u64_t POSH_ReadU64FromBig( const void *src ); -extern posh_i64_t POSH_ReadI64FromBig( const void *src ); -#endif /* POSH_64BIT_INTEGER */ - -#if defined POSH_LITTLE_ENDIAN - -# define POSH_LittleU16(x) (x) -# define POSH_LittleU32(x) (x) -# define POSH_LittleI16(x) (x) -# define POSH_LittleI32(x) (x) -# if defined POSH_64BIT_INTEGER -# define POSH_LittleU64(x) (x) -# define POSH_LittleI64(x) (x) -# endif /* defined POSH_64BIT_INTEGER */ - -# define POSH_BigU16(x) POSH_SwapU16(x) -# define POSH_BigU32(x) POSH_SwapU32(x) -# define POSH_BigI16(x) POSH_SwapI16(x) -# define POSH_BigI32(x) POSH_SwapI32(x) -# if defined POSH_64BIT_INTEGER -# define POSH_BigU64(x) POSH_SwapU64(x) -# define POSH_BigI64(x) POSH_SwapI64(x) -# endif /* defined POSH_64BIT_INTEGER */ - -#else - -# define POSH_BigU16(x) (x) -# define POSH_BigU32(x) (x) -# define POSH_BigI16(x) (x) -# define POSH_BigI32(x) (x) - -# if defined POSH_64BIT_INTEGER -# define POSH_BigU64(x) (x) -# define POSH_BigI64(x) (x) -# endif /* POSH_64BIT_INTEGER */ - -# define POSH_LittleU16(x) POSH_SwapU16(x) -# define POSH_LittleU32(x) POSH_SwapU32(x) -# define POSH_LittleI16(x) POSH_SwapI16(x) -# define POSH_LittleI32(x) POSH_SwapI32(x) - -# if defined POSH_64BIT_INTEGER -# define POSH_LittleU64(x) POSH_SwapU64(x) -# define POSH_LittleI64(x) POSH_SwapI64(x) -# endif /* POSH_64BIT_INTEGER */ - -#endif - -#ifdef __cplusplus -} -#endif - - Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/poshlib/posh.c =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/poshlib/posh.c +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/poshlib/posh.c @@ -1,1006 +0,0 @@ -/* -LICENSE: - -Copyright (c) 2004, Brian Hook -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials provided - with the distribution. - - * The names of this package'ss contributors contributors may not - be used to endorse or promote products derived from this - software without specific prior written permission. - - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ -/** - @file posh.c - @author Brian Hook - @date 2002 - @brief Portable Open Source Harness primary source file -*/ -#include "posh.h" - -#if !defined FORCE_DOXYGEN - -#if !defined POSH_NO_FLOAT -# define POSH_FLOAT_STRING "enabled" -#else -# define POSH_FLOAT_STRING "disabled" -#endif - -#if defined POSH_64BIT_INTEGER -# define POSH_64BIT_INTEGER_STRING "yes" -#else -# define POSH_64BIT_INTEGER_STRING "no" -#endif - -#if defined POSH_64BIT_POINTER -# define POSH_POINTER_STRING "64-bits" -#else -# define POSH_POINTER_STRING "32-bits" -#endif - -#if defined POSH_LITTLE_ENDIAN -# define IS_BIG_ENDIAN 0 - -# define NATIVE16 POSH_LittleU16 -# define NATIVE32 POSH_LittleU32 -# define NATIVE64 POSH_LittleU64 -# define FOREIGN16 POSH_BigU16 -# define FOREIGN32 POSH_BigU32 -# define FOREIGN64 POSH_BigU64 -#else -# define IS_BIG_ENDIAN 1 - -# define NATIVE16 POSH_BigU16 -# define NATIVE32 POSH_BigU32 -# define NATIVE64 POSH_BigU64 -# define FOREIGN16 POSH_LittleU16 -# define FOREIGN32 POSH_LittleU32 -# define FOREIGN64 POSH_LittleU64 -#endif /* POSH_LITTLE_ENDIAN */ - -static -int -s_testBigEndian( void ) -{ - union - { - posh_byte_t c[ 4 ]; - posh_u32_t i; - } u; - - u.i= 1; - - if ( u.c[ 0 ] == 1 ) - { - return 0; - } - return 1; -} - -static -const char * -s_testSerialization( void ) -{ - posh_byte_t serbuf[ 8 ]; - posh_u16_t tmp16; - posh_u32_t tmp32; - - /* 16-bit serialization */ - POSH_WriteU16ToLittle( serbuf, 0xABCD ); - if ( ( tmp16 = POSH_ReadU16FromLittle( serbuf ) ) != 0xABCD ) - { - return "*ERROR: failed little-endian 16-bit serialization test"; - } - - POSH_WriteU16ToBig( serbuf, 0xABCD ); - if ( ( tmp16 = POSH_ReadU16FromBig( serbuf ) ) != 0xABCD ) - { - return "*ERROR: failed big-endian 16-bit serialization test"; - } - - /* 32-bit serialization */ - POSH_WriteU32ToLittle( serbuf, 0xABCD1234L ); - if ( ( tmp32 = POSH_ReadU32FromLittle( serbuf ) ) != 0xABCD1234 ) - { - return "*ERROR: failed little-endian 32-bit serialization test"; - } - - POSH_WriteU32ToBig( serbuf, 0xABCD1234L ); - if ( ( tmp32 = POSH_ReadU32FromBig( serbuf ) ) != 0xABCD1234 ) - { - return "*ERROR: failed big-endian 32-bit serialization test"; - } - -#if defined POSH_64BIT_INTEGER - { -#define REF64 POSH_U64(0xFEDCBA9876543210) - - posh_u64_t tmp64; - - POSH_WriteU64ToLittle( serbuf, REF64 ); - - if ( ( tmp64 = POSH_ReadU64FromLittle( serbuf ) ) != REF64 ) - { - return "*ERROR: failed little-endian 64-bit serialization test"; - } - - POSH_WriteU64ToBig( serbuf, REF64 ); - - if ( ( tmp64 = POSH_ReadU64FromBig( serbuf ) ) != REF64 ) - { - return "*ERROR: failed big-endian 64-bit serialization test"; - } - } -#endif - - return 0; -} - -#if !defined POSH_NO_FLOAT -static -const char * -s_testFloatingPoint( void ) -{ - float fRef = 10.0f/30.0f; - double dRef = 10.0/30.0; - posh_byte_t dbuf[ 8 ]; - float fTmp; - double dTmp; - - fTmp = POSH_FloatFromLittleBits( POSH_LittleFloatBits( fRef ) ); - - if ( fTmp != fRef ) - { - return "*ERROR: POSH little endian floating point conversion failed. 
Please report this to poshlib@poshlib.org!\n"; - } - - fTmp = POSH_FloatFromBigBits( POSH_BigFloatBits( fRef ) ); - if ( fTmp != fRef ) - { - return "*ERROR: POSH big endian floating point conversion failed. Please report this to poshlib@poshlib.org!\n"; - } - - POSH_DoubleBits( dRef, dbuf ); - - dTmp = POSH_DoubleFromBits( dbuf ); - - if ( dTmp != dRef ) - { - return "*ERROR: POSH double precision floating point serialization failed. Please report this to poshlib@poshlib.org!\n"; - } - - return 0; -} -#endif /* !defined POSH_NO_FLOAT */ - -static -const char * -s_testEndianess( void ) -{ - /* check endianess */ - if ( s_testBigEndian() != IS_BIG_ENDIAN ) - { - return "*ERROR: POSH compile time endianess does not match run-time endianess verification. Please report this to poshlib@poshlib.org!\n"; - } - - /* make sure our endian swap routines work */ - if ( ( NATIVE32( 0x11223344L ) != 0x11223344L ) || - ( FOREIGN32( 0x11223344L ) != 0x44332211L ) || - ( NATIVE16( 0x1234 ) != 0x1234 ) || - ( FOREIGN16( 0x1234 ) != 0x3412 ) ) - { - return "*ERROR: POSH endianess macro selection failed. Please report this to poshlib@poshlib.org!\n"; - } - - /* test serialization routines */ - - return 0; -} -#endif /* !defined FORCE_DOXYGEN */ - -/** - Returns a string describing this platform's basic attributes. - - POSH_GetArchString() reports on an architecture's statically determined - attributes. In addition, it will perform run-time verification checks - to make sure the various platform specific functions work. If an error - occurs, please contact me at poshlib@poshlib.org so we can try to resolve - what the specific failure case is. - @returns a string describing this platform on success, or a string in the - form "*ERROR: [text]" on failure. You can simply check to see if - the first character returned is '*' to verify an error condition. -*/ -const char * -POSH_GetArchString( void ) -{ - const char *err; - const char *s = "OS:.............."POSH_OS_STRING"\n" - "CPU:............."POSH_CPU_STRING"\n" - "endian:.........."POSH_ENDIAN_STRING"\n" - "ptr size:........"POSH_POINTER_STRING"\n" - "64-bit ints......"POSH_64BIT_INTEGER_STRING"\n" - "floating point..."POSH_FLOAT_STRING"\n" - "compiler........."POSH_COMPILER_STRING"\n"; - - /* test endianess */ - err = s_testEndianess(); - - if ( err != 0 ) - { - return err; - } - - /* test serialization */ - err = s_testSerialization(); - - if ( err != 0 ) - { - return err; - } - -#if !defined POSH_NO_FLOAT - /* check that our floating point support is correct */ - err = s_testFloatingPoint(); - - if ( err != 0 ) - { - return err; - } - -#endif - - return s; -} - -/* ---------------------------------------------------------------------------*/ -/* BYTE SWAPPING SUPPORT */ -/* ---------------------------------------------------------------------------*/ -/** - * Byte swaps a 16-bit unsigned value - * - @ingroup ByteSwapFunctions - @param v [in] unsigned 16-bit input value to swap - @returns a byte swapped version of v - */ -posh_u16_t -POSH_SwapU16( posh_u16_t v ) -{ - posh_u16_t swapped; - - swapped = v << 8; - swapped |= v >> 8; - - return swapped; -} - -/** - * Byte swaps a 16-bit signed value - * - @ingroup ByteSwapFunctions - @param v [in] signed 16-bit input value to swap - @returns a byte swapped version of v - @remarks This just calls back to the unsigned version, since byte swapping - is independent of sign. However, we still provide this function to - avoid signed/unsigned mismatch compiler warnings. 
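The removed posh.c pairs its compile-time endianness macros with run-time self-checks: a union-style probe (s_testBigEndian) plus serialization and float round-trips that POSH_GetArchString runs before reporting the platform string, and the signed swap helpers simply cast through the unsigned ones because byte swapping does not depend on sign. A minimal standalone sketch of those two ideas, with illustrative names that are not part of the library:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // Swap the two bytes of a 16-bit value; sign is irrelevant, so a signed
    // variant can just cast through the unsigned one, as posh.c does.
    static std::uint16_t swap_u16(std::uint16_t v)
    {
        return static_cast<std::uint16_t>((v << 8) | (v >> 8));
    }

    static std::int16_t swap_i16(std::int16_t v)
    {
        return static_cast<std::int16_t>(swap_u16(static_cast<std::uint16_t>(v)));
    }

    // Run-time endianness probe, same idea as s_testBigEndian(): store 1 and
    // look at the first byte in memory (memcpy keeps it well-defined in C++).
    static bool is_big_endian()
    {
        const std::uint32_t one = 1;
        unsigned char first = 0;
        std::memcpy(&first, &one, 1);
        return first == 0;
    }

    int main()
    {
        std::printf("big endian: %d\n", is_big_endian() ? 1 : 0);
        std::printf("swap(0x1234) = 0x%04X\n",
                    static_cast<unsigned>(swap_u16(0x1234))); // 0x3412
    }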
- */ -posh_i16_t -POSH_SwapI16( posh_i16_t v ) -{ - return ( posh_i16_t ) POSH_SwapU16( v ); -} - -/** - * Byte swaps a 32-bit unsigned value - * - @ingroup ByteSwapFunctions - @param v [in] unsigned 32-bit input value to swap - @returns a byte swapped version of v - */ -posh_u32_t -POSH_SwapU32( posh_u32_t v ) -{ - posh_u32_t swapped; - - swapped = ( v & 0xFF ) << 24; - swapped |= ( v & 0xFF00 ) << 8; - swapped |= ( v >> 8 ) & 0xFF00; - swapped |= ( v >> 24 ); - - return swapped; -} - -/** - * Byte swaps a 32-bit signed value - * - @ingroup ByteSwapFunctions - @param v [in] signed 32-bit input value to swap - @returns a byte swapped version of v - @remarks This just calls back to the unsigned version, since byte swapping - is independent of sign. However, we still provide this function to - avoid signed/unsigned mismatch compiler warnings. - */ -posh_i32_t -POSH_SwapI32( posh_i32_t v ) -{ - return ( posh_i32_t ) POSH_SwapU32( ( posh_u32_t ) v ); -} - -#if defined POSH_64BIT_INTEGER -/** - * Byte swaps a 64-bit unsigned value - - @param v [in] a 64-bit input value to swap - @ingroup SixtyFourBit - @returns a byte swapped version of v -*/ -posh_u64_t -POSH_SwapU64( posh_u64_t v ) -{ - posh_byte_t tmp; - union { - posh_byte_t bytes[ 8 ]; - posh_u64_t u64; - } u; - - u.u64 = v; - - tmp = u.bytes[ 0 ]; u.bytes[ 0 ] = u.bytes[ 7 ]; u.bytes[ 7 ] = tmp; - tmp = u.bytes[ 1 ]; u.bytes[ 1 ] = u.bytes[ 6 ]; u.bytes[ 6 ] = tmp; - tmp = u.bytes[ 2 ]; u.bytes[ 2 ] = u.bytes[ 5 ]; u.bytes[ 5 ] = tmp; - tmp = u.bytes[ 3 ]; u.bytes[ 3 ] = u.bytes[ 4 ]; u.bytes[ 4 ] = tmp; - - return u.u64; -} - -/** - * Byte swaps a 64-bit signed value - - @param v [in] a 64-bit input value to swap - @ingroup SixtyFourBit - @returns a byte swapped version of v -*/ -posh_i64_t -POSH_SwapI64( posh_i64_t v ) -{ - return ( posh_i64_t ) POSH_SwapU64( ( posh_u64_t ) v ); -} - -#endif /* defined POSH_64BIT_INTEGER */ - -/* ---------------------------------------------------------------------------*/ -/* IN-MEMORY SERIALIZATION */ -/* ---------------------------------------------------------------------------*/ - -/** - * Writes an unsigned 16-bit value to a little endian buffer - - @ingroup MemoryBuffer - @param dst [out] pointer to the destination buffer, may not be NULL. Alignment doesn't matter. - @param value [in] host-endian unsigned 16-bit value - @returns a pointer to the location two bytes after dst - @remarks does no validation of the inputs -*/ -posh_u16_t * -POSH_WriteU16ToLittle( void *dst, posh_u16_t value ) -{ - posh_u16_t *p16 = ( posh_u16_t * ) dst; - posh_byte_t *p = ( posh_byte_t * ) dst; - - p[ 0 ] = value & 0xFF; - p[ 1 ] = ( value & 0xFF00) >> 8; - - return p16 + 1; -} - -/** - * Writes a signed 16-bit value to a little endian buffer - - @ingroup MemoryBuffer - @param dst [out] pointer to the destination buffer, may not be NULL - @param value [in] host-endian signed 16-bit value - @returns a pointer to the location two bytes after dst - @remarks does no validation of the inputs. This simply calls - POSH_WriteU16ToLittle() with appropriate casting. 
-*/ -posh_i16_t * -POSH_WriteI16ToLittle( void *dst, posh_i16_t value ) -{ - return ( posh_i16_t * ) POSH_WriteU16ToLittle( dst, ( posh_u16_t ) value ); -} - -/** - * Writes an unsigned 32-bit value to a little endian buffer - - @ingroup MemoryBuffer - @param dst [out] pointer to the destination buffer, may not be NULL - @param value [in] host-endian signed 32-bit value - @returns a pointer to the location four bytes after dst - @remarks does no validation of the inputs. -*/ -posh_u32_t * -POSH_WriteU32ToLittle( void *dst, posh_u32_t value ) -{ - posh_u32_t *p32 = ( posh_u32_t * ) dst; - posh_byte_t *p = ( posh_byte_t * ) dst; - - p[ 0 ] = ( value & 0xFF ); - p[ 1 ] = ( value & 0xFF00 ) >> 8; - p[ 2 ] = ( value & 0xFF0000 ) >> 16; - p[ 3 ] = ( value & 0xFF000000 ) >> 24; - - return p32 + 1; -} - -/** - * Writes a signed 32-bit value to a little endian buffer - - @ingroup MemoryBuffer - @param dst [out] pointer to the destination buffer, may not be NULL - @param value [in] host-endian signed 32-bit value - @returns a pointer to the location four bytes after dst - @remarks does no validation of the inputs. This simply calls - POSH_WriteU32ToLittle() with appropriate casting. -*/ -posh_i32_t * -POSH_WriteI32ToLittle( void *dst, posh_i32_t value ) -{ - return ( posh_i32_t * ) POSH_WriteU32ToLittle( dst, ( posh_u32_t ) value ); -} - -/** - * Writes an unsigned 16-bit value to a big endian buffer - - @ingroup MemoryBuffer - @param dst [out] pointer to the destination buffer, may not be NULL - @param value [in] host-endian unsigned 16-bit value - @returns a pointer to the location two bytes after dst - @remarks does no validation of the inputs -*/ -posh_u16_t * -POSH_WriteU16ToBig( void *dst, posh_u16_t value ) -{ - posh_u16_t *p16 = ( posh_u16_t * ) dst; - posh_byte_t *p = ( posh_byte_t * ) dst; - - p[ 1 ] = ( value & 0xFF ); - p[ 0 ] = ( value & 0xFF00 ) >> 8; - - return p16 + 1; -} - -/** - * Writes a signed 16-bit value to a big endian buffer - - @ingroup MemoryBuffer - @param dst [out] pointer to the destination buffer, may not be NULL - @param value [in] host-endian signed 16-bit value - @returns a pointer to the location two bytes after dst - @remarks does no validation of the inputs. This simply calls - POSH_WriteU16ToLittle() with appropriate casting. -*/ -posh_i16_t * -POSH_WriteI16ToBig( void *dst, posh_i16_t value ) -{ - return ( posh_i16_t * ) POSH_WriteU16ToBig( dst, ( posh_u16_t ) value ); -} - -/** - * Writes an unsigned 32-bit value to a big endian buffer - - @ingroup MemoryBuffer - @param dst [out] pointer to the destination buffer, may not be NULL - @param value [in] host-endian unsigned 32-bit value - @returns a pointer to the location four bytes after dst - @remarks does no validation of the inputs. -*/ -posh_u32_t * -POSH_WriteU32ToBig( void *dst, posh_u32_t value ) -{ - posh_u32_t *p32 = ( posh_u32_t * ) dst; - posh_byte_t *p = ( posh_byte_t * ) dst; - - p[ 3 ] = ( value & 0xFF ); - p[ 2 ] = ( value & 0xFF00 ) >> 8; - p[ 1 ] = ( value & 0xFF0000 ) >> 16; - p[ 0 ] = ( value & 0xFF000000 ) >> 24; - - return p32 + 1; -} - -/** - * Writes a signed 32-bit value to a big endian buffer - - @ingroup MemoryBuffer - @param dst [out] pointer to the destination buffer, may not be NULL - @param value [in] host-endian signed 32-bit value - @returns a pointer to the location four bytes after dst - @remarks does no validation of the inputs. This simply calls - POSH_WriteU32ToBig() with appropriate casting. 
-*/ -posh_i32_t * -POSH_WriteI32ToBig( void *dst, posh_i32_t value ) -{ - return ( posh_i32_t * ) POSH_WriteU32ToBig( dst, ( posh_u32_t ) value ); -} - -#if defined POSH_64BIT_INTEGER -/** - * Writes an unsigned 64-bit value to a little-endian buffer - - @ingroup SixtyFourBit - @param dst [out] pointer to the destination buffer, may not be NULL - @param value [in] host-endian unsigned 64-bit value - @returns a pointer to the location eight bytes after dst - @remarks does no validation of the inputs. -*/ -posh_u64_t * -POSH_WriteU64ToLittle( void *dst, posh_u64_t value ) -{ - posh_u64_t *p64 = ( posh_u64_t * ) dst; - posh_byte_t *p = ( posh_byte_t * ) dst; - int i; - - for ( i = 0; i < 8; i++, value >>= 8 ) - { - p[ i ] = ( posh_byte_t ) ( value & 0xFF ); - } - - return p64 + 1; -} - -/** - * Writes a signed 64-bit value to a little-endian buffer - - @ingroup SixtyFourBit - @param dst [out] pointer to the destination buffer, may not be NULL - @param value [in] host-endian unsigned 64-bit value - @returns a pointer to the location eight bytes after dst - @remarks does no validation of the inputs. -*/ -posh_i64_t * -POSH_WriteI64ToLittle( void *dst, posh_i64_t value ) -{ - return ( posh_i64_t * ) POSH_WriteU64ToLittle( dst, ( posh_u64_t ) value ); -} - -/** - * Writes an unsigned 64-bit value to a big-endian buffer - - @ingroup SixtyFourBit - @param dst [out] pointer to the destination buffer, may not be NULL - @param value [in] host-endian unsigned 64-bit value - @returns a pointer to the location eight bytes after dst - @remarks does no validation of the inputs. -*/ -posh_u64_t * -POSH_WriteU64ToBig( void *dst, posh_u64_t value ) -{ - posh_u64_t *p64 = ( posh_u64_t * ) dst; - posh_byte_t *p = ( posh_byte_t * ) dst; - int i; - - for ( i = 0; i < 8; i++, value >>= 8 ) - { - p[ 7-i ] = ( posh_byte_t ) ( value & 0xFF ); - } - - return p64 + 8; -} - -/** - * Writes a signed 64-bit value to a big-endian buffer - - @ingroup SixtyFourBit - @param dst [out] pointer to the destination buffer, may not be NULL - @param value [in] host-endian signed 64-bit value - @returns a pointer to the location eight bytes after dst - @remarks does no validation of the inputs. 
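All of these writers follow one pattern: store the value a byte at a time at fixed offsets, so the buffer ends up in the requested byte order regardless of the host, and return a pointer just past the written bytes so calls can be chained. A small self-contained sketch of that pattern for the 32-bit little-endian case; the names are illustrative, not the library's API:

    #include <cstdint>
    #include <cstdio>

    // Write v into dst in little-endian order, one byte at a time, so the
    // result is identical on big- and little-endian hosts.
    static unsigned char* write_u32_le(unsigned char* dst, std::uint32_t v)
    {
        dst[0] = static_cast<unsigned char>(v & 0xFF);
        dst[1] = static_cast<unsigned char>((v >> 8) & 0xFF);
        dst[2] = static_cast<unsigned char>((v >> 16) & 0xFF);
        dst[3] = static_cast<unsigned char>((v >> 24) & 0xFF);
        return dst + 4; // points just past the written bytes
    }

    static std::uint32_t read_u32_le(const unsigned char* src)
    {
        return static_cast<std::uint32_t>(src[0])
             | (static_cast<std::uint32_t>(src[1]) << 8)
             | (static_cast<std::uint32_t>(src[2]) << 16)
             | (static_cast<std::uint32_t>(src[3]) << 24);
    }

    int main()
    {
        unsigned char buf[4];
        write_u32_le(buf, 0xABCD1234u);
        std::printf("%08lX\n",
                    static_cast<unsigned long>(read_u32_le(buf))); // ABCD1234
    }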
-*/ -posh_i64_t * -POSH_WriteI64ToBig( void *dst, posh_i64_t value ) -{ - return ( posh_i64_t * ) POSH_WriteU64ToBig( dst, ( posh_u64_t ) value ); -} - -#endif /* POSH_64BIT_INTEGER */ - -/* ---------------------------------------------------------------------------*/ -/* IN-MEMORY DESERIALIZATION */ -/* ---------------------------------------------------------------------------*/ - -/** - * Reads an unsigned 16-bit value from a little-endian buffer - @ingroup MemoryBuffer - @param src [in] source buffer - @returns host-endian unsigned 16-bit value -*/ -posh_u16_t -POSH_ReadU16FromLittle( const void *src ) -{ - posh_u16_t v = 0; - posh_byte_t *p = ( posh_byte_t * ) src; - - v |= p[ 0 ]; - v |= ( ( posh_u16_t ) p[ 1 ] ) << 8; - - return v; -} - -/** - * Reads a signed 16-bit value from a little-endian buffer - @ingroup MemoryBuffer - @param src [in] source buffer - @returns host-endian signed 16-bit value -*/ -posh_i16_t -POSH_ReadI16FromLittle( const void *src ) -{ - return ( posh_i16_t ) POSH_ReadU16FromLittle( src ); -} - -/** - * Reads an unsigned 32-bit value from a little-endian buffer - @ingroup MemoryBuffer - @param src [in] source buffer - @returns host-endian unsigned 32-bit value -*/ -posh_u32_t -POSH_ReadU32FromLittle( const void *src ) -{ - posh_u32_t v = 0; - posh_byte_t *p = ( posh_byte_t * ) src; - - v |= p[ 0 ]; - v |= ( ( posh_u32_t ) p[ 1 ] ) << 8; - v |= ( ( posh_u32_t ) p[ 2 ] ) << 16; - v |= ( ( posh_u32_t ) p[ 3 ] ) << 24; - - return v; -} - -/** - * Reads a signed 32-bit value from a little-endian buffer - @ingroup MemoryBuffer - @param src [in] source buffer - @returns host-endian signed 32-bit value -*/ -posh_i32_t -POSH_ReadI32FromLittle( const void *src ) -{ - return ( posh_i32_t ) POSH_ReadU32FromLittle( src ); -} - - -/** - * Reads an unsigned 16-bit value from a big-endian buffer - @ingroup MemoryBuffer - @param src [in] source buffer - @returns host-endian unsigned 16-bit value -*/ -posh_u16_t -POSH_ReadU16FromBig( const void *src ) -{ - posh_u16_t v = 0; - posh_byte_t *p = ( posh_byte_t * ) src; - - v |= p[ 1 ]; - v |= ( ( posh_u16_t ) p[ 0 ] ) << 8; - - return v; -} - -/** - * Reads a signed 16-bit value from a big-endian buffer - @ingroup MemoryBuffer - @param src [in] source buffer - @returns host-endian signed 16-bit value -*/ -posh_i16_t -POSH_ReadI16FromBig( const void *src ) -{ - return ( posh_i16_t ) POSH_ReadU16FromBig( src ); -} - -/** - * Reads an unsigned 32-bit value from a big-endian buffer - @ingroup MemoryBuffer - @param src [in] source buffer - @returns host-endian unsigned 32-bit value -*/ -posh_u32_t -POSH_ReadU32FromBig( const void *src ) -{ - posh_u32_t v = 0; - posh_byte_t *p = ( posh_byte_t * ) src; - - v |= p[ 3 ]; - v |= ( ( posh_u32_t ) p[ 2 ] ) << 8; - v |= ( ( posh_u32_t ) p[ 1 ] ) << 16; - v |= ( ( posh_u32_t ) p[ 0 ] ) << 24; - - return v; -} - -/** - * Reads a signed 32-bit value from a big-endian buffer - @ingroup MemoryBuffer - @param src [in] source buffer - @returns host-endian signed 32-bit value -*/ -posh_i32_t -POSH_ReadI32FromBig( const void *src ) -{ - return POSH_BigI32( (*(const posh_i32_t*)src ) ); -} - -#if defined POSH_64BIT_INTEGER - -/** - * Reads an unsigned 64-bit value from a little-endian buffer - @param src [in] source buffer - @returns host-endian unsigned 32-bit value -*/ -posh_u64_t -POSH_ReadU64FromLittle( const void *src ) -{ - posh_u64_t v = 0; - posh_byte_t *p = ( posh_byte_t * ) src; - int i; - - for ( i = 0; i < 8; i++ ) - { - v |= ( ( posh_u64_t ) p[ i ] ) << (i*8); - } - - return v; -} - -/** - * 
Reads a signed 64-bit value from a little-endian buffer - @param src [in] source buffer - @returns host-endian signed 32-bit value -*/ -posh_i64_t -POSH_ReadI64FromLittle( const void *src ) -{ - return ( posh_i64_t ) POSH_ReadU64FromLittle( src ); -} - -/** - * Reads an unsigned 64-bit value from a big-endian buffer - @param src [in] source buffer - @returns host-endian unsigned 32-bit value -*/ -posh_u64_t -POSH_ReadU64FromBig( const void *src ) -{ - posh_u64_t v = 0; - posh_byte_t *p = ( posh_byte_t * ) src; - int i; - - for ( i = 0; i < 8; i++ ) - { - v |= ( ( posh_u64_t ) p[ 7-i ] ) << (i*8); - } - - return v; -} - -/** - * Reads an signed 64-bit value from a big-endian buffer - @param src [in] source buffer - @returns host-endian signed 32-bit value -*/ -posh_i64_t -POSH_ReadI64FromBig( const void *src ) -{ - return ( posh_i64_t ) POSH_ReadU64FromBig( src ); -} - -#endif /* POSH_64BIT_INTEGER */ - -/* ---------------------------------------------------------------------------*/ -/* FLOATING POINT SUPPORT */ -/* ---------------------------------------------------------------------------*/ - -#if !defined POSH_NO_FLOAT - -/** @ingroup FloatingPoint - @param[in] f floating point value - @returns a little-endian bit representation of f - */ -posh_u32_t -POSH_LittleFloatBits( float f ) -{ - union - { - float f32; - posh_u32_t u32; - } u; - - u.f32 = f; - - return POSH_LittleU32( u.u32 ); -} - -/** - * Extracts raw big-endian bits from a 32-bit floating point value - * - @ingroup FloatingPoint - @param f [in] floating point value - @returns a big-endian bit representation of f - */ -posh_u32_t -POSH_BigFloatBits( float f ) -{ - union - { - float f32; - posh_u32_t u32; - } u; - - u.f32 = f; - - return POSH_BigU32( u.u32 ); -} - -/** - * Extracts raw, little-endian bit representation from a 64-bit double. - * - @param d [in] 64-bit double precision value - @param dst [out] 8-byte storage buffer - @ingroup FloatingPoint - @returns the raw bits used to represent the value 'd', in the form dst[0]=LSB - */ -void -POSH_DoubleBits( double d, posh_byte_t dst[ 8 ] ) -{ - union - { - double d64; - posh_byte_t bytes[ 8 ]; - } u; - - u.d64 = d; - -#if defined POSH_LITTLE_ENDIAN - dst[ 0 ] = u.bytes[ 0 ]; - dst[ 1 ] = u.bytes[ 1 ]; - dst[ 2 ] = u.bytes[ 2 ]; - dst[ 3 ] = u.bytes[ 3 ]; - dst[ 4 ] = u.bytes[ 4 ]; - dst[ 5 ] = u.bytes[ 5 ]; - dst[ 6 ] = u.bytes[ 6 ]; - dst[ 7 ] = u.bytes[ 7 ]; -#else - dst[ 0 ] = u.bytes[ 7 ]; - dst[ 1 ] = u.bytes[ 6 ]; - dst[ 2 ] = u.bytes[ 5 ]; - dst[ 3 ] = u.bytes[ 4 ]; - dst[ 4 ] = u.bytes[ 3 ]; - dst[ 5 ] = u.bytes[ 2 ]; - dst[ 6 ] = u.bytes[ 1 ]; - dst[ 7 ] = u.bytes[ 0 ]; -#endif -} - -/** - * Creates a double-precision, 64-bit floating point value from a set of raw, - * little-endian bits - - @ingroup FloatingPoint - @param src [in] little-endian byte representation of 64-bit double precision - floating point value - @returns double precision floating point representation of the raw bits - @remarks No error checking is performed, so there are no guarantees that the - result is a valid number, nor is there any check to ensure that src is - non-NULL. BE CAREFUL USING THIS. 
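The float helpers reinterpret the value's bit pattern (the removed code does this through a union) and then reuse the integer byte swap when the requested order differs from the host order; the self-test earlier only checks that the round trip is lossless. A minimal sketch of that round trip, spelled with memcpy, which is the strictly portable C++ form of the same reinterpretation (names are illustrative):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // Raw bit pattern of a 32-bit float.
    static std::uint32_t float_bits(float f)
    {
        std::uint32_t u;
        std::memcpy(&u, &f, sizeof u);
        return u; // swap here if a foreign byte order is wanted
    }

    static float float_from_bits(std::uint32_t u)
    {
        float f;
        std::memcpy(&f, &u, sizeof f);
        return f;
    }

    int main()
    {
        const float ref = 10.0f / 30.0f;
        // Lossless round trip, which is what s_testFloatingPoint() verified.
        std::printf("round-trip ok: %d\n", float_from_bits(float_bits(ref)) == ref);
    }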
- */ -double -POSH_DoubleFromBits( const posh_byte_t src[ 8 ] ) -{ - union - { - double d64; - posh_byte_t bytes[ 8 ]; - } u; - -#if defined POSH_LITTLE_ENDIAN - u.bytes[ 0 ] = src[ 0 ]; - u.bytes[ 1 ] = src[ 1 ]; - u.bytes[ 2 ] = src[ 2 ]; - u.bytes[ 3 ] = src[ 3 ]; - u.bytes[ 4 ] = src[ 4 ]; - u.bytes[ 5 ] = src[ 5 ]; - u.bytes[ 6 ] = src[ 6 ]; - u.bytes[ 7 ] = src[ 7 ]; -#else - u.bytes[ 0 ] = src[ 7 ]; - u.bytes[ 1 ] = src[ 6 ]; - u.bytes[ 2 ] = src[ 5 ]; - u.bytes[ 3 ] = src[ 4 ]; - u.bytes[ 4 ] = src[ 3 ]; - u.bytes[ 5 ] = src[ 2 ]; - u.bytes[ 6 ] = src[ 1 ]; - u.bytes[ 7 ] = src[ 0 ]; -#endif - - return u.d64; -} - -/** - * Creates a floating point number from little endian bits - * - @ingroup FloatingPoint - @param bits [in] raw floating point bits in little-endian form - @returns a floating point number based on the given bit representation - @remarks No error checking is performed, so there are no guarantees that the - result is a valid number. BE CAREFUL USING THIS. - */ -float -POSH_FloatFromLittleBits( posh_u32_t bits ) -{ - union - { - float f32; - posh_u32_t u32; - } u; - - u.u32 = bits; -#if defined POSH_BIG_ENDIAN - u.u32 = POSH_SwapU32( u.u32 ); -#endif - - return u.f32; -} - -/** - * Creates a floating point number from big-endian bits - * - @ingroup FloatingPoint - @param bits [in] raw floating point bits in big-endian form - @returns a floating point number based on the given bit representation - @remarks No error checking is performed, so there are no guarantees that the - result is a valid number. BE CAREFUL USING THIS. - */ -float -POSH_FloatFromBigBits( posh_u32_t bits ) -{ - union - { - float f32; - posh_u32_t u32; - } u; - - u.u32 = bits; -#if defined POSH_LITTLE_ENDIAN - u.u32 = POSH_SwapU32( u.u32 ); -#endif - - return u.f32; -} - -#endif /* !defined POSH_NO_FLOAT */ Index: ps/trunk/libraries/source/nvtt/src/src/nvimage/BlockDXT.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvimage/BlockDXT.h +++ ps/trunk/libraries/source/nvtt/src/src/nvimage/BlockDXT.h @@ -21,202 +21,228 @@ // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR // OTHER DEALINGS IN THE SOFTWARE. +#pragma once #ifndef NV_IMAGE_BLOCKDXT_H #define NV_IMAGE_BLOCKDXT_H -#include +#include "nvimage.h" -#include +#include "nvmath/Color.h" namespace nv { - struct ColorBlock; - class Stream; - - - /// DXT1 block. - struct BlockDXT1 - { - Color16 col0; - Color16 col1; - union { - uint8 row[4]; - uint indices; - }; - - bool isFourColorMode() const; - - uint evaluatePalette(Color32 color_array[4]) const; - uint evaluatePaletteFast(Color32 color_array[4]) const; - void evaluatePalette3(Color32 color_array[4]) const; - void evaluatePalette4(Color32 color_array[4]) const; - - void decodeBlock(ColorBlock * block) const; - - void setIndices(int * idx); - - void flip4(); - void flip2(); - }; - - /// Return true if the block uses four color mode, false otherwise. - inline bool BlockDXT1::isFourColorMode() const - { - return col0.u > col1.u; - } - - - /// DXT3 alpha block with explicit alpha. 
- struct AlphaBlockDXT3 - { - union { - struct { - uint alpha0 : 4; - uint alpha1 : 4; - uint alpha2 : 4; - uint alpha3 : 4; - uint alpha4 : 4; - uint alpha5 : 4; - uint alpha6 : 4; - uint alpha7 : 4; - uint alpha8 : 4; - uint alpha9 : 4; - uint alphaA : 4; - uint alphaB : 4; - uint alphaC : 4; - uint alphaD : 4; - uint alphaE : 4; - uint alphaF : 4; - }; - uint16 row[4]; - }; - - void decodeBlock(ColorBlock * block) const; - - void flip4(); - void flip2(); - }; - - - /// DXT3 block. - struct BlockDXT3 - { - AlphaBlockDXT3 alpha; - BlockDXT1 color; - - void decodeBlock(ColorBlock * block) const; - - void flip4(); - void flip2(); - }; - - - /// DXT5 alpha block. - struct AlphaBlockDXT5 - { - union { - struct { - uint64 alpha0 : 8; // 8 - uint64 alpha1 : 8; // 16 - uint64 bits0 : 3; // 3 - 19 - uint64 bits1 : 3; // 6 - 22 - uint64 bits2 : 3; // 9 - 25 - uint64 bits3 : 3; // 12 - 28 - uint64 bits4 : 3; // 15 - 31 - uint64 bits5 : 3; // 18 - 34 - uint64 bits6 : 3; // 21 - 37 - uint64 bits7 : 3; // 24 - 40 - uint64 bits8 : 3; // 27 - 43 - uint64 bits9 : 3; // 30 - 46 - uint64 bitsA : 3; // 33 - 49 - uint64 bitsB : 3; // 36 - 52 - uint64 bitsC : 3; // 39 - 55 - uint64 bitsD : 3; // 42 - 58 - uint64 bitsE : 3; // 45 - 61 - uint64 bitsF : 3; // 48 - 64 - }; - uint64 u; - }; - - void evaluatePalette(uint8 alpha[8]) const; - void evaluatePalette8(uint8 alpha[8]) const; - void evaluatePalette6(uint8 alpha[8]) const; - void indices(uint8 index_array[16]) const; - - uint index(uint index) const; - void setIndex(uint index, uint value); - - void decodeBlock(ColorBlock * block) const; - - void flip4(); - void flip2(); - }; - - - /// DXT5 block. - struct BlockDXT5 - { - AlphaBlockDXT5 alpha; - BlockDXT1 color; - - void decodeBlock(ColorBlock * block) const; - - void flip4(); - void flip2(); - }; - - /// ATI1 block. - struct BlockATI1 - { - AlphaBlockDXT5 alpha; - - void decodeBlock(ColorBlock * block) const; - - void flip4(); - void flip2(); - }; - - /// ATI2 block. - struct BlockATI2 - { - AlphaBlockDXT5 x; - AlphaBlockDXT5 y; - + struct ColorBlock; + struct ColorSet; + struct AlphaBlock4x4; + class Stream; + class Vector3; + + + /// DXT1 block. + struct NVIMAGE_CLASS BlockDXT1 + { + Color16 col0; + Color16 col1; + union { + uint8 row[4]; + uint indices; + }; + + bool isFourColorMode() const; + + uint evaluatePalette(Color32 color_array[4], bool d3d9) const; + uint evaluatePaletteNV5x(Color32 color_array[4]) const; + + void evaluatePalette3(Color32 color_array[4], bool d3d9) const; + void evaluatePalette4(Color32 color_array[4], bool d3d9) const; + + void decodeBlock(ColorBlock * block, bool d3d9 = false) const; + void decodeBlockNV5x(ColorBlock * block) const; + + void setIndices(int * idx); + + void flip4(); + void flip2(); + }; + + /// Return true if the block uses four color mode, false otherwise. + inline bool BlockDXT1::isFourColorMode() const + { + return col0.u > col1.u; + } + + + /// DXT3 alpha block with explicit alpha. + struct AlphaBlockDXT3 + { + union { + struct { + uint alpha0 : 4; + uint alpha1 : 4; + uint alpha2 : 4; + uint alpha3 : 4; + uint alpha4 : 4; + uint alpha5 : 4; + uint alpha6 : 4; + uint alpha7 : 4; + uint alpha8 : 4; + uint alpha9 : 4; + uint alphaA : 4; + uint alphaB : 4; + uint alphaC : 4; + uint alphaD : 4; + uint alphaE : 4; + uint alphaF : 4; + }; + uint16 row[4]; + }; + + void decodeBlock(ColorBlock * block, bool d3d9 = false) const; + + void flip4(); + void flip2(); + }; + + + /// DXT3 block. 
+ struct NVIMAGE_CLASS BlockDXT3 + { + AlphaBlockDXT3 alpha; + BlockDXT1 color; + + void decodeBlock(ColorBlock * block, bool d3d9 = false) const; + void decodeBlockNV5x(ColorBlock * block) const; + + void flip4(); + void flip2(); + }; + + + /// DXT5 alpha block. + struct NVIMAGE_CLASS AlphaBlockDXT5 + { + union { + struct { + uint64 alpha0 : 8; // 8 + uint64 alpha1 : 8; // 16 + uint64 bits0 : 3; // 3 - 19 + uint64 bits1 : 3; // 6 - 22 + uint64 bits2 : 3; // 9 - 25 + uint64 bits3 : 3; // 12 - 28 + uint64 bits4 : 3; // 15 - 31 + uint64 bits5 : 3; // 18 - 34 + uint64 bits6 : 3; // 21 - 37 + uint64 bits7 : 3; // 24 - 40 + uint64 bits8 : 3; // 27 - 43 + uint64 bits9 : 3; // 30 - 46 + uint64 bitsA : 3; // 33 - 49 + uint64 bitsB : 3; // 36 - 52 + uint64 bitsC : 3; // 39 - 55 + uint64 bitsD : 3; // 42 - 58 + uint64 bitsE : 3; // 45 - 61 + uint64 bitsF : 3; // 48 - 64 + }; + uint64 u; + }; + + void evaluatePalette(uint8 alpha[8], bool d3d9) const; + void evaluatePalette8(uint8 alpha[8], bool d3d9) const; + void evaluatePalette6(uint8 alpha[8], bool d3d9) const; + void indices(uint8 index_array[16]) const; + + uint index(uint index) const; + void setIndex(uint index, uint value); + + void decodeBlock(ColorBlock * block, bool d3d9 = false) const; + void decodeBlock(AlphaBlock4x4 * block, bool d3d9 = false) const; + + void flip4(); + void flip2(); + }; + + + /// DXT5 block. + struct NVIMAGE_CLASS BlockDXT5 + { + AlphaBlockDXT5 alpha; + BlockDXT1 color; + + void decodeBlock(ColorBlock * block, bool d3d9 = false) const; + void decodeBlockNV5x(ColorBlock * block) const; + + void flip4(); + void flip2(); + }; + + /// ATI1 block. + struct NVIMAGE_CLASS BlockATI1 + { + AlphaBlockDXT5 alpha; + + void decodeBlock(ColorBlock * block, bool d3d9 = false) const; + + void flip4(); + void flip2(); + }; + + /// ATI2 block. + struct NVIMAGE_CLASS BlockATI2 + { + AlphaBlockDXT5 x; + AlphaBlockDXT5 y; + + void decodeBlock(ColorBlock * block, bool d3d9 = false) const; + + void flip4(); + void flip2(); + }; + + /// CTX1 block. + struct BlockCTX1 + { + uint8 col0[2]; + uint8 col1[2]; + union { + uint8 row[4]; + uint indices; + }; + + void evaluatePalette(Color32 color_array[4]) const; + void setIndices(int * idx); + + void decodeBlock(ColorBlock * block) const; + + void flip4(); + void flip2(); + }; + + /// BC6 block. + struct NVIMAGE_CLASS BlockBC6 + { + uint8 data[16]; // Not even going to try to write a union for this thing. + void decodeBlock(Vector3 colors[16]) const; + }; + + /// BC7 block. + struct NVIMAGE_CLASS BlockBC7 + { + uint8 data[16]; // Not even going to try to write a union for this thing. void decodeBlock(ColorBlock * block) const; - - void flip4(); - void flip2(); }; - /// CTX1 block. - struct BlockCTX1 - { - uint8 col0[2]; - uint8 col1[2]; - union { - uint8 row[4]; - uint indices; - }; - - void evaluatePalette(Color32 color_array[4]) const; - void setIndices(int * idx); - - void decodeBlock(ColorBlock * block) const; - - void flip4(); - void flip2(); - }; - // Serialization functions. 
- NVIMAGE_API Stream & operator<<(Stream & stream, BlockDXT1 & block); - NVIMAGE_API Stream & operator<<(Stream & stream, AlphaBlockDXT3 & block); - NVIMAGE_API Stream & operator<<(Stream & stream, BlockDXT3 & block); - NVIMAGE_API Stream & operator<<(Stream & stream, AlphaBlockDXT5 & block); - NVIMAGE_API Stream & operator<<(Stream & stream, BlockDXT5 & block); - NVIMAGE_API Stream & operator<<(Stream & stream, BlockATI1 & block); - NVIMAGE_API Stream & operator<<(Stream & stream, BlockATI2 & block); - NVIMAGE_API Stream & operator<<(Stream & stream, BlockCTX1 & block); + // Serialization functions. + NVIMAGE_API Stream & operator<<(Stream & stream, BlockDXT1 & block); + NVIMAGE_API Stream & operator<<(Stream & stream, AlphaBlockDXT3 & block); + NVIMAGE_API Stream & operator<<(Stream & stream, BlockDXT3 & block); + NVIMAGE_API Stream & operator<<(Stream & stream, AlphaBlockDXT5 & block); + NVIMAGE_API Stream & operator<<(Stream & stream, BlockDXT5 & block); + NVIMAGE_API Stream & operator<<(Stream & stream, BlockATI1 & block); + NVIMAGE_API Stream & operator<<(Stream & stream, BlockATI2 & block); + NVIMAGE_API Stream & operator<<(Stream & stream, BlockCTX1 & block); + NVIMAGE_API Stream & operator<<(Stream & stream, BlockBC6 & block); + NVIMAGE_API Stream & operator<<(Stream & stream, BlockBC7 & block); } // nv namespace Index: ps/trunk/libraries/source/nvtt/src/src/nvimage/BlockDXT.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvimage/BlockDXT.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvimage/BlockDXT.cpp @@ -21,584 +21,654 @@ // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR // OTHER DEALINGS IN THE SOFTWARE. -#include - -#include "ColorBlock.h" #include "BlockDXT.h" +#include "ColorBlock.h" + +#include "nvcore/Stream.h" +#include "nvcore/Utils.h" // swap +#include "nvmath/Half.h" +#include "nvmath/Vector.inl" + +#include "bc6h/zoh.h" +#include "bc7/avpcl.h" + using namespace nv; /*---------------------------------------------------------------------------- - BlockDXT1 +BlockDXT1 ----------------------------------------------------------------------------*/ -uint BlockDXT1::evaluatePalette(Color32 color_array[4]) const +uint BlockDXT1::evaluatePalette(Color32 color_array[4], bool d3d9/*= false*/) const { - // Does bit expansion before interpolation. - color_array[0].b = (col0.b << 3) | (col0.b >> 2); - color_array[0].g = (col0.g << 2) | (col0.g >> 4); - color_array[0].r = (col0.r << 3) | (col0.r >> 2); - color_array[0].a = 0xFF; - - // @@ Same as above, but faster? -// Color32 c; -// c.u = ((col0.u << 3) & 0xf8) | ((col0.u << 5) & 0xfc00) | ((col0.u << 8) & 0xf80000); -// c.u |= (c.u >> 5) & 0x070007; -// c.u |= (c.u >> 6) & 0x000300; -// color_array[0].u = c.u; - - color_array[1].r = (col1.r << 3) | (col1.r >> 2); - color_array[1].g = (col1.g << 2) | (col1.g >> 4); - color_array[1].b = (col1.b << 3) | (col1.b >> 2); - color_array[1].a = 0xFF; - - // @@ Same as above, but faster? -// c.u = ((col1.u << 3) & 0xf8) | ((col1.u << 5) & 0xfc00) | ((col1.u << 8) & 0xf80000); -// c.u |= (c.u >> 5) & 0x070007; -// c.u |= (c.u >> 6) & 0x000300; -// color_array[1].u = c.u; - - if( col0.u > col1.u ) { - // Four-color block: derive the other two colors. 
- color_array[2].r = (2 * color_array[0].r + color_array[1].r) / 3; - color_array[2].g = (2 * color_array[0].g + color_array[1].g) / 3; - color_array[2].b = (2 * color_array[0].b + color_array[1].b) / 3; - color_array[2].a = 0xFF; - - color_array[3].r = (2 * color_array[1].r + color_array[0].r) / 3; - color_array[3].g = (2 * color_array[1].g + color_array[0].g) / 3; - color_array[3].b = (2 * color_array[1].b + color_array[0].b) / 3; - color_array[3].a = 0xFF; - - return 4; - } - else { - // Three-color block: derive the other color. - color_array[2].r = (color_array[0].r + color_array[1].r) / 2; - color_array[2].g = (color_array[0].g + color_array[1].g) / 2; - color_array[2].b = (color_array[0].b + color_array[1].b) / 2; - color_array[2].a = 0xFF; - - // Set all components to 0 to match DXT specs. - color_array[3].r = 0x00; // color_array[2].r; - color_array[3].g = 0x00; // color_array[2].g; - color_array[3].b = 0x00; // color_array[2].b; - color_array[3].a = 0x00; - - return 3; - } + // Does bit expansion before interpolation. + color_array[0].b = (col0.b << 3) | (col0.b >> 2); + color_array[0].g = (col0.g << 2) | (col0.g >> 4); + color_array[0].r = (col0.r << 3) | (col0.r >> 2); + color_array[0].a = 0xFF; + + // @@ Same as above, but faster? + // Color32 c; + // c.u = ((col0.u << 3) & 0xf8) | ((col0.u << 5) & 0xfc00) | ((col0.u << 8) & 0xf80000); + // c.u |= (c.u >> 5) & 0x070007; + // c.u |= (c.u >> 6) & 0x000300; + // color_array[0].u = c.u; + + color_array[1].r = (col1.r << 3) | (col1.r >> 2); + color_array[1].g = (col1.g << 2) | (col1.g >> 4); + color_array[1].b = (col1.b << 3) | (col1.b >> 2); + color_array[1].a = 0xFF; + + // @@ Same as above, but faster? + // c.u = ((col1.u << 3) & 0xf8) | ((col1.u << 5) & 0xfc00) | ((col1.u << 8) & 0xf80000); + // c.u |= (c.u >> 5) & 0x070007; + // c.u |= (c.u >> 6) & 0x000300; + // color_array[1].u = c.u; + + if( col0.u > col1.u ) { + int bias = 0; + if (d3d9) bias = 1; + + // Four-color block: derive the other two colors. + color_array[2].r = (2 * color_array[0].r + color_array[1].r + bias) / 3; + color_array[2].g = (2 * color_array[0].g + color_array[1].g + bias) / 3; + color_array[2].b = (2 * color_array[0].b + color_array[1].b + bias) / 3; + color_array[2].a = 0xFF; + + color_array[3].r = (2 * color_array[1].r + color_array[0].r + bias) / 3; + color_array[3].g = (2 * color_array[1].g + color_array[0].g + bias) / 3; + color_array[3].b = (2 * color_array[1].b + color_array[0].b + bias) / 3; + color_array[3].a = 0xFF; + + return 4; + } + else { + // Three-color block: derive the other color. + color_array[2].r = (color_array[0].r + color_array[1].r) / 2; + color_array[2].g = (color_array[0].g + color_array[1].g) / 2; + color_array[2].b = (color_array[0].b + color_array[1].b) / 2; + color_array[2].a = 0xFF; + + // Set all components to 0 to match DXT specs. + color_array[3].r = 0x00; // color_array[2].r; + color_array[3].g = 0x00; // color_array[2].g; + color_array[3].b = 0x00; // color_array[2].b; + color_array[3].a = 0x00; + + return 3; + } +} + + +uint BlockDXT1::evaluatePaletteNV5x(Color32 color_array[4]) const +{ + // Does bit expansion before interpolation. 
+ color_array[0].b = (3 * col0.b * 22) / 8; + color_array[0].g = (col0.g << 2) | (col0.g >> 4); + color_array[0].r = (3 * col0.r * 22) / 8; + color_array[0].a = 0xFF; + + color_array[1].r = (3 * col1.r * 22) / 8; + color_array[1].g = (col1.g << 2) | (col1.g >> 4); + color_array[1].b = (3 * col1.b * 22) / 8; + color_array[1].a = 0xFF; + + int gdiff = color_array[1].g - color_array[0].g; + + if( col0.u > col1.u ) { + // Four-color block: derive the other two colors. + color_array[2].r = ((2 * col0.r + col1.r) * 22) / 8; + color_array[2].g = (256 * color_array[0].g + gdiff / 4 + 128 + gdiff * 80) / 256; + color_array[2].b = ((2 * col0.b + col1.b) * 22) / 8; + color_array[2].a = 0xFF; + + color_array[3].r = ((2 * col1.r + col0.r) * 22) / 8; + color_array[3].g = (256 * color_array[1].g - gdiff / 4 + 128 - gdiff * 80) / 256; + color_array[3].b = ((2 * col1.b + col0.b) * 22) / 8; + color_array[3].a = 0xFF; + + return 4; + } + else { + // Three-color block: derive the other color. + color_array[2].r = ((col0.r + col1.r) * 33) / 8; + color_array[2].g = (256 * color_array[0].g + gdiff / 4 + 128 + gdiff * 128) / 256; + color_array[2].b = ((col0.b + col1.b) * 33) / 8; + color_array[2].a = 0xFF; + + // Set all components to 0 to match DXT specs. + color_array[3].r = 0x00; + color_array[3].g = 0x00; + color_array[3].b = 0x00; + color_array[3].a = 0x00; + + return 3; + } } // Evaluate palette assuming 3 color block. -void BlockDXT1::evaluatePalette3(Color32 color_array[4]) const +void BlockDXT1::evaluatePalette3(Color32 color_array[4], bool d3d9) const { - color_array[0].b = (col0.b << 3) | (col0.b >> 2); - color_array[0].g = (col0.g << 2) | (col0.g >> 4); - color_array[0].r = (col0.r << 3) | (col0.r >> 2); - color_array[0].a = 0xFF; - - color_array[1].r = (col1.r << 3) | (col1.r >> 2); - color_array[1].g = (col1.g << 2) | (col1.g >> 4); - color_array[1].b = (col1.b << 3) | (col1.b >> 2); - color_array[1].a = 0xFF; - - // Three-color block: derive the other color. - color_array[2].r = (color_array[0].r + color_array[1].r) / 2; - color_array[2].g = (color_array[0].g + color_array[1].g) / 2; - color_array[2].b = (color_array[0].b + color_array[1].b) / 2; - color_array[2].a = 0xFF; - - // Set all components to 0 to match DXT specs. - color_array[3].r = 0x00; // color_array[2].r; - color_array[3].g = 0x00; // color_array[2].g; - color_array[3].b = 0x00; // color_array[2].b; - color_array[3].a = 0x00; + color_array[0].b = (col0.b << 3) | (col0.b >> 2); + color_array[0].g = (col0.g << 2) | (col0.g >> 4); + color_array[0].r = (col0.r << 3) | (col0.r >> 2); + color_array[0].a = 0xFF; + + color_array[1].r = (col1.r << 3) | (col1.r >> 2); + color_array[1].g = (col1.g << 2) | (col1.g >> 4); + color_array[1].b = (col1.b << 3) | (col1.b >> 2); + color_array[1].a = 0xFF; + + // Three-color block: derive the other color. + color_array[2].r = (color_array[0].r + color_array[1].r) / 2; + color_array[2].g = (color_array[0].g + color_array[1].g) / 2; + color_array[2].b = (color_array[0].b + color_array[1].b) / 2; + color_array[2].a = 0xFF; + + // Set all components to 0 to match DXT specs. + color_array[3].r = 0x00; + color_array[3].g = 0x00; + color_array[3].b = 0x00; + color_array[3].a = 0x00; } // Evaluate palette assuming 4 color block. 
-void BlockDXT1::evaluatePalette4(Color32 color_array[4]) const +void BlockDXT1::evaluatePalette4(Color32 color_array[4], bool d3d9) const { - color_array[0].b = (col0.b << 3) | (col0.b >> 2); - color_array[0].g = (col0.g << 2) | (col0.g >> 4); - color_array[0].r = (col0.r << 3) | (col0.r >> 2); - color_array[0].a = 0xFF; - - color_array[1].r = (col1.r << 3) | (col1.r >> 2); - color_array[1].g = (col1.g << 2) | (col1.g >> 4); - color_array[1].b = (col1.b << 3) | (col1.b >> 2); - color_array[1].a = 0xFF; - - // Four-color block: derive the other two colors. - color_array[2].r = (2 * color_array[0].r + color_array[1].r) / 3; - color_array[2].g = (2 * color_array[0].g + color_array[1].g) / 3; - color_array[2].b = (2 * color_array[0].b + color_array[1].b) / 3; - color_array[2].a = 0xFF; - - color_array[3].r = (2 * color_array[1].r + color_array[0].r) / 3; - color_array[3].g = (2 * color_array[1].g + color_array[0].g) / 3; - color_array[3].b = (2 * color_array[1].b + color_array[0].b) / 3; - color_array[3].a = 0xFF; -} - + color_array[0].b = (col0.b << 3) | (col0.b >> 2); + color_array[0].g = (col0.g << 2) | (col0.g >> 4); + color_array[0].r = (col0.r << 3) | (col0.r >> 2); + color_array[0].a = 0xFF; -/* Jason Dorie's code. -// ---------------------------------------------------------------------------- -// Build palette for a 3 color + traparent black block -// ---------------------------------------------------------------------------- -void DXTCGen::BuildCodes3(cbVector *pVects, cbVector &v1, cbVector &v2) -{ - //pVects[0] = v1; - //pVects[2] = v2; - //pVects[1][0] = v1[0]; - //pVects[1][1] = (BYTE)( ((long)v1[1] + (long)v2[1]) / 2 ); - //pVects[1][2] = (BYTE)( ((long)v1[2] + (long)v2[2]) / 2 ); - //pVects[1][3] = (BYTE)( ((long)v1[3] + (long)v2[3]) / 2 ); - - __asm { - mov ecx, dword ptr pVects - mov eax, dword ptr v1 - mov ebx, dword ptr v2 - - movd mm0, [eax] - movd mm1, [ebx] - pxor mm2, mm2 - nop - - movd [ecx], mm0 - movd [ecx+8], mm1 + color_array[1].r = (col1.r << 3) | (col1.r >> 2); + color_array[1].g = (col1.g << 2) | (col1.g >> 4); + color_array[1].b = (col1.b << 3) | (col1.b >> 2); + color_array[1].a = 0xFF; - punpcklbw mm0, mm2 - punpcklbw mm1, mm2 + int bias = 0; + if (d3d9) bias = 1; - paddw mm0, mm1 - psrlw mm0, 1 + // Four-color block: derive the other two colors. 
+ color_array[2].r = (2 * color_array[0].r + color_array[1].r + bias) / 3; + color_array[2].g = (2 * color_array[0].g + color_array[1].g + bias) / 3; + color_array[2].b = (2 * color_array[0].b + color_array[1].b + bias) / 3; + color_array[2].a = 0xFF; - packuswb mm0, mm0 - movd [ecx+4], mm0 - } - // *(long *)&pVects[1] = r1; + color_array[3].r = (2 * color_array[1].r + color_array[0].r + bias) / 3; + color_array[3].g = (2 * color_array[1].g + color_array[0].g + bias) / 3; + color_array[3].b = (2 * color_array[1].b + color_array[0].b + bias) / 3; + color_array[3].a = 0xFF; } -__int64 ScaleOneThird = 0x5500550055005500; -// ---------------------------------------------------------------------------- -// Build palette for a 4 color block -// ---------------------------------------------------------------------------- -void DXTCGen::BuildCodes4(cbVector *pVects, cbVector &v1, cbVector &v2) +void BlockDXT1::decodeBlock(ColorBlock * block, bool d3d9/*= false*/) const { -// pVects[0] = v1; -// pVects[3] = v2; -// -// pVects[1][0] = v1[0]; -// pVects[1][1] = (BYTE)( ((long)v1[1] * 2 + (long)v2[1]) / 3 ); -// pVects[1][2] = (BYTE)( ((long)v1[2] * 2 + (long)v2[2]) / 3 ); -// pVects[1][3] = (BYTE)( ((long)v1[3] * 2 + (long)v2[3]) / 3 ); -// -// pVects[2][0] = v1[0]; -// pVects[2][1] = (BYTE)( ((long)v2[1] * 2 + (long)v1[1]) / 3 ); -// pVects[2][2] = (BYTE)( ((long)v2[2] * 2 + (long)v1[2]) / 3 ); -// pVects[2][3] = (BYTE)( ((long)v2[3] * 2 + (long)v1[3]) / 3 ); - - __asm { - mov ecx, dword ptr pVects - mov eax, dword ptr v1 - mov ebx, dword ptr v2 - - movd mm0, [eax] - movd mm1, [ebx] - - pxor mm2, mm2 - movd [ecx], mm0 - movd [ecx+12], mm1 - - punpcklbw mm0, mm2 - punpcklbw mm1, mm2 - movq mm3, mm0 // mm3 = v0 - - paddw mm0, mm1 // mm0 = v0 + v1 - paddw mm3, mm3 // mm3 = v0*2 - - paddw mm0, mm1 // mm0 = v0 + v1*2 - paddw mm1, mm3 // mm1 = v0*2 + v1 - - pmulhw mm0, ScaleOneThird - pmulhw mm1, ScaleOneThird - packuswb mm1, mm0 + nvDebugCheck(block != NULL); - movq [ecx+4], mm1 - } + // Decode color block. + Color32 color_array[4]; + evaluatePalette(color_array, d3d9); + + // Write color block. + for( uint j = 0; j < 4; j++ ) { + for( uint i = 0; i < 4; i++ ) { + uint idx = (row[j] >> (2 * i)) & 3; + block->color(i, j) = color_array[idx]; + } + } } -*/ -void BlockDXT1::decodeBlock(ColorBlock * block) const +void BlockDXT1::decodeBlockNV5x(ColorBlock * block) const { - nvDebugCheck(block != NULL); - - // Decode color block. - Color32 color_array[4]; - evaluatePalette(color_array); - - // Write color block. - for( uint j = 0; j < 4; j++ ) { - for( uint i = 0; i < 4; i++ ) { - uint idx = (row[j] >> (2 * i)) & 3; - block->color(i, j) = color_array[idx]; - } - } + nvDebugCheck(block != NULL); + + // Decode color block. + Color32 color_array[4]; + evaluatePaletteNV5x(color_array); + + // Write color block. + for( uint j = 0; j < 4; j++ ) { + for( uint i = 0; i < 4; i++ ) { + uint idx = (row[j] >> (2 * i)) & 3; + block->color(i, j) = color_array[idx]; + } + } } void BlockDXT1::setIndices(int * idx) { - indices = 0; - for(uint i = 0; i < 16; i++) { - indices |= (idx[i] & 3) << (2 * i); - } + indices = 0; + for(uint i = 0; i < 16; i++) { + indices |= (idx[i] & 3) << (2 * i); + } } /// Flip DXT1 block vertically. inline void BlockDXT1::flip4() { - swap(row[0], row[3]); - swap(row[1], row[2]); + swap(row[0], row[3]); + swap(row[1], row[2]); } /// Flip half DXT1 block vertically. 
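The reworked palette code keeps the original recipe: expand the two 5:6:5 endpoints to 8 bits per channel by replicating their top bits, then derive the middle entries by 2:1 interpolation when the block is in four-color mode (col0.u > col1.u), or by averaging plus transparent black in three-color mode. The only behavioural addition is the d3d9 flag, which feeds a +1 rounding bias into the /3 divisions. A short self-contained sketch of that arithmetic; expand5, expand6 and four_color_mid are illustrative names, not functions from this patch:

    #include <cstdio>

    // Replicate the top bits to expand 5- and 6-bit channels to 8 bits,
    // exactly as BlockDXT1::evaluatePalette does before interpolating.
    static int expand5(int v) { return (v << 3) | (v >> 2); }
    static int expand6(int v) { return (v << 2) | (v >> 4); }

    // Derive the two interpolated entries of a four-color block for one
    // channel; bias is 0 for the exact rule and 1 for the D3D9 variant.
    static void four_color_mid(int c0, int c1, int bias, int& p2, int& p3)
    {
        p2 = (2 * c0 + c1 + bias) / 3;
        p3 = (2 * c1 + c0 + bias) / 3;
    }

    int main()
    {
        // Endpoints 0xF800 (red) and 0x001F (blue), red channel only.
        const int r0 = expand5(31), r1 = expand5(0);
        int p2 = 0, p3 = 0;
        four_color_mid(r0, r1, /*bias=*/0, p2, p3);
        std::printf("red palette: %d %d %d %d\n", r0, p2, p3, r1); // 255 170 85 0
        std::printf("green 0x3F expands to %d\n", expand6(63));    // 255
    }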
inline void BlockDXT1::flip2() { - swap(row[0], row[1]); + swap(row[0], row[1]); } /*---------------------------------------------------------------------------- - BlockDXT3 +BlockDXT3 ----------------------------------------------------------------------------*/ -void BlockDXT3::decodeBlock(ColorBlock * block) const +void BlockDXT3::decodeBlock(ColorBlock * block, bool d3d9/*= false*/) const { - nvDebugCheck(block != NULL); - - // Decode color. - color.decodeBlock(block); - - // Decode alpha. - alpha.decodeBlock(block); -} - -void AlphaBlockDXT3::decodeBlock(ColorBlock * block) const -{ - nvDebugCheck(block != NULL); - - block->color(0x0).a = (alpha0 << 4) | alpha0; - block->color(0x1).a = (alpha1 << 4) | alpha1; - block->color(0x2).a = (alpha2 << 4) | alpha2; - block->color(0x3).a = (alpha3 << 4) | alpha3; - block->color(0x4).a = (alpha4 << 4) | alpha4; - block->color(0x5).a = (alpha5 << 4) | alpha5; - block->color(0x6).a = (alpha6 << 4) | alpha6; - block->color(0x7).a = (alpha7 << 4) | alpha7; - block->color(0x8).a = (alpha8 << 4) | alpha8; - block->color(0x9).a = (alpha9 << 4) | alpha9; - block->color(0xA).a = (alphaA << 4) | alphaA; - block->color(0xB).a = (alphaB << 4) | alphaB; - block->color(0xC).a = (alphaC << 4) | alphaC; - block->color(0xD).a = (alphaD << 4) | alphaD; - block->color(0xE).a = (alphaE << 4) | alphaE; - block->color(0xF).a = (alphaF << 4) | alphaF; + nvDebugCheck(block != NULL); + + // Decode color. + color.decodeBlock(block, d3d9); + + // Decode alpha. + alpha.decodeBlock(block, d3d9); +} + +void BlockDXT3::decodeBlockNV5x(ColorBlock * block) const +{ + nvDebugCheck(block != NULL); + + color.decodeBlockNV5x(block); + alpha.decodeBlock(block); +} + +void AlphaBlockDXT3::decodeBlock(ColorBlock * block, bool d3d9/*= false*/) const +{ + nvDebugCheck(block != NULL); + + block->color(0x0).a = (alpha0 << 4) | alpha0; + block->color(0x1).a = (alpha1 << 4) | alpha1; + block->color(0x2).a = (alpha2 << 4) | alpha2; + block->color(0x3).a = (alpha3 << 4) | alpha3; + block->color(0x4).a = (alpha4 << 4) | alpha4; + block->color(0x5).a = (alpha5 << 4) | alpha5; + block->color(0x6).a = (alpha6 << 4) | alpha6; + block->color(0x7).a = (alpha7 << 4) | alpha7; + block->color(0x8).a = (alpha8 << 4) | alpha8; + block->color(0x9).a = (alpha9 << 4) | alpha9; + block->color(0xA).a = (alphaA << 4) | alphaA; + block->color(0xB).a = (alphaB << 4) | alphaB; + block->color(0xC).a = (alphaC << 4) | alphaC; + block->color(0xD).a = (alphaD << 4) | alphaD; + block->color(0xE).a = (alphaE << 4) | alphaE; + block->color(0xF).a = (alphaF << 4) | alphaF; } /// Flip DXT3 alpha block vertically. void AlphaBlockDXT3::flip4() { - swap(row[0], row[3]); - swap(row[1], row[2]); + swap(row[0], row[3]); + swap(row[1], row[2]); } /// Flip half DXT3 alpha block vertically. void AlphaBlockDXT3::flip2() { - swap(row[0], row[1]); + swap(row[0], row[1]); } /// Flip DXT3 block vertically. void BlockDXT3::flip4() { - alpha.flip4(); - color.flip4(); + alpha.flip4(); + color.flip4(); } /// Flip half DXT3 block vertically. 
void BlockDXT3::flip2() { - alpha.flip2(); - color.flip2(); + alpha.flip2(); + color.flip2(); } /*---------------------------------------------------------------------------- - BlockDXT5 +BlockDXT5 ----------------------------------------------------------------------------*/ -void AlphaBlockDXT5::evaluatePalette(uint8 alpha[8]) const +void AlphaBlockDXT5::evaluatePalette(uint8 alpha[8], bool d3d9) const { - if (alpha0 > alpha1) { - evaluatePalette8(alpha); - } - else { - evaluatePalette6(alpha); - } -} - -void AlphaBlockDXT5::evaluatePalette8(uint8 alpha[8]) const -{ - // 8-alpha block: derive the other six alphas. - // Bit code 000 = alpha0, 001 = alpha1, others are interpolated. - alpha[0] = alpha0; - alpha[1] = alpha1; - alpha[2] = (6 * alpha[0] + 1 * alpha[1]) / 7; // bit code 010 - alpha[3] = (5 * alpha[0] + 2 * alpha[1]) / 7; // bit code 011 - alpha[4] = (4 * alpha[0] + 3 * alpha[1]) / 7; // bit code 100 - alpha[5] = (3 * alpha[0] + 4 * alpha[1]) / 7; // bit code 101 - alpha[6] = (2 * alpha[0] + 5 * alpha[1]) / 7; // bit code 110 - alpha[7] = (1 * alpha[0] + 6 * alpha[1]) / 7; // bit code 111 -} - -void AlphaBlockDXT5::evaluatePalette6(uint8 alpha[8]) const -{ - // 6-alpha block. - // Bit code 000 = alpha0, 001 = alpha1, others are interpolated. - alpha[0] = alpha0; - alpha[1] = alpha1; - alpha[2] = (4 * alpha[0] + 1 * alpha[1]) / 5; // Bit code 010 - alpha[3] = (3 * alpha[0] + 2 * alpha[1]) / 5; // Bit code 011 - alpha[4] = (2 * alpha[0] + 3 * alpha[1]) / 5; // Bit code 100 - alpha[5] = (1 * alpha[0] + 4 * alpha[1]) / 5; // Bit code 101 - alpha[6] = 0x00; // Bit code 110 - alpha[7] = 0xFF; // Bit code 111 + if (alpha0 > alpha1) { + evaluatePalette8(alpha, d3d9); + } + else { + evaluatePalette6(alpha, d3d9); + } +} + +void AlphaBlockDXT5::evaluatePalette8(uint8 alpha[8], bool d3d9) const +{ + int bias = 0; + if (d3d9) bias = 3; + + // 8-alpha block: derive the other six alphas. + // Bit code 000 = alpha0, 001 = alpha1, others are interpolated. + alpha[0] = alpha0; + alpha[1] = alpha1; + alpha[2] = (6 * alpha[0] + 1 * alpha[1] + bias) / 7; // bit code 010 + alpha[3] = (5 * alpha[0] + 2 * alpha[1] + bias) / 7; // bit code 011 + alpha[4] = (4 * alpha[0] + 3 * alpha[1] + bias) / 7; // bit code 100 + alpha[5] = (3 * alpha[0] + 4 * alpha[1] + bias) / 7; // bit code 101 + alpha[6] = (2 * alpha[0] + 5 * alpha[1] + bias) / 7; // bit code 110 + alpha[7] = (1 * alpha[0] + 6 * alpha[1] + bias) / 7; // bit code 111 +} + +void AlphaBlockDXT5::evaluatePalette6(uint8 alpha[8], bool d3d9) const +{ + int bias = 0; + if (d3d9) bias = 2; + + // 6-alpha block. + // Bit code 000 = alpha0, 001 = alpha1, others are interpolated. 
+ alpha[0] = alpha0; + alpha[1] = alpha1; + alpha[2] = (4 * alpha[0] + 1 * alpha[1] + bias) / 5; // Bit code 010 + alpha[3] = (3 * alpha[0] + 2 * alpha[1] + bias) / 5; // Bit code 011 + alpha[4] = (2 * alpha[0] + 3 * alpha[1] + bias) / 5; // Bit code 100 + alpha[5] = (1 * alpha[0] + 4 * alpha[1] + bias) / 5; // Bit code 101 + alpha[6] = 0x00; // Bit code 110 + alpha[7] = 0xFF; // Bit code 111 } void AlphaBlockDXT5::indices(uint8 index_array[16]) const { - index_array[0x0] = bits0; - index_array[0x1] = bits1; - index_array[0x2] = bits2; - index_array[0x3] = bits3; - index_array[0x4] = bits4; - index_array[0x5] = bits5; - index_array[0x6] = bits6; - index_array[0x7] = bits7; - index_array[0x8] = bits8; - index_array[0x9] = bits9; - index_array[0xA] = bitsA; - index_array[0xB] = bitsB; - index_array[0xC] = bitsC; - index_array[0xD] = bitsD; - index_array[0xE] = bitsE; - index_array[0xF] = bitsF; + index_array[0x0] = bits0; + index_array[0x1] = bits1; + index_array[0x2] = bits2; + index_array[0x3] = bits3; + index_array[0x4] = bits4; + index_array[0x5] = bits5; + index_array[0x6] = bits6; + index_array[0x7] = bits7; + index_array[0x8] = bits8; + index_array[0x9] = bits9; + index_array[0xA] = bitsA; + index_array[0xB] = bitsB; + index_array[0xC] = bitsC; + index_array[0xD] = bitsD; + index_array[0xE] = bitsE; + index_array[0xF] = bitsF; } uint AlphaBlockDXT5::index(uint index) const { - nvDebugCheck(index < 16); + nvDebugCheck(index < 16); - int offset = (3 * index + 16); - return uint((this->u >> offset) & 0x7); + int offset = (3 * index + 16); + return uint((this->u >> offset) & 0x7); } void AlphaBlockDXT5::setIndex(uint index, uint value) { - nvDebugCheck(index < 16); - nvDebugCheck(value < 8); + nvDebugCheck(index < 16); + nvDebugCheck(value < 8); - int offset = (3 * index + 16); - uint64 mask = uint64(0x7) << offset; - this->u = (this->u & ~mask) | (uint64(value) << offset); -} - -void AlphaBlockDXT5::decodeBlock(ColorBlock * block) const -{ - nvDebugCheck(block != NULL); - - uint8 alpha_array[8]; - evaluatePalette(alpha_array); - - uint8 index_array[16]; - indices(index_array); - - for(uint i = 0; i < 16; i++) { - block->color(i).a = alpha_array[index_array[i]]; - } + int offset = (3 * index + 16); + uint64 mask = uint64(0x7) << offset; + this->u = (this->u & ~mask) | (uint64(value) << offset); +} + +void AlphaBlockDXT5::decodeBlock(ColorBlock * block, bool d3d9/*= false*/) const +{ + nvDebugCheck(block != NULL); + + uint8 alpha_array[8]; + evaluatePalette(alpha_array, d3d9); + + uint8 index_array[16]; + indices(index_array); + + for(uint i = 0; i < 16; i++) { + block->color(i).a = alpha_array[index_array[i]]; + } +} + +void AlphaBlockDXT5::decodeBlock(AlphaBlock4x4 * block, bool d3d9/*= false*/) const +{ + nvDebugCheck(block != NULL); + + uint8 alpha_array[8]; + evaluatePalette(alpha_array, d3d9); + + uint8 index_array[16]; + indices(index_array); + + for(uint i = 0; i < 16; i++) { + block->alpha[i] = alpha_array[index_array[i]]; + } } + void AlphaBlockDXT5::flip4() { - uint64 * b = (uint64 *)this; - - // @@ The masks might have to be byte swapped. - uint64 tmp = (*b & POSH_U64(0x000000000000FFFF)); - tmp |= (*b & POSH_U64(0x000000000FFF0000)) << 36; - tmp |= (*b & POSH_U64(0x000000FFF0000000)) << 12; - tmp |= (*b & POSH_U64(0x000FFF0000000000)) >> 12; - tmp |= (*b & POSH_U64(0xFFF0000000000000)) >> 36; - - *b = tmp; + uint64 * b = (uint64 *)this; + + // @@ The masks might have to be byte swapped. 
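The DXT5 alpha palette gets the same treatment: alpha0 > alpha1 selects the 8-entry palette interpolated with /7 weights (bias 3 in D3D9 mode), otherwise the 6-entry palette with /5 weights (bias 2) plus the constant 0x00 and 0xFF codes. A compact sketch equivalent to evaluatePalette8/evaluatePalette6 above; the loop form and the function name are illustrative, not the patch's code:

    #include <cstdio>

    // Build the 8-entry DXT5/BC3 alpha palette from the two endpoint alphas.
    static void alpha_palette(int a0, int a1, bool d3d9, int out[8])
    {
        out[0] = a0;
        out[1] = a1;
        if (a0 > a1) {
            const int bias = d3d9 ? 3 : 0;
            for (int i = 1; i <= 6; ++i)              // bit codes 010..111
                out[i + 1] = ((7 - i) * a0 + i * a1 + bias) / 7;
        } else {
            const int bias = d3d9 ? 2 : 0;
            for (int i = 1; i <= 4; ++i)              // bit codes 010..101
                out[i + 1] = ((5 - i) * a0 + i * a1 + bias) / 5;
            out[6] = 0x00;                            // bit code 110
            out[7] = 0xFF;                            // bit code 111
        }
    }

    int main()
    {
        int p[8];
        alpha_palette(255, 0, /*d3d9=*/false, p);
        for (int i = 0; i < 8; ++i)
            std::printf("%d ", p[i]);                 // 255 0 218 182 145 109 72 36
        std::printf("\n");
    }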
+ uint64 tmp = (*b & POSH_U64(0x000000000000FFFF)); + tmp |= (*b & POSH_U64(0x000000000FFF0000)) << 36; + tmp |= (*b & POSH_U64(0x000000FFF0000000)) << 12; + tmp |= (*b & POSH_U64(0x000FFF0000000000)) >> 12; + tmp |= (*b & POSH_U64(0xFFF0000000000000)) >> 36; + + *b = tmp; } void AlphaBlockDXT5::flip2() { - uint * b = (uint *)this; - - // @@ The masks might have to be byte swapped. - uint tmp = (*b & 0xFF000000); - tmp |= (*b & 0x00000FFF) << 12; - tmp |= (*b & 0x00FFF000) >> 12; - - *b = tmp; -} - -void BlockDXT5::decodeBlock(ColorBlock * block) const -{ - nvDebugCheck(block != NULL); - - // Decode color. - color.decodeBlock(block); - - // Decode alpha. - alpha.decodeBlock(block); + uint * b = (uint *)this; + + // @@ The masks might have to be byte swapped. + uint tmp = (*b & 0xFF000000); + tmp |= (*b & 0x00000FFF) << 12; + tmp |= (*b & 0x00FFF000) >> 12; + + *b = tmp; +} + +void BlockDXT5::decodeBlock(ColorBlock * block, bool d3d9/*= false*/) const +{ + nvDebugCheck(block != NULL); + + // Decode color. + color.decodeBlock(block, d3d9); + // Decode alpha. + alpha.decodeBlock(block, d3d9); +} + +void BlockDXT5::decodeBlockNV5x(ColorBlock * block) const +{ + nvDebugCheck(block != NULL); + + // Decode color. + color.decodeBlockNV5x(block); + + // Decode alpha. + alpha.decodeBlock(block); } /// Flip DXT5 block vertically. void BlockDXT5::flip4() { - alpha.flip4(); - color.flip4(); + alpha.flip4(); + color.flip4(); } /// Flip half DXT5 block vertically. void BlockDXT5::flip2() { - alpha.flip2(); - color.flip2(); + alpha.flip2(); + color.flip2(); } /// Decode ATI1 block. -void BlockATI1::decodeBlock(ColorBlock * block) const +void BlockATI1::decodeBlock(ColorBlock * block, bool d3d9/*= false*/) const { - uint8 alpha_array[8]; - alpha.evaluatePalette(alpha_array); - - uint8 index_array[16]; - alpha.indices(index_array); - - for(uint i = 0; i < 16; i++) { - Color32 & c = block->color(i); - c.b = c.g = c.r = alpha_array[index_array[i]]; - c.a = 255; - } + uint8 alpha_array[8]; + alpha.evaluatePalette(alpha_array, d3d9); + + uint8 index_array[16]; + alpha.indices(index_array); + + for(uint i = 0; i < 16; i++) { + Color32 & c = block->color(i); + c.b = c.g = c.r = alpha_array[index_array[i]]; + c.a = 255; + } } /// Flip ATI1 block vertically. void BlockATI1::flip4() { - alpha.flip4(); + alpha.flip4(); } /// Flip half ATI1 block vertically. void BlockATI1::flip2() { - alpha.flip2(); + alpha.flip2(); } /// Decode ATI2 block. -void BlockATI2::decodeBlock(ColorBlock * block) const +void BlockATI2::decodeBlock(ColorBlock * block, bool d3d9/*= false*/) const { - uint8 alpha_array[8]; - uint8 index_array[16]; - - x.evaluatePalette(alpha_array); - x.indices(index_array); - - for(uint i = 0; i < 16; i++) { - Color32 & c = block->color(i); - c.r = alpha_array[index_array[i]]; - } + uint8 alpha_array[8]; + uint8 index_array[16]; - y.evaluatePalette(alpha_array); - y.indices(index_array); - - for(uint i = 0; i < 16; i++) { - Color32 & c = block->color(i); - c.g = alpha_array[index_array[i]]; - c.b = 0; - c.a = 255; - } + x.evaluatePalette(alpha_array, d3d9); + x.indices(index_array); + + for(uint i = 0; i < 16; i++) { + Color32 & c = block->color(i); + c.r = alpha_array[index_array[i]]; + } + + y.evaluatePalette(alpha_array, d3d9); + y.indices(index_array); + + for(uint i = 0; i < 16; i++) { + Color32 & c = block->color(i); + c.g = alpha_array[index_array[i]]; + c.b = 0; + c.a = 255; + } } /// Flip ATI2 block vertically. 
void BlockATI2::flip4() { - x.flip4(); - y.flip4(); + x.flip4(); + y.flip4(); } /// Flip half ATI2 block vertically. void BlockATI2::flip2() { - x.flip2(); - y.flip2(); + x.flip2(); + y.flip2(); } void BlockCTX1::evaluatePalette(Color32 color_array[4]) const { - // Does bit expansion before interpolation. - color_array[0].b = 0x00; - color_array[0].g = col0[1]; - color_array[0].r = col0[0]; - color_array[0].a = 0xFF; - - color_array[1].r = 0x00; - color_array[1].g = col0[1]; - color_array[1].b = col1[0]; - color_array[1].a = 0xFF; - - color_array[2].r = 0x00; - color_array[2].g = (2 * color_array[0].g + color_array[1].g) / 3; - color_array[2].b = (2 * color_array[0].b + color_array[1].b) / 3; - color_array[2].a = 0xFF; - - color_array[3].r = 0x00; - color_array[3].g = (2 * color_array[1].g + color_array[0].g) / 3; - color_array[3].b = (2 * color_array[1].b + color_array[0].b) / 3; - color_array[3].a = 0xFF; + // Does bit expansion before interpolation. + color_array[0].b = 0x00; + color_array[0].g = col0[1]; + color_array[0].r = col0[0]; + color_array[0].a = 0xFF; + + color_array[1].r = 0x00; + color_array[1].g = col0[1]; + color_array[1].b = col1[0]; + color_array[1].a = 0xFF; + + color_array[2].r = 0x00; + color_array[2].g = (2 * color_array[0].g + color_array[1].g) / 3; + color_array[2].b = (2 * color_array[0].b + color_array[1].b) / 3; + color_array[2].a = 0xFF; + + color_array[3].r = 0x00; + color_array[3].g = (2 * color_array[1].g + color_array[0].g) / 3; + color_array[3].b = (2 * color_array[1].b + color_array[0].b) / 3; + color_array[3].a = 0xFF; } void BlockCTX1::decodeBlock(ColorBlock * block) const { - nvDebugCheck(block != NULL); - - // Decode color block. - Color32 color_array[4]; - evaluatePalette(color_array); - - // Write color block. - for( uint j = 0; j < 4; j++ ) { - for( uint i = 0; i < 4; i++ ) { - uint idx = (row[j] >> (2 * i)) & 3; - block->color(i, j) = color_array[idx]; - } - } + nvDebugCheck(block != NULL); + + // Decode color block. + Color32 color_array[4]; + evaluatePalette(color_array); + + // Write color block. + for( uint j = 0; j < 4; j++ ) { + for( uint i = 0; i < 4; i++ ) { + uint idx = (row[j] >> (2 * i)) & 3; + block->color(i, j) = color_array[idx]; + } + } } void BlockCTX1::setIndices(int * idx) { - indices = 0; - for(uint i = 0; i < 16; i++) { - indices |= (idx[i] & 3) << (2 * i); + indices = 0; + for(uint i = 0; i < 16; i++) { + indices |= (idx[i] & 3) << (2 * i); + } +} + + +/// Decode BC6 block. +void BlockBC6::decodeBlock(Vector3 colors[16]) const +{ + ZOH::Tile tile(4, 4); + ZOH::decompress((const char *)data, tile); + + // Convert ZOH's tile struct to Vector3, and convert half to float. + for (uint y = 0; y < 4; ++y) + { + for (uint x = 0; x < 4; ++x) + { + uint16 rHalf = ZOH::Tile::float2half(tile.data[y][x].x); + uint16 gHalf = ZOH::Tile::float2half(tile.data[y][x].y); + uint16 bHalf = ZOH::Tile::float2half(tile.data[y][x].z); + colors[y * 4 + x].x = to_float(rHalf); + colors[y * 4 + x].y = to_float(gHalf); + colors[y * 4 + x].z = to_float(bHalf); + } + } +} + + +/// Decode BC7 block. +void BlockBC7::decodeBlock(ColorBlock * block) const +{ + AVPCL::Tile tile(4, 4); + AVPCL::decompress((const char *)data, tile); + + // Convert AVPCL's tile struct back to NVTT's. + for (uint y = 0; y < 4; ++y) + { + for (uint x = 0; x < 4; ++x) + { + Vector4 rgba = tile.data[y][x]; + // Note: decoded rgba values are in [0, 255] range and should be an integer, + // because BC7 never uses more than 8 bits per channel. So no need to round. 
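The two decoders above route BC6H and BC7 blocks through the bundled bc6h (ZOH) and bc7 (AVPCL) libraries while keeping the decodeBlock() pattern of the other block types. A minimal usage sketch, assuming the headers are reachable as "nvimage/BlockDXT.h" and "nvimage/ColorBlock.h" and that BlockBC7 is a plain 16-byte block (consistent with the stream operator further below, which serializes it with sizeof(block)):

#include "nvimage/BlockDXT.h"
#include "nvimage/ColorBlock.h"
#include <cstring>

// Decode one 128-bit BC7 block into an uncompressed 4x4 ColorBlock.
void decodeOneBC7Block(const unsigned char compressed[16], nv::ColorBlock & rgba)
{
    nv::BlockBC7 block;
    std::memcpy(&block, compressed, 16); // BC7 blocks are always 128 bits
    block.decodeBlock(&rgba);            // AVPCL::decompress() under the hood
}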
+ block->color(x, y).setRGBA(uint8(rgba.x), uint8(rgba.y), uint8(rgba.z), uint8(rgba.w)); + } } } @@ -606,14 +676,14 @@ /// Flip CTX1 block vertically. inline void BlockCTX1::flip4() { - swap(row[0], row[3]); - swap(row[1], row[2]); + swap(row[0], row[3]); + swap(row[1], row[2]); } /// Flip half CTX1 block vertically. inline void BlockCTX1::flip2() { - swap(row[0], row[1]); + swap(row[0], row[1]); } @@ -621,46 +691,57 @@ Stream & nv::operator<<(Stream & stream, BlockDXT1 & block) { - stream << block.col0.u << block.col1.u; - stream.serialize(&block.indices, sizeof(block.indices)); - return stream; + stream << block.col0.u << block.col1.u; + stream.serialize(&block.indices, sizeof(block.indices)); + return stream; } Stream & nv::operator<<(Stream & stream, AlphaBlockDXT3 & block) { - stream.serialize(&block, sizeof(block)); - return stream; + stream.serialize(&block, sizeof(block)); + return stream; } Stream & nv::operator<<(Stream & stream, BlockDXT3 & block) { - return stream << block.alpha << block.color; + return stream << block.alpha << block.color; } Stream & nv::operator<<(Stream & stream, AlphaBlockDXT5 & block) { - stream.serialize(&block, sizeof(block)); - return stream; + stream.serialize(&block, sizeof(block)); + return stream; } Stream & nv::operator<<(Stream & stream, BlockDXT5 & block) { - return stream << block.alpha << block.color; + return stream << block.alpha << block.color; } Stream & nv::operator<<(Stream & stream, BlockATI1 & block) { - return stream << block.alpha; + return stream << block.alpha; } Stream & nv::operator<<(Stream & stream, BlockATI2 & block) { - return stream << block.x << block.y; + return stream << block.x << block.y; } Stream & nv::operator<<(Stream & stream, BlockCTX1 & block) { - stream.serialize(&block, sizeof(block)); - return stream; + stream.serialize(&block, sizeof(block)); + return stream; } +Stream & nv::operator<<(Stream & stream, BlockBC6 & block) +{ + stream.serialize(&block, sizeof(block)); + return stream; +} + +Stream & nv::operator<<(Stream & stream, BlockBC7 & block) +{ + stream.serialize(&block, sizeof(block)); + return stream; +} Index: ps/trunk/libraries/source/nvtt/src/src/nvimage/CMakeLists.txt =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvimage/CMakeLists.txt +++ ps/trunk/libraries/source/nvtt/src/src/nvimage/CMakeLists.txt @@ -1,68 +1,62 @@ PROJECT(nvimage) SET(IMAGE_SRCS - nvimage.h - FloatImage.h - FloatImage.cpp - Filter.h - Filter.cpp - Image.h - Image.cpp - ImageIO.h - ImageIO.cpp - ColorBlock.h - ColorBlock.cpp - BlockDXT.h - BlockDXT.cpp - HoleFilling.h - HoleFilling.cpp - DirectDrawSurface.h - DirectDrawSurface.cpp - Quantize.h - Quantize.cpp - NormalMap.h - NormalMap.cpp - NormalMipmap.h - NormalMipmap.cpp - PsdFile.h - TgaFile.h) + nvimage.h + BlockDXT.h BlockDXT.cpp + ColorBlock.h ColorBlock.cpp + DirectDrawSurface.h DirectDrawSurface.cpp + ErrorMetric.h ErrorMetric.cpp + Filter.h Filter.cpp + FloatImage.h FloatImage.cpp + Image.h Image.cpp + ImageIO.h ImageIO.cpp + #KtxFile.h KtxFile.cpp + NormalMap.h NormalMap.cpp + PixelFormat.h + PsdFile.h + TgaFile.h) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) IF(PNG_FOUND) - SET(LIBS ${LIBS} ${PNG_LIBRARIES}) - INCLUDE_DIRECTORIES(${PNG_INCLUDE_DIR}) + SET(LIBS ${LIBS} ${PNG_LIBRARIES}) + INCLUDE_DIRECTORIES(${PNG_INCLUDE_DIR}) ENDIF(PNG_FOUND) IF(JPEG_FOUND) - SET(LIBS ${LIBS} ${JPEG_LIBRARIES}) - INCLUDE_DIRECTORIES(${JPEG_INCLUDE_DIR}) + SET(LIBS ${LIBS} ${JPEG_LIBRARIES}) + 
INCLUDE_DIRECTORIES(${JPEG_INCLUDE_DIR}) ENDIF(JPEG_FOUND) IF(TIFF_FOUND) - SET(LIBS ${LIBS} ${TIFF_LIBRARIES}) - INCLUDE_DIRECTORIES(${TIFF_INCLUDE_DIR}) + SET(LIBS ${LIBS} ${TIFF_LIBRARIES}) + INCLUDE_DIRECTORIES(${TIFF_INCLUDE_DIR}) ENDIF(TIFF_FOUND) IF(OPENEXR_FOUND) - SET(LIBS ${LIBS} ${OPENEXR_LIBRARIES}) - INCLUDE_DIRECTORIES(${OPENEXR_INCLUDE_PATHS}) + SET(LIBS ${LIBS} ${OPENEXR_LIBRARIES}) + INCLUDE_DIRECTORIES(${OPENEXR_INCLUDE_PATHS}) ENDIF(OPENEXR_FOUND) +IF(FREEIMAGE_FOUND) + SET(LIBS ${LIBS} ${FREEIMAGE_LIBRARIES}) + INCLUDE_DIRECTORIES(${FREEIMAGE_INCLUDE_PATH}) +ENDIF(FREEIMAGE_FOUND) + # targets ADD_DEFINITIONS(-DNVIMAGE_EXPORTS) -IF(NVIMAGE_SHARED) - ADD_DEFINITIONS(-DNVIMAGE_SHARED=1) - ADD_LIBRARY(nvimage SHARED ${IMAGE_SRCS}) +IF(NVIMAGE_SHARED) + ADD_DEFINITIONS(-DNVIMAGE_SHARED=1) + ADD_LIBRARY(nvimage SHARED ${IMAGE_SRCS}) ELSE(NVIMAGE_SHARED) - ADD_LIBRARY(nvimage ${IMAGE_SRCS}) + ADD_LIBRARY(nvimage ${IMAGE_SRCS}) ENDIF(NVIMAGE_SHARED) -TARGET_LINK_LIBRARIES(nvimage ${LIBS} nvcore nvmath posh) +TARGET_LINK_LIBRARIES(nvimage ${LIBS} nvcore posh bc6h bc7 nvmath) INSTALL(TARGETS nvimage - RUNTIME DESTINATION ${BINDIR} - LIBRARY DESTINATION ${LIBDIR} - ARCHIVE DESTINATION ${LIBDIR}) + RUNTIME DESTINATION ${BINDIR} + LIBRARY DESTINATION ${LIBDIR} + ARCHIVE DESTINATION ${LIBDIR}) Index: ps/trunk/libraries/source/nvtt/src/src/nvimage/ColorBlock.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvimage/ColorBlock.h +++ ps/trunk/libraries/source/nvtt/src/src/nvimage/ColorBlock.h @@ -1,95 +1,163 @@ // This code is in the public domain -- castanyo@yahoo.es +#pragma once #ifndef NV_IMAGE_COLORBLOCK_H #define NV_IMAGE_COLORBLOCK_H -#include +#include "nvimage.h" + +#include "nvmath/Color.h" +#include "nvmath/Vector.h" namespace nv { - class Image; + class Image; + class FloatImage; + + + /// Uncompressed 4x4 color block. + struct NVIMAGE_CLASS ColorBlock + { + ColorBlock(); + ColorBlock(const uint * linearImage); + ColorBlock(const ColorBlock & block); + ColorBlock(const Image * img, uint x, uint y); + + void init(const Image * img, uint x, uint y); + void init(uint w, uint h, const uint * data, uint x, uint y); + void init(uint w, uint h, const float * data, uint x, uint y); + + void swizzle(uint x, uint y, uint z, uint w); // 0=r, 1=g, 2=b, 3=a, 4=0xFF, 5=0 + + bool isSingleColor(Color32 mask = Color32(0xFF, 0xFF, 0xFF, 0x00)) const; + bool hasAlpha() const; + + + // Accessors + const Color32 * colors() const; + + Color32 color(uint i) const; + Color32 & color(uint i); + + Color32 color(uint x, uint y) const; + Color32 & color(uint x, uint y); + + private: + + Color32 m_color[4*4]; + + }; + + + /// Get pointer to block colors. + inline const Color32 * ColorBlock::colors() const + { + return m_color; + } + + /// Get block color. + inline Color32 ColorBlock::color(uint i) const + { + nvDebugCheck(i < 16); + return m_color[i]; + } + + /// Get block color. + inline Color32 & ColorBlock::color(uint i) + { + nvDebugCheck(i < 16); + return m_color[i]; + } + + /// Get block color. + inline Color32 ColorBlock::color(uint x, uint y) const + { + nvDebugCheck(x < 4 && y < 4); + return m_color[y * 4 + x]; + } + + /// Get block color. 
+ inline Color32 & ColorBlock::color(uint x, uint y) + { + nvDebugCheck(x < 4 && y < 4); + return m_color[y * 4 + x]; + } + + /* + struct ColorSet + { + ColorSet() : colorCount(0), indexCount(0), w(0), h(0) {} + //~ColorSet() {} + + void allocate(uint w, uint h); + + void setColors(const float * data, uint img_w, uint img_h, uint img_x, uint img_y); + void setColors(const Vector3 colors[16], const float weights[16]); + void setColors(const Vector4 colors[16], const float weights[16]); + + void setAlphaWeights(); + void setUniformWeights(); + + void createMinimalSet(bool ignoreTransparent); + void wrapIndices(); + + void swizzle(uint x, uint y, uint z, uint w); // 0=r, 1=g, 2=b, 3=a, 4=0xFF, 5=0 + + bool isSingleColor(bool ignoreAlpha) const; + bool hasAlpha() const; + + // These methods require indices to be set: + Vector4 color(uint x, uint y) const { nvDebugCheck(x < w && y < h); return colors[indices[y * 4 + x]]; } + Vector4 & color(uint x, uint y) { nvDebugCheck(x < w && y < h); return colors[indices[y * 4 + x]]; } + + Vector4 color(uint i) const { nvDebugCheck(i < indexCount); return colors[indices[i]]; } + Vector4 & color(uint i) { nvDebugCheck(i < indexCount); return colors[indices[i]]; } + + float weight(uint i) const { nvDebugCheck(i < indexCount); return weights[indices[i]]; } + + bool isValidIndex(uint i) const { return i < indexCount && indices[i] >= 0; } + + uint colorCount; + uint indexCount; // Fixed to 16 + uint w, h; // Fixed to 4x4 + + // Allocate color set dynamically and add support for sets larger than 4x4. + Vector4 colors[16]; + float weights[16]; // @@ Add mask to indicate what color components are weighted? + int indices[16]; + }; + */ + + + /// Uncompressed 4x4 alpha block. + struct NVIMAGE_CLASS AlphaBlock4x4 + { + void init(uint8 value); + void init(const ColorBlock & src, uint channel); + //void init(const ColorSet & src, uint channel); + + //void initMaxRGB(const ColorSet & src, float threshold); + //void initWeights(const ColorSet & src); + + uint8 alpha[4*4]; + float weights[16]; + }; + + + struct FloatAlphaBlock4x4 + { + float alphas[4 * 4]; + float weights[4 * 4]; + }; + + struct FloatColorBlock4x4 + { + Vector4 colors[4 * 4]; + float weights[4 * 4]; + }; + + - /// Uncompressed 4x4 color block. - struct ColorBlock - { - ColorBlock(); - ColorBlock(const uint * linearImage); - ColorBlock(const ColorBlock & block); - ColorBlock(const Image * img, uint x, uint y); - - void init(const Image * img, uint x, uint y); - - void swizzleDXT5n(); - void splatX(); - void splatY(); - - bool isSingleColor() const; - uint countUniqueColors() const; - Color32 averageColor() const; - bool hasAlpha() const; - - void diameterRange(Color32 * start, Color32 * end) const; - void luminanceRange(Color32 * start, Color32 * end) const; - void boundsRange(Color32 * start, Color32 * end) const; - void boundsRangeAlpha(Color32 * start, Color32 * end) const; - - void sortColorsByAbsoluteValue(); - - void computeRange(const Vector3 & axis, Color32 * start, Color32 * end) const; - void sortColors(const Vector3 & axis); - - float volume() const; - - // Accessors - const Color32 * colors() const; - - Color32 color(uint i) const; - Color32 & color(uint i); - - Color32 color(uint x, uint y) const; - Color32 & color(uint x, uint y); - - private: - - Color32 m_color[4*4]; - - }; - - - /// Get pointer to block colors. - inline const Color32 * ColorBlock::colors() const - { - return m_color; - } - - /// Get block color. 
- inline Color32 ColorBlock::color(uint i) const - { - nvDebugCheck(i < 16); - return m_color[i]; - } - - /// Get block color. - inline Color32 & ColorBlock::color(uint i) - { - nvDebugCheck(i < 16); - return m_color[i]; - } - - /// Get block color. - inline Color32 ColorBlock::color(uint x, uint y) const - { - nvDebugCheck(x < 4 && y < 4); - return m_color[y * 4 + x]; - } - - /// Get block color. - inline Color32 & ColorBlock::color(uint x, uint y) - { - nvDebugCheck(x < 4 && y < 4); - return m_color[y * 4 + x]; - } - } // nv namespace #endif // NV_IMAGE_COLORBLOCK_H Index: ps/trunk/libraries/source/nvtt/src/src/nvimage/ColorBlock.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvimage/ColorBlock.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvimage/ColorBlock.cpp @@ -1,25 +1,33 @@ // This code is in the public domain -- castanyo@yahoo.es -#include -#include -#include +#include "ColorBlock.h" +#include "Image.h" +#include "FloatImage.h" + +#include "nvmath/Box.h" +#include "nvmath/Vector.inl" +#include "nvmath/ftoi.h" + +#include "nvcore/Utils.h" // swap + +#include // memcpy using namespace nv; namespace { - - // Get approximate luminance. - inline static uint colorLuminance(Color32 c) - { - return c.r + c.g + c.b; - } - - // Get the euclidean distance between the given colors. - inline static uint colorDistance(Color32 c0, Color32 c1) - { - return (c0.r - c1.r) * (c0.r - c1.r) + (c0.g - c1.g) * (c0.g - c1.g) + (c0.b - c1.b) * (c0.b - c1.b); - } - + + // Get approximate luminance. + inline static uint colorLuminance(Color32 c) + { + return c.r + c.g + c.b; + } + + // Get the euclidean distance between the given colors. + inline static uint colorDistance(Color32 c0, Color32 c1) + { + return (c0.r - c1.r) * (c0.r - c1.r) + (c0.g - c1.g) * (c0.g - c1.g) + (c0.b - c1.b) * (c0.b - c1.b); + } + } // namespace` @@ -31,374 +39,701 @@ /// Init the color block from an array of colors. ColorBlock::ColorBlock(const uint * linearImage) { - for(uint i = 0; i < 16; i++) { - color(i) = Color32(linearImage[i]); - } + for(uint i = 0; i < 16; i++) { + color(i) = Color32(linearImage[i]); + } } /// Init the color block with the contents of the given block. ColorBlock::ColorBlock(const ColorBlock & block) { - for(uint i = 0; i < 16; i++) { - color(i) = block.color(i); - } + for(uint i = 0; i < 16; i++) { + color(i) = block.color(i); + } } /// Initialize this color block. ColorBlock::ColorBlock(const Image * img, uint x, uint y) { - init(img, x, y); + init(img, x, y); } void ColorBlock::init(const Image * img, uint x, uint y) { - nvDebugCheck(img != NULL); - - const uint bw = min(img->width() - x, 4U); - const uint bh = min(img->height() - y, 4U); + init(img->width(), img->height(), (const uint *)img->pixels(), x, y); +} - nvDebugCheck(bw != 0); - nvDebugCheck(bh != 0); +void ColorBlock::init(uint w, uint h, const uint * data, uint x, uint y) +{ + nvDebugCheck(data != NULL); - static int remainder[] = { - 0, 0, 0, 0, - 0, 1, 0, 1, - 0, 1, 2, 0, - 0, 1, 2, 3, - }; + const uint bw = min(w - x, 4U); + const uint bh = min(h - y, 4U); + nvDebugCheck(bw != 0 && bh != 0); - // Blocks that are smaller than 4x4 are handled by repeating the pixels. - // @@ Thats only correct when block size is 1, 2 or 4, but not with 3. :( + // Blocks that are smaller than 4x4 are handled by repeating the pixels. + // @@ Thats only correct when block size is 1, 2 or 4, but not with 3. :( + // @@ Ideally we should zero the weights of the pixels out of range. 
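Concretely, the wrap used below maps destination column e to source column e % bw (and destination row i to i % bh), which for the possible block widths gives:

// bw = 1:  source columns 0 0 0 0
// bw = 2:  source columns 0 1 0 1
// bw = 3:  source columns 0 1 2 0   (the case the @@ note flags as not strictly correct)
// bw = 4:  source columns 0 1 2 3

The removed remainder[] table encoded exactly the same mapping; the modulo form is just more direct.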
- for(uint i = 0; i < 4; i++) { - //const int by = i % bh; - const int by = remainder[(bh - 1) * 4 + i]; - for(uint e = 0; e < 4; e++) { - //const int bx = e % bw; - const int bx = remainder[(bw - 1) * 4 + e]; - color(e, i) = img->pixel(x + bx, y + by); - } - } -} + for (uint i = 0; i < 4; i++) + { + const int by = i % bh; + for (uint e = 0; e < 4; e++) + { + const int bx = e % bw; + const uint idx = (y + by) * w + x + bx; -void ColorBlock::swizzleDXT5n() + color(e, i).u = data[idx]; + } + } +} + +void ColorBlock::init(uint w, uint h, const float * data, uint x, uint y) { - for(int i = 0; i < 16; i++) - { - Color32 c = m_color[i]; - m_color[i] = Color32(0xFF, c.g, 0, c.r); - } + nvDebugCheck(data != NULL); + + const uint bw = min(w - x, 4U); + const uint bh = min(h - y, 4U); + nvDebugCheck(bw != 0 && bh != 0); + + // Blocks that are smaller than 4x4 are handled by repeating the pixels. + // @@ Thats only correct when block size is 1, 2 or 4, but not with 3. :( + // @@ Ideally we should zero the weights of the pixels out of range. + + uint srcPlane = w * h; + + for (uint i = 0; i < 4; i++) + { + const uint by = i % bh; + + for (uint e = 0; e < 4; e++) + { + const uint bx = e % bw; + const uint idx = ((y + by) * w + x + bx); + + Color32 & c = color(e, i); + c.r = uint8(255 * clamp(data[idx + 0 * srcPlane], 0.0f, 1.0f)); // @@ Is this the right way to quantize floats to bytes? + c.g = uint8(255 * clamp(data[idx + 1 * srcPlane], 0.0f, 1.0f)); + c.b = uint8(255 * clamp(data[idx + 2 * srcPlane], 0.0f, 1.0f)); + c.a = uint8(255 * clamp(data[idx + 3 * srcPlane], 0.0f, 1.0f)); + } + } } -void ColorBlock::splatX() +static inline uint8 component(Color32 c, uint i) { - for(int i = 0; i < 16; i++) - { - uint8 x = m_color[i].r; - m_color[i] = Color32(x, x, x, x); - } + if (i == 0) return c.r; + if (i == 1) return c.g; + if (i == 2) return c.b; + if (i == 3) return c.a; + if (i == 4) return 0xFF; + return 0; } -void ColorBlock::splatY() +void ColorBlock::swizzle(uint x, uint y, uint z, uint w) { - for(int i = 0; i < 16; i++) - { - uint8 y = m_color[i].g; - m_color[i] = Color32(y, y, y, y); - } + for (int i = 0; i < 16; i++) + { + Color32 c = m_color[i]; + m_color[i].r = component(c, x); + m_color[i].g = component(c, y); + m_color[i].b = component(c, z); + m_color[i].a = component(c, w); + } } + /// Returns true if the block has a single color. -bool ColorBlock::isSingleColor() const +bool ColorBlock::isSingleColor(Color32 mask/*= Color32(0xFF, 0xFF, 0xFF, 0x00)*/) const { - Color32 mask(0xFF, 0xFF, 0xFF, 0x00); - uint u = m_color[0].u & mask.u; - - for (int i = 1; i < 16; i++) - { - if (u != (m_color[i].u & mask.u)) - { - return false; - } - } - - return true; + uint u = m_color[0].u & mask.u; + + for (int i = 1; i < 16; i++) + { + if (u != (m_color[i].u & mask.u)) + { + return false; + } + } + + return true; +} + +/* +/// Returns true if the block has a single color, ignoring transparent pixels. +bool ColorBlock::isSingleColorNoAlpha() const +{ + Color32 c; + int i; + for(i = 0; i < 16; i++) + { + if (m_color[i].a != 0) c = m_color[i]; + } + + Color32 mask(0xFF, 0xFF, 0xFF, 0x00); + uint u = c.u & mask.u; + + for(; i < 16; i++) + { + if (u != (m_color[i].u & mask.u)) + { + return false; + } + } + + return true; } +*/ /// Count number of unique colors in this color block. 
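The generic swizzle() above replaces the removed splatX(), splatY() and swizzleDXT5n() helpers; its channel codes are 0=r, 1=g, 2=b, 3=a, 4=0xFF, 5=0. A small sketch of the equivalences, assuming the header is reachable as "nvimage/ColorBlock.h":

#include "nvimage/ColorBlock.h"

void dxt5nSwizzle(nv::ColorBlock & block)
{
    block.swizzle(4, 1, 5, 0);      // old swizzleDXT5n(): r = 0xFF, g = g, b = 0, a = r
    // block.swizzle(0, 0, 0, 0);   // old splatX(): replicate red into every channel
    // block.swizzle(1, 1, 1, 1);   // old splatY(): replicate green into every channel
}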
-uint ColorBlock::countUniqueColors() const +/*uint ColorBlock::countUniqueColors() const { - uint count = 0; + uint count = 0; - // @@ This does not have to be o(n^2) - for(int i = 0; i < 16; i++) - { - bool unique = true; - for(int j = 0; j < i; j++) { - if( m_color[i] != m_color[j] ) { - unique = false; - } - } - - if( unique ) { - count++; - } - } - - return count; -} + // @@ This does not have to be o(n^2) + for(int i = 0; i < 16; i++) + { + bool unique = true; + for(int j = 0; j < i; j++) { + if( m_color[i] != m_color[j] ) { + unique = false; + } + } + + if( unique ) { + count++; + } + } -/// Get average color of the block. + return count; +}*/ + +/*/// Get average color of the block. Color32 ColorBlock::averageColor() const { - uint r, g, b, a; - r = g = b = a = 0; + uint r, g, b, a; + r = g = b = a = 0; - for(uint i = 0; i < 16; i++) { - r += m_color[i].r; - g += m_color[i].g; - b += m_color[i].b; - a += m_color[i].a; - } - - return Color32(uint8(r / 16), uint8(g / 16), uint8(b / 16), uint8(a / 16)); -} + for(uint i = 0; i < 16; i++) { + r += m_color[i].r; + g += m_color[i].g; + b += m_color[i].b; + a += m_color[i].a; + } + + return Color32(uint8(r / 16), uint8(g / 16), uint8(b / 16), uint8(a / 16)); +}*/ /// Return true if the block is not fully opaque. bool ColorBlock::hasAlpha() const { - for (uint i = 0; i < 16; i++) - { - if (m_color[i].a != 255) return true; - } - return false; + for (uint i = 0; i < 16; i++) + { + if (m_color[i].a != 255) return true; + } + return false; } +#if 0 /// Get diameter color range. void ColorBlock::diameterRange(Color32 * start, Color32 * end) const { - nvDebugCheck(start != NULL); - nvDebugCheck(end != NULL); - - Color32 c0, c1; - uint best_dist = 0; - - for(int i = 0; i < 16; i++) { - for (int j = i+1; j < 16; j++) { - uint dist = colorDistance(m_color[i], m_color[j]); - if( dist > best_dist ) { - best_dist = dist; - c0 = m_color[i]; - c1 = m_color[j]; - } - } - } - - *start = c0; - *end = c1; + nvDebugCheck(start != NULL); + nvDebugCheck(end != NULL); + + Color32 c0, c1; + uint best_dist = 0; + + for(int i = 0; i < 16; i++) { + for (int j = i+1; j < 16; j++) { + uint dist = colorDistance(m_color[i], m_color[j]); + if( dist > best_dist ) { + best_dist = dist; + c0 = m_color[i]; + c1 = m_color[j]; + } + } + } + + *start = c0; + *end = c1; } /// Get luminance color range. void ColorBlock::luminanceRange(Color32 * start, Color32 * end) const { - nvDebugCheck(start != NULL); - nvDebugCheck(end != NULL); - - Color32 minColor, maxColor; - uint minLuminance, maxLuminance; - - maxLuminance = minLuminance = colorLuminance(m_color[0]); - - for(uint i = 1; i < 16; i++) - { - uint luminance = colorLuminance(m_color[i]); - - if (luminance > maxLuminance) { - maxLuminance = luminance; - maxColor = m_color[i]; - } - else if (luminance < minLuminance) { - minLuminance = luminance; - minColor = m_color[i]; - } - } + nvDebugCheck(start != NULL); + nvDebugCheck(end != NULL); + + Color32 minColor, maxColor; + uint minLuminance, maxLuminance; + + maxLuminance = minLuminance = colorLuminance(m_color[0]); - *start = minColor; - *end = maxColor; + for(uint i = 1; i < 16; i++) + { + uint luminance = colorLuminance(m_color[i]); + + if (luminance > maxLuminance) { + maxLuminance = luminance; + maxColor = m_color[i]; + } + else if (luminance < minLuminance) { + minLuminance = luminance; + minColor = m_color[i]; + } + } + + *start = minColor; + *end = maxColor; } /// Get color range based on the bounding box. 
void ColorBlock::boundsRange(Color32 * start, Color32 * end) const { - nvDebugCheck(start != NULL); - nvDebugCheck(end != NULL); + nvDebugCheck(start != NULL); + nvDebugCheck(end != NULL); - Color32 minColor(255, 255, 255); - Color32 maxColor(0, 0, 0); + Color32 minColor(255, 255, 255); + Color32 maxColor(0, 0, 0); - for(uint i = 0; i < 16; i++) - { - if (m_color[i].r < minColor.r) { minColor.r = m_color[i].r; } - if (m_color[i].g < minColor.g) { minColor.g = m_color[i].g; } - if (m_color[i].b < minColor.b) { minColor.b = m_color[i].b; } - if (m_color[i].r > maxColor.r) { maxColor.r = m_color[i].r; } - if (m_color[i].g > maxColor.g) { maxColor.g = m_color[i].g; } - if (m_color[i].b > maxColor.b) { maxColor.b = m_color[i].b; } - } - - // Offset range by 1/16 of the extents - Color32 inset; - inset.r = (maxColor.r - minColor.r) >> 4; - inset.g = (maxColor.g - minColor.g) >> 4; - inset.b = (maxColor.b - minColor.b) >> 4; - - minColor.r = (minColor.r + inset.r <= 255) ? minColor.r + inset.r : 255; - minColor.g = (minColor.g + inset.g <= 255) ? minColor.g + inset.g : 255; - minColor.b = (minColor.b + inset.b <= 255) ? minColor.b + inset.b : 255; - - maxColor.r = (maxColor.r >= inset.r) ? maxColor.r - inset.r : 0; - maxColor.g = (maxColor.g >= inset.g) ? maxColor.g - inset.g : 0; - maxColor.b = (maxColor.b >= inset.b) ? maxColor.b - inset.b : 0; + for(uint i = 0; i < 16; i++) + { + if (m_color[i].r < minColor.r) { minColor.r = m_color[i].r; } + if (m_color[i].g < minColor.g) { minColor.g = m_color[i].g; } + if (m_color[i].b < minColor.b) { minColor.b = m_color[i].b; } + if (m_color[i].r > maxColor.r) { maxColor.r = m_color[i].r; } + if (m_color[i].g > maxColor.g) { maxColor.g = m_color[i].g; } + if (m_color[i].b > maxColor.b) { maxColor.b = m_color[i].b; } + } + + // Offset range by 1/16 of the extents + Color32 inset; + inset.r = (maxColor.r - minColor.r) >> 4; + inset.g = (maxColor.g - minColor.g) >> 4; + inset.b = (maxColor.b - minColor.b) >> 4; + + minColor.r = (minColor.r + inset.r <= 255) ? minColor.r + inset.r : 255; + minColor.g = (minColor.g + inset.g <= 255) ? minColor.g + inset.g : 255; + minColor.b = (minColor.b + inset.b <= 255) ? minColor.b + inset.b : 255; + + maxColor.r = (maxColor.r >= inset.r) ? maxColor.r - inset.r : 0; + maxColor.g = (maxColor.g >= inset.g) ? maxColor.g - inset.g : 0; + maxColor.b = (maxColor.b >= inset.b) ? maxColor.b - inset.b : 0; - *start = minColor; - *end = maxColor; + *start = minColor; + *end = maxColor; } /// Get color range based on the bounding box. 
void ColorBlock::boundsRangeAlpha(Color32 * start, Color32 * end) const { - nvDebugCheck(start != NULL); - nvDebugCheck(end != NULL); + nvDebugCheck(start != NULL); + nvDebugCheck(end != NULL); - Color32 minColor(255, 255, 255, 255); - Color32 maxColor(0, 0, 0, 0); + Color32 minColor(255, 255, 255, 255); + Color32 maxColor(0, 0, 0, 0); - for(uint i = 0; i < 16; i++) - { - if (m_color[i].r < minColor.r) { minColor.r = m_color[i].r; } - if (m_color[i].g < minColor.g) { minColor.g = m_color[i].g; } - if (m_color[i].b < minColor.b) { minColor.b = m_color[i].b; } - if (m_color[i].a < minColor.a) { minColor.a = m_color[i].a; } - if (m_color[i].r > maxColor.r) { maxColor.r = m_color[i].r; } - if (m_color[i].g > maxColor.g) { maxColor.g = m_color[i].g; } - if (m_color[i].b > maxColor.b) { maxColor.b = m_color[i].b; } - if (m_color[i].a > maxColor.a) { maxColor.a = m_color[i].a; } - } - - // Offset range by 1/16 of the extents - Color32 inset; - inset.r = (maxColor.r - minColor.r) >> 4; - inset.g = (maxColor.g - minColor.g) >> 4; - inset.b = (maxColor.b - minColor.b) >> 4; - inset.a = (maxColor.a - minColor.a) >> 4; - - minColor.r = (minColor.r + inset.r <= 255) ? minColor.r + inset.r : 255; - minColor.g = (minColor.g + inset.g <= 255) ? minColor.g + inset.g : 255; - minColor.b = (minColor.b + inset.b <= 255) ? minColor.b + inset.b : 255; - minColor.a = (minColor.a + inset.a <= 255) ? minColor.a + inset.a : 255; - - maxColor.r = (maxColor.r >= inset.r) ? maxColor.r - inset.r : 0; - maxColor.g = (maxColor.g >= inset.g) ? maxColor.g - inset.g : 0; - maxColor.b = (maxColor.b >= inset.b) ? maxColor.b - inset.b : 0; - maxColor.a = (maxColor.a >= inset.a) ? maxColor.a - inset.a : 0; - - *start = minColor; - *end = maxColor; -} + for(uint i = 0; i < 16; i++) + { + if (m_color[i].r < minColor.r) { minColor.r = m_color[i].r; } + if (m_color[i].g < minColor.g) { minColor.g = m_color[i].g; } + if (m_color[i].b < minColor.b) { minColor.b = m_color[i].b; } + if (m_color[i].a < minColor.a) { minColor.a = m_color[i].a; } + if (m_color[i].r > maxColor.r) { maxColor.r = m_color[i].r; } + if (m_color[i].g > maxColor.g) { maxColor.g = m_color[i].g; } + if (m_color[i].b > maxColor.b) { maxColor.b = m_color[i].b; } + if (m_color[i].a > maxColor.a) { maxColor.a = m_color[i].a; } + } + + // Offset range by 1/16 of the extents + Color32 inset; + inset.r = (maxColor.r - minColor.r) >> 4; + inset.g = (maxColor.g - minColor.g) >> 4; + inset.b = (maxColor.b - minColor.b) >> 4; + inset.a = (maxColor.a - minColor.a) >> 4; + + minColor.r = (minColor.r + inset.r <= 255) ? minColor.r + inset.r : 255; + minColor.g = (minColor.g + inset.g <= 255) ? minColor.g + inset.g : 255; + minColor.b = (minColor.b + inset.b <= 255) ? minColor.b + inset.b : 255; + minColor.a = (minColor.a + inset.a <= 255) ? minColor.a + inset.a : 255; + + maxColor.r = (maxColor.r >= inset.r) ? maxColor.r - inset.r : 0; + maxColor.g = (maxColor.g >= inset.g) ? maxColor.g - inset.g : 0; + maxColor.b = (maxColor.b >= inset.b) ? maxColor.b - inset.b : 0; + maxColor.a = (maxColor.a >= inset.a) ? maxColor.a - inset.a : 0; + *start = minColor; + *end = maxColor; +} +#endif -/// Sort colors by abosolute value in their 16 bit representation. +/*/// Sort colors by abosolute value in their 16 bit representation. void ColorBlock::sortColorsByAbsoluteValue() { - // Dummy selection sort. 
- for( uint a = 0; a < 16; a++ ) { - uint max = a; - Color16 cmax(m_color[a]); - - for( uint b = a+1; b < 16; b++ ) { - Color16 cb(m_color[b]); - - if( cb.u > cmax.u ) { - max = b; - cmax = cb; - } - } - swap( m_color[a], m_color[max] ); - } -} + // Dummy selection sort. + for( uint a = 0; a < 16; a++ ) { + uint max = a; + Color16 cmax(m_color[a]); + + for( uint b = a+1; b < 16; b++ ) { + Color16 cb(m_color[b]); + + if( cb.u > cmax.u ) { + max = b; + cmax = cb; + } + } + swap( m_color[a], m_color[max] ); + } +}*/ -/// Find extreme colors in the given axis. +/*/// Find extreme colors in the given axis. void ColorBlock::computeRange(Vector3::Arg axis, Color32 * start, Color32 * end) const { - nvDebugCheck(start != NULL); - nvDebugCheck(end != NULL); - - int mini, maxi; - mini = maxi = 0; - - float min, max; - min = max = dot(Vector3(m_color[0].r, m_color[0].g, m_color[0].b), axis); - - for(uint i = 1; i < 16; i++) - { - const Vector3 vec(m_color[i].r, m_color[i].g, m_color[i].b); - - float val = dot(vec, axis); - if( val < min ) { - mini = i; - min = val; - } - else if( val > max ) { - maxi = i; - max = val; - } - } - - *start = m_color[mini]; - *end = m_color[maxi]; -} + nvDebugCheck(start != NULL); + nvDebugCheck(end != NULL); + + int mini, maxi; + mini = maxi = 0; + + float min, max; + min = max = dot(Vector3(m_color[0].r, m_color[0].g, m_color[0].b), axis); + for(uint i = 1; i < 16; i++) + { + const Vector3 vec(m_color[i].r, m_color[i].g, m_color[i].b); + + float val = dot(vec, axis); + if( val < min ) { + mini = i; + min = val; + } + else if( val > max ) { + maxi = i; + max = val; + } + } + + *start = m_color[mini]; + *end = m_color[maxi]; +}*/ -/// Sort colors in the given axis. + +/*/// Sort colors in the given axis. void ColorBlock::sortColors(const Vector3 & axis) { - float luma_array[16]; - - for(uint i = 0; i < 16; i++) { - const Vector3 vec(m_color[i].r, m_color[i].g, m_color[i].b); - luma_array[i] = dot(vec, axis); - } - - // Dummy selection sort. - for( uint a = 0; a < 16; a++ ) { - uint min = a; - for( uint b = a+1; b < 16; b++ ) { - if( luma_array[b] < luma_array[min] ) { - min = b; - } - } - swap( luma_array[a], luma_array[min] ); - swap( m_color[a], m_color[min] ); - } -} + float luma_array[16]; + for(uint i = 0; i < 16; i++) { + const Vector3 vec(m_color[i].r, m_color[i].g, m_color[i].b); + luma_array[i] = dot(vec, axis); + } + + // Dummy selection sort. + for( uint a = 0; a < 16; a++ ) { + uint min = a; + for( uint b = a+1; b < 16; b++ ) { + if( luma_array[b] < luma_array[min] ) { + min = b; + } + } + swap( luma_array[a], luma_array[min] ); + swap( m_color[a], m_color[min] ); + } +}*/ -/// Get the volume of the color block. + +/*/// Get the volume of the color block. 
float ColorBlock::volume() const { - Box bounds; - bounds.clearBounds(); - - for(int i = 0; i < 16; i++) { - const Vector3 point(m_color[i].r, m_color[i].g, m_color[i].b); - bounds.addPointToBounds(point); - } - - return bounds.volume(); + Box bounds; + bounds.clearBounds(); + + for(int i = 0; i < 16; i++) { + const Vector3 point(m_color[i].r, m_color[i].g, m_color[i].b); + bounds.addPointToBounds(point); + } + + return bounds.volume(); +}*/ + +#if 0 +void ColorSet::allocate(uint w, uint h) +{ + nvDebugCheck(w <= 4 && h <= 4); + + this->colorCount = w * h; + this->indexCount = 16; + this->w = 4; + this->h = 4; + + //colors = new Vector4[colorCount]; + //weights = new float[colorCount]; + //indices = new int[indexCount]; +} + +// Allocate 4x4 block and fill with +void ColorSet::setColors(const float * data, uint img_w, uint img_h, uint img_x, uint img_y) +{ + nvDebugCheck(img_x < img_w && img_y < img_h); + + const uint block_w = min(4U, img_w - img_x); + const uint block_h = min(4U, img_h - img_y); + nvDebugCheck(block_w != 0 && block_h != 0); + + allocate(block_w, block_h); + + const float * r = data + img_w * img_h * 0; + const float * g = data + img_w * img_h * 1; + const float * b = data + img_w * img_h * 2; + const float * a = data + img_w * img_h * 3; + + // Set colors. + for (uint y = 0, i = 0; y < block_h; y++) + { + for (uint x = 0; x < block_w; x++, i++) + { + uint idx = x + img_x + (y + img_y) * img_w; + colors[i].x = r[idx]; + colors[i].y = g[idx]; + colors[i].z = b[idx]; + colors[i].w = a[idx]; + } + } + + // Set default indices. + for (uint y = 0, i = 0; y < 4; y++) + { + for (uint x = 0; x < 4; x++) + { + if (x < block_w && y < block_h) { + indices[y*4+x] = i++; + } + else { + indices[y*4+x] = -1; + } + } + } } +void ColorSet::setColors(const Vector3 colors[16], const float weights[16]) +{ + +} + +void ColorSet::setColors(const Vector4 colors[16], const float weights[16]) +{ + +} + + + +void ColorSet::setAlphaWeights() +{ + for (uint i = 0; i < colorCount; i++) + { + //weights[i] = max(colors[i].w, 0.001f); // Avoid division by zero. + weights[i] = max(colors[i].w, 0.0f); + } +} + +void ColorSet::setUniformWeights() +{ + for (uint i = 0; i < colorCount; i++) + { + weights[i] = 1.0f; + } +} + + +// @@ Handle complex blocks (not 4x4). +void ColorSet::createMinimalSet(bool ignoreTransparent) +{ + nvDebugCheck(indexCount == 16); + nvDebugCheck(colorCount <= 16); + + Vector4 C[16]; + float W[16]; + memcpy(C, colors, sizeof(Vector4)*colorCount); + memcpy(W, weights, sizeof(float)*colorCount); + + uint n = 0; + for (uint i = 0; i < indexCount; i++) + { + if (indices[i] < 0) { + continue; + } + + Vector4 ci = C[indices[i]]; + float wi = W[indices[i]]; + + if (ignoreTransparent && wi == 0) { + indices[i] = -1; + continue; + } + + // Find matching color. + uint j; + for (j = 0; j < n; j++) { + bool colorMatch = equal(colors[j].x, ci.x) && equal(colors[j].y, ci.y) && equal(colors[j].z, ci.z); + //bool alphaMatch = equal(colors[j].w, ci.w); + + if (colorMatch) { + weights[j] += wi; + indices[i] = j; + break; + } + } + + // No match found. Add new color. + if (j == n) { + colors[n] = ci; + weights[n] = wi; + indices[i] = n; + n++; + } + } + //nvDebugCheck(n != 0); // Fully transparent blocks are OK. + + for (uint i = n; i < colorCount; i++) { + colors[i] = Vector4(0); + weights[i] = 0; + } + + colorCount = n; + + // Avoid empty blocks. 
+ if (colorCount == 0) { + colorCount = 1; + indices[0] = 0; + //colors[0] = Vector4(0); + weights[0] = 1; + } +} + + +// Fill blocks that are smaller than (4,4) by wrapping indices. +void ColorSet::wrapIndices() +{ + for (uint y = h; y < 4; y++) + { + uint base = (y % h) * w; + for (uint x = w; x < 4; x++) + { + indices[y*4+3] = indices[base + (x % w)]; + } + } +} + +bool ColorSet::isSingleColor(bool ignoreAlpha) const +{ + Vector4 v = colors[0]; + if (ignoreAlpha) v.w = 1.0f; + + for (uint i = 1; i < colorCount; i++) + { + Vector4 c = colors[i]; + if (ignoreAlpha) c.w = 1.0f; + + if (v != c) { + return false; + } + } + + return true; +} + + +// 0=r, 1=g, 2=b, 3=a, 4=0xFF, 5=0 +static inline float component(Vector4::Arg c, uint i) +{ + if (i == 0) return c.x; + if (i == 1) return c.y; + if (i == 2) return c.z; + if (i == 3) return c.w; + if (i == 4) return 0xFF; + return 0; +} + +void ColorSet::swizzle(uint x, uint y, uint z, uint w) +{ + for (uint i = 0; i < colorCount; i++) + { + Vector4 c = colors[i]; + colors[i].x = component(c, x); + colors[i].y = component(c, y); + colors[i].z = component(c, z); + colors[i].w = component(c, w); + } +} + +bool ColorSet::hasAlpha() const +{ + for (uint i = 0; i < colorCount; i++) + { + if (colors[i].w != 0.0f) return true; + } + return false; +} +#endif // 0 + + +void AlphaBlock4x4::init(uint8 a) +{ + for (int i = 0; i < 16; i++) { + alpha[i] = a; + weights[i] = 1.0f; + } +} + +void AlphaBlock4x4::init(const ColorBlock & src, uint channel) +{ + nvCheck(channel >= 0 && channel < 4); + + // Colors are in BGRA format. + if (channel == 0) channel = 2; + else if (channel == 2) channel = 0; + + for (int i = 0; i < 16; i++) { + alpha[i] = src.color(i).component[channel]; + weights[i] = 1.0f; + } +} + + + + +/*void AlphaBlock4x4::init(const ColorSet & src, uint channel) +{ + nvCheck(channel >= 0 && channel < 4); + + for (int i = 0; i < 16; i++) { + float f = src.color(i).component[channel]; + alpha[i] = unitFloatToFixed8(f); + weights[i] = 1.0f; + } +} + +void AlphaBlock4x4::initMaxRGB(const ColorSet & src, float threshold) +{ + for (int i = 0; i < 16; i++) { + float x = src.color(i).x; + float y = src.color(i).y; + float z = src.color(i).z; + alpha[i] = unitFloatToFixed8(max(max(x, y), max(z, threshold))); + weights[i] = 1.0f; + } +}*/ + +/*void AlphaBlock4x4::initWeights(const ColorSet & src) +{ + for (int i = 0; i < 16; i++) { + weights[i] = src.weight(i); + } +}*/ Index: ps/trunk/libraries/source/nvtt/src/src/nvimage/ColorSpace.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvimage/ColorSpace.h +++ ps/trunk/libraries/source/nvtt/src/src/nvimage/ColorSpace.h @@ -0,0 +1,22 @@ +// This code is in the public domain -- jim@tilander.org + +#pragma once +#ifndef NV_IMAGE_COLORSPACE_H +#define NV_IMAGE_COLORSPACE_H + +namespace nv +{ + class Image; + + // Defines simple mappings between different color spaces and encodes them in the + // input image. 
+ namespace ColorSpace + { + void RGBtoYCoCg_R(Image* img); + void YCoCg_RtoRGB(Image* img); + } +} + + + +#endif Index: ps/trunk/libraries/source/nvtt/src/src/nvimage/ColorSpace.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvimage/ColorSpace.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvimage/ColorSpace.cpp @@ -0,0 +1,69 @@ +// This code is in the public domain -- jim@tilander.org + +#include "ColorSpace.h" + +#include "nvimage/Image.h" +#include "nvmath/Color.h" + + +namespace nv +{ + void ColorSpace::RGBtoYCoCg_R(Image* img) + { + const uint w = img->width(); + const uint h = img->height(); + + for( uint y=0; y < h; y++ ) + { + for( uint x=0; x < w; x++ ) + { + Color32 pixel = img->pixel(x, y); + + const int r = pixel.r; + const int g = pixel.g; + const int b = pixel.b; + + const int Co = r - b; + const int t = b + Co/2; + const int Cg = g - t; + const int Y = t + Cg/2; + + // Just saturate the chroma here (we loose out of one bit in each channel) + // this just means that we won't have as high dynamic range. Perhaps a better option + // is to loose the least significant bit instead? + pixel.r = clamp(Co + 128, 0, 255); + pixel.g = clamp(Cg + 128, 0, 255); + pixel.b = 0; + pixel.a = Y; + } + } + } + + void ColorSpace::YCoCg_RtoRGB(Image* img) + { + const uint w = img->width(); + const uint h = img->height(); + + for( uint y=0; y < h; y++ ) + { + for( uint x=0; x < w; x++ ) + { + Color32 pixel = img->pixel(x, y); + + const int Co = (int)pixel.r - 128; + const int Cg = (int)pixel.g - 128; + const int Y = pixel.a; + + const int t = Y - Cg/2; + const int g = Cg + t; + const int b = t - Co/2; + const int r = b + Co; + + pixel.r = r; + pixel.g = g; + pixel.b = b; + pixel.a = 1; + } + } + } +} Index: ps/trunk/libraries/source/nvtt/src/src/nvimage/ConeMap.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvimage/ConeMap.h +++ ps/trunk/libraries/source/nvtt/src/src/nvimage/ConeMap.h @@ -1,39 +0,0 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. 
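The YCoCg_R pair above is a lifting transform: as long as Co and Cg are kept exactly, the inverse recovers r, g and b bit-for-bit regardless of integer rounding, because each step subtracts the same Co/2 and Cg/2 terms it previously added. Only the clamp to [0, 255] in RGBtoYCoCg_R is lossy (the one-bit dynamic range trade-off the in-code comment mentions). A minimal sketch of the round trip on plain ints, without the clamp:

#include <cassert>

// Mirror of the forward/inverse arithmetic above; exact for any 8-bit r, g, b.
void roundTripYCoCgR(int r, int g, int b)
{
    const int Co = r - b;          // forward (RGBtoYCoCg_R)
    const int t  = b + Co / 2;
    const int Cg = g - t;
    const int Y  = t + Cg / 2;

    const int t2 = Y - Cg / 2;     // inverse (YCoCg_RtoRGB)
    const int g2 = Cg + t2;
    const int b2 = t2 - Co / 2;
    const int r2 = b2 + Co;

    assert(r2 == r && g2 == g && b2 == b);
}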
- -#ifndef NV_IMAGE_CONEMAP_H -#define NV_IMAGE_CONEMAP_H - -#include -#include - -namespace nv -{ - class Image; - class FloatImage; - - FloatImage * createConeMap(const Image * img, Vector4::Arg heightWeights); - -} // nv namespace - -#endif // NV_IMAGE_CONEMAP_H Index: ps/trunk/libraries/source/nvtt/src/src/nvimage/ConeMap.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvimage/ConeMap.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvimage/ConeMap.cpp @@ -1,122 +0,0 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. - -#include - -#include - -#include -#include -#include -#include - -using namespace nv; - - -static float processPixel(const FloatImage * img, uint x, uint y) -{ - nvDebugCheck(img != NULL); - - const uint w = img->width(); - const uint h = img->height(); - - float d = img->pixel(x, y, 0); - - float fx0 = (float) x / w; - float fy0 = (float) y / h; - - float best_ratio = INF; - uint best_x = w; - uint best_y = h; - - for (uint yy = 0; yy < h; yy++) - { - for (uint xx = 0; xx < w; xx++) - { - float ch = d - img->pixel(xx, yy, 0); - - if (ch > 0) - { - float dx = float(xx - x); - float dy = float(yy - y); - - float ratio = (dx * dx + dy * dy) / ch; - - if (ratio < best_ratio) - { - best_x = xx; - best_y = yy; - } - } - } - } - - if (best_x != w) - { - nvDebugCheck(best_y !=h); - - float dx = float(best_x - x) / w; - float dy = float(best_y - y) / h; - - float cw = sqrtf(dx*dx + dy*dy); - float ch = d - img->pixel(xx, yy, 0); - - return min(1, sqrtf(cw / ch)); - } - - return 1; -} - - -// Create cone map using the given kernels. 
-FloatImage * createConeMap(const Image * img, Vector4::Arg heightWeights) -{ - nvCheck(img != NULL); - - const uint w = img->width(); - const uint h = img->height(); - - AutoPtr fimage(new FloatImage()); - //fimage->allocate(2, w, h); - fimage->allocate(4, w, h); - - // Compute height and store in red channel: - float * heightChannel = fimage->channel(0); - for(uint i = 0; i < w*h; i++) - { - Vector4 color = toVector4(img->pixel(i)); - heightChannel[i] = dot(color, heightWeights); - } - - // Compute cones: - for(uint y = 0; y < h; y++) - { - for(uint x = 0; x < w; x++) - { - processPixel(fimage, x, y); - } - } - - return fimage.release(); -} - Index: ps/trunk/libraries/source/nvtt/src/src/nvimage/DirectDrawSurface.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvimage/DirectDrawSurface.h +++ ps/trunk/libraries/source/nvtt/src/src/nvimage/DirectDrawSurface.h @@ -21,134 +21,410 @@ // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR // OTHER DEALINGS IN THE SOFTWARE. +#pragma once #ifndef NV_IMAGE_DIRECTDRAWSURFACE_H #define NV_IMAGE_DIRECTDRAWSURFACE_H -#include +#include "nvimage.h" + +#if !defined(MAKEFOURCC) +#define MAKEFOURCC(ch0, ch1, ch2, ch3) \ + (uint(uint8(ch0)) | (uint(uint8(ch1)) << 8) | \ + (uint(uint8(ch2)) << 16) | (uint(uint8(ch3)) << 24 )) +#endif namespace nv { - class Image; - class Stream; - struct ColorBlock; - - struct NVIMAGE_CLASS DDSPixelFormat - { - uint size; - uint flags; - uint fourcc; - uint bitcount; - uint rmask; - uint gmask; - uint bmask; - uint amask; - }; - - struct NVIMAGE_CLASS DDSCaps - { - uint caps1; - uint caps2; - uint caps3; - uint caps4; - }; - - /// DDS file header for DX10. - struct NVIMAGE_CLASS DDSHeader10 - { - uint dxgiFormat; - uint resourceDimension; - uint miscFlag; - uint arraySize; - uint reserved; - }; - - /// DDS file header. - struct NVIMAGE_CLASS DDSHeader - { - uint fourcc; - uint size; - uint flags; - uint height; - uint width; - uint pitch; - uint depth; - uint mipmapcount; - uint reserved[11]; - DDSPixelFormat pf; - DDSCaps caps; - uint notused; - DDSHeader10 header10; - - - // Helper methods. - DDSHeader(); - - void setWidth(uint w); - void setHeight(uint h); - void setDepth(uint d); - void setMipmapCount(uint count); - void setTexture2D(); - void setTexture3D(); - void setTextureCube(); - void setLinearSize(uint size); - void setPitch(uint pitch); - void setFourCC(uint8 c0, uint8 c1, uint8 c2, uint8 c3); - void setPixelFormat(uint bitcount, uint rmask, uint gmask, uint bmask, uint amask); - void setDX10Format(uint format); - void setNormalFlag(bool b); - - void swapBytes(); - - bool hasDX10Header() const; - }; - - NVIMAGE_API Stream & operator<< (Stream & s, DDSHeader & header); - - - /// DirectDraw Surface. 
(DDS) - class NVIMAGE_CLASS DirectDrawSurface - { - public: - DirectDrawSurface(const char * file); - ~DirectDrawSurface(); - - bool isValid() const; - bool isSupported() const; - - uint mipmapCount() const; - uint width() const; - uint height() const; - uint depth() const; - bool isTexture1D() const; - bool isTexture2D() const; - bool isTexture3D() const; - bool isTextureCube() const; - - void setNormalFlag(bool b); - - void mipmap(Image * img, uint f, uint m); - // void mipmap(FloatImage * img, uint f, uint m); - - void printInfo() const; - - private: - - uint blockSize() const; - uint faceSize() const; - uint mipmapSize(uint m) const; - - uint offset(uint f, uint m); - - void readLinearImage(Image * img); - void readBlockImage(Image * img); - void readBlock(ColorBlock * rgba); - - - private: - Stream * const stream; - DDSHeader header; - DDSHeader10 header10; - }; + class Image; + class Stream; + struct ColorBlock; + + enum DDPF + { + DDPF_ALPHAPIXELS = 0x00000001U, + DDPF_ALPHA = 0x00000002U, + DDPF_FOURCC = 0x00000004U, + DDPF_RGB = 0x00000040U, + DDPF_PALETTEINDEXED1 = 0x00000800U, + DDPF_PALETTEINDEXED2 = 0x00001000U, + DDPF_PALETTEINDEXED4 = 0x00000008U, + DDPF_PALETTEINDEXED8 = 0x00000020U, + DDPF_LUMINANCE = 0x00020000U, + DDPF_ALPHAPREMULT = 0x00008000U, + + // Custom NVTT flags. + DDPF_NORMAL = 0x80000000U, + DDPF_SRGB = 0x40000000U, + }; + + + enum D3DFORMAT + { + // 32 bit RGB formats. + D3DFMT_R8G8B8 = 20, + D3DFMT_A8R8G8B8 = 21, + D3DFMT_X8R8G8B8 = 22, + D3DFMT_R5G6B5 = 23, + D3DFMT_X1R5G5B5 = 24, + D3DFMT_A1R5G5B5 = 25, + D3DFMT_A4R4G4B4 = 26, + D3DFMT_R3G3B2 = 27, + D3DFMT_A8 = 28, + D3DFMT_A8R3G3B2 = 29, + D3DFMT_X4R4G4B4 = 30, + D3DFMT_A2B10G10R10 = 31, + D3DFMT_A8B8G8R8 = 32, + D3DFMT_X8B8G8R8 = 33, + D3DFMT_G16R16 = 34, + D3DFMT_A2R10G10B10 = 35, + + D3DFMT_A16B16G16R16 = 36, + + // Palette formats. + D3DFMT_A8P8 = 40, + D3DFMT_P8 = 41, + + // Luminance formats. + D3DFMT_L8 = 50, + D3DFMT_A8L8 = 51, + D3DFMT_A4L4 = 52, + D3DFMT_L16 = 81, + + // Floating point formats + D3DFMT_R16F = 111, + D3DFMT_G16R16F = 112, + D3DFMT_A16B16G16R16F = 113, + D3DFMT_R32F = 114, + D3DFMT_G32R32F = 115, + D3DFMT_A32B32G32R32F = 116, + }; + + enum FOURCC + { + FOURCC_NVTT = MAKEFOURCC('N', 'V', 'T', 'T'), + FOURCC_DDS = MAKEFOURCC('D', 'D', 'S', ' '), + FOURCC_DXT1 = MAKEFOURCC('D', 'X', 'T', '1'), + FOURCC_DXT2 = MAKEFOURCC('D', 'X', 'T', '2'), + FOURCC_DXT3 = MAKEFOURCC('D', 'X', 'T', '3'), + FOURCC_DXT4 = MAKEFOURCC('D', 'X', 'T', '4'), + FOURCC_DXT5 = MAKEFOURCC('D', 'X', 'T', '5'), + FOURCC_RXGB = MAKEFOURCC('R', 'X', 'G', 'B'), + FOURCC_ATI1 = MAKEFOURCC('A', 'T', 'I', '1'), + FOURCC_ATI2 = MAKEFOURCC('A', 'T', 'I', '2'), + FOURCC_A2XY = MAKEFOURCC('A', '2', 'X', 'Y'), + FOURCC_DX10 = MAKEFOURCC('D', 'X', '1', '0'), + FOURCC_UVER = MAKEFOURCC('U', 'V', 'E', 'R'), + }; + + + // D3D1x resource dimensions. + enum DDS_DIMENSION // D3D10_RESOURCE_DIMENSION + { + DDS_DIMENSION_UNKNOWN = 0, + DDS_DIMENSION_BUFFER = 1, + DDS_DIMENSION_TEXTURE1D = 2, + DDS_DIMENSION_TEXTURE2D = 3, + DDS_DIMENSION_TEXTURE3D = 4, + }; + + enum DDS_MISC_FLAG + { + DDS_MISC_TEXTURECUBE = 0x4, + }; + + // DXGI formats. 
+ enum DXGI_FORMAT + { + DXGI_FORMAT_UNKNOWN = 0, + + DXGI_FORMAT_R32G32B32A32_TYPELESS = 1, + DXGI_FORMAT_R32G32B32A32_FLOAT = 2, + DXGI_FORMAT_R32G32B32A32_UINT = 3, + DXGI_FORMAT_R32G32B32A32_SINT = 4, + + DXGI_FORMAT_R32G32B32_TYPELESS = 5, + DXGI_FORMAT_R32G32B32_FLOAT = 6, + DXGI_FORMAT_R32G32B32_UINT = 7, + DXGI_FORMAT_R32G32B32_SINT = 8, + + DXGI_FORMAT_R16G16B16A16_TYPELESS = 9, + DXGI_FORMAT_R16G16B16A16_FLOAT = 10, + DXGI_FORMAT_R16G16B16A16_UNORM = 11, + DXGI_FORMAT_R16G16B16A16_UINT = 12, + DXGI_FORMAT_R16G16B16A16_SNORM = 13, + DXGI_FORMAT_R16G16B16A16_SINT = 14, + + DXGI_FORMAT_R32G32_TYPELESS = 15, + DXGI_FORMAT_R32G32_FLOAT = 16, + DXGI_FORMAT_R32G32_UINT = 17, + DXGI_FORMAT_R32G32_SINT = 18, + + DXGI_FORMAT_R32G8X24_TYPELESS = 19, + DXGI_FORMAT_D32_FLOAT_S8X24_UINT = 20, + DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS = 21, + DXGI_FORMAT_X32_TYPELESS_G8X24_UINT = 22, + + DXGI_FORMAT_R10G10B10A2_TYPELESS = 23, + DXGI_FORMAT_R10G10B10A2_UNORM = 24, + DXGI_FORMAT_R10G10B10A2_UINT = 25, + + DXGI_FORMAT_R11G11B10_FLOAT = 26, + + DXGI_FORMAT_R8G8B8A8_TYPELESS = 27, + DXGI_FORMAT_R8G8B8A8_UNORM = 28, + DXGI_FORMAT_R8G8B8A8_UNORM_SRGB = 29, + DXGI_FORMAT_R8G8B8A8_UINT = 30, + DXGI_FORMAT_R8G8B8A8_SNORM = 31, + DXGI_FORMAT_R8G8B8A8_SINT = 32, + + DXGI_FORMAT_R16G16_TYPELESS = 33, + DXGI_FORMAT_R16G16_FLOAT = 34, + DXGI_FORMAT_R16G16_UNORM = 35, + DXGI_FORMAT_R16G16_UINT = 36, + DXGI_FORMAT_R16G16_SNORM = 37, + DXGI_FORMAT_R16G16_SINT = 38, + + DXGI_FORMAT_R32_TYPELESS = 39, + DXGI_FORMAT_D32_FLOAT = 40, + DXGI_FORMAT_R32_FLOAT = 41, + DXGI_FORMAT_R32_UINT = 42, + DXGI_FORMAT_R32_SINT = 43, + + DXGI_FORMAT_R24G8_TYPELESS = 44, + DXGI_FORMAT_D24_UNORM_S8_UINT = 45, + DXGI_FORMAT_R24_UNORM_X8_TYPELESS = 46, + DXGI_FORMAT_X24_TYPELESS_G8_UINT = 47, + + DXGI_FORMAT_R8G8_TYPELESS = 48, + DXGI_FORMAT_R8G8_UNORM = 49, + DXGI_FORMAT_R8G8_UINT = 50, + DXGI_FORMAT_R8G8_SNORM = 51, + DXGI_FORMAT_R8G8_SINT = 52, + + DXGI_FORMAT_R16_TYPELESS = 53, + DXGI_FORMAT_R16_FLOAT = 54, + DXGI_FORMAT_D16_UNORM = 55, + DXGI_FORMAT_R16_UNORM = 56, + DXGI_FORMAT_R16_UINT = 57, + DXGI_FORMAT_R16_SNORM = 58, + DXGI_FORMAT_R16_SINT = 59, + + DXGI_FORMAT_R8_TYPELESS = 60, + DXGI_FORMAT_R8_UNORM = 61, + DXGI_FORMAT_R8_UINT = 62, + DXGI_FORMAT_R8_SNORM = 63, + DXGI_FORMAT_R8_SINT = 64, + DXGI_FORMAT_A8_UNORM = 65, + + DXGI_FORMAT_R1_UNORM = 66, + + DXGI_FORMAT_R9G9B9E5_SHAREDEXP = 67, + + DXGI_FORMAT_R8G8_B8G8_UNORM = 68, + DXGI_FORMAT_G8R8_G8B8_UNORM = 69, + + DXGI_FORMAT_BC1_TYPELESS = 70, + DXGI_FORMAT_BC1_UNORM = 71, + DXGI_FORMAT_BC1_UNORM_SRGB = 72, + + DXGI_FORMAT_BC2_TYPELESS = 73, + DXGI_FORMAT_BC2_UNORM = 74, + DXGI_FORMAT_BC2_UNORM_SRGB = 75, + + DXGI_FORMAT_BC3_TYPELESS = 76, + DXGI_FORMAT_BC3_UNORM = 77, + DXGI_FORMAT_BC3_UNORM_SRGB = 78, + + DXGI_FORMAT_BC4_TYPELESS = 79, + DXGI_FORMAT_BC4_UNORM = 80, + DXGI_FORMAT_BC4_SNORM = 81, + + DXGI_FORMAT_BC5_TYPELESS = 82, + DXGI_FORMAT_BC5_UNORM = 83, + DXGI_FORMAT_BC5_SNORM = 84, + + DXGI_FORMAT_B5G6R5_UNORM = 85, + DXGI_FORMAT_B5G5R5A1_UNORM = 86, + DXGI_FORMAT_B8G8R8A8_UNORM = 87, + DXGI_FORMAT_B8G8R8X8_UNORM = 88, + + DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM = 89, + DXGI_FORMAT_B8G8R8A8_TYPELESS = 90, + DXGI_FORMAT_B8G8R8A8_UNORM_SRGB = 91, + DXGI_FORMAT_B8G8R8X8_TYPELESS = 92, + DXGI_FORMAT_B8G8R8X8_UNORM_SRGB = 93, + + DXGI_FORMAT_BC6H_TYPELESS = 94, + DXGI_FORMAT_BC6H_UF16 = 95, + DXGI_FORMAT_BC6H_SF16 = 96, + + DXGI_FORMAT_BC7_TYPELESS = 97, + DXGI_FORMAT_BC7_UNORM = 98, + DXGI_FORMAT_BC7_UNORM_SRGB = 99, + }; + + NVIMAGE_API extern uint findD3D9Format(uint 
bitcount, uint rmask, uint gmask, uint bmask, uint amask); + + NVIMAGE_API extern uint findDXGIFormat(uint bitcount, uint rmask, uint gmask, uint bmask, uint amask); + + struct RGBAPixelFormat + { + uint bitcount; + uint rmask; + uint gmask; + uint bmask; + uint amask; + }; + + extern const RGBAPixelFormat *findDXGIPixelFormat(uint dxgiFormat); + + struct NVIMAGE_CLASS DDSPixelFormat + { + uint size; + uint flags; + uint fourcc; + uint bitcount; + uint rmask; + uint gmask; + uint bmask; + uint amask; + }; + + struct NVIMAGE_CLASS DDSCaps + { + uint caps1; + uint caps2; + uint caps3; + uint caps4; + }; + + /// DDS file header for DX10. + struct NVIMAGE_CLASS DDSHeader10 + { + uint dxgiFormat; + uint resourceDimension; + uint miscFlag; + uint arraySize; + uint reserved; + }; + + /// DDS file header. + struct NVIMAGE_CLASS DDSHeader + { + uint fourcc; + uint size; + uint flags; + uint height; + uint width; + uint pitch; + uint depth; + uint mipmapcount; + uint reserved[11]; + DDSPixelFormat pf; + DDSCaps caps; + uint notused; + DDSHeader10 header10; + + + // Helper methods. + DDSHeader(); + + void setWidth(uint w); + void setHeight(uint h); + void setDepth(uint d); + void setMipmapCount(uint count); + void setTexture2D(); + void setTexture3D(); + void setTextureCube(); + void setTextureArray(int imageCount); + void setLinearSize(uint size); + void setPitch(uint pitch); + void setFourCC(uint8 c0, uint8 c1, uint8 c2, uint8 c3); + void setFormatCode(uint code); + void setSwizzleCode(uint8 c0, uint8 c1, uint8 c2, uint8 c3); + void setPixelFormat(uint bitcount, uint rmask, uint gmask, uint bmask, uint amask); + void setDX10Format(uint format); + void setNormalFlag(bool b); + void setSrgbFlag(bool b); + void setHasAlphaFlag(bool b); + void setUserVersion(int version); + + void swapBytes(); + + bool hasDX10Header() const; + uint signature() const; + uint toolVersion() const; + uint userVersion() const; + bool isNormalMap() const; + bool isSrgb() const; + bool hasAlpha() const; + uint d3d9Format() const; + uint pixelSize() const; // In bits! + uint blockSize() const; // In bytes! + bool isBlockFormat() const; + }; + + NVIMAGE_API Stream & operator<< (Stream & s, DDSHeader & header); + + + /// DirectDraw Surface. (DDS) + class NVIMAGE_CLASS DirectDrawSurface + { + public: + DirectDrawSurface(); + DirectDrawSurface(const char * file); + DirectDrawSurface(Stream * stream); + ~DirectDrawSurface(); + + bool load(const char * filename); + bool load(Stream * stream); + + bool isValid() const; + bool isSupported() const; + + bool hasAlpha() const; + + uint mipmapCount() const; + uint width() const; + uint height() const; + uint depth() const; + uint arraySize() const; + bool isTexture1D() const; + bool isTexture2D() const; + bool isTexture3D() const; + bool isTextureCube() const; + bool isTextureArray() const; + + void setNormalFlag(bool b); + void setHasAlphaFlag(bool b); + void setUserVersion(int version); + + void mipmap(Image * img, uint f, uint m); + + uint surfaceWidth(uint mipmap) const; + uint surfaceHeight(uint mipmap) const; + uint surfaceDepth(uint mipmap) const; + uint surfaceSize(uint mipmap) const; + bool readSurface(uint face, uint mipmap, void * data, uint size); + + void printInfo() const; + + // Only initialized after loading. 
+ DDSHeader header; + + private: + + uint faceSize() const; + uint offset(uint face, uint mipmap); + + void readLinearImage(Image * img, uint bitcount, uint rmask, uint gmask, uint bmask, uint amask); + void readBlockImage(Image * img); + void readBlock(ColorBlock * rgba); + + + private: + Stream * stream; + }; } // nv namespace Index: ps/trunk/libraries/source/nvtt/src/src/nvimage/DirectDrawSurface.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvimage/DirectDrawSurface.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvimage/DirectDrawSurface.cpp @@ -21,1301 +21,1670 @@ // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR // OTHER DEALINGS IN THE SOFTWARE. -#include <nvcore/Debug.h> -#include <nvcore/Containers.h> // max -#include <nvcore/StdStream.h> - -#include <nvimage/DirectDrawSurface.h> -#include <nvimage/ColorBlock.h> -#include <nvimage/Image.h> -#include <nvimage/BlockDXT.h> -#include <nvimage/PixelFormat.h> +#include "DirectDrawSurface.h" +#include "ColorBlock.h" +#include "Image.h" +#include "BlockDXT.h" +#include "PixelFormat.h" + +#include "nvcore/Debug.h" +#include "nvcore/Utils.h" // max +#include "nvcore/StdStream.h" +#include "nvmath/Vector.inl" +#include "nvmath/ftoi.h" #include <string.h> // memset using namespace nv; -#if !defined(MAKEFOURCC) -# define MAKEFOURCC(ch0, ch1, ch2, ch3) \ - (uint(uint8(ch0)) | (uint(uint8(ch1)) << 8) | \ - (uint(uint8(ch2)) << 16) | (uint(uint8(ch3)) << 24 )) -#endif - namespace { - static const uint FOURCC_DDS = MAKEFOURCC('D', 'D', 'S', ' '); - static const uint FOURCC_DXT1 = MAKEFOURCC('D', 'X', 'T', '1'); - static const uint FOURCC_DXT2 = MAKEFOURCC('D', 'X', 'T', '2'); - static const uint FOURCC_DXT3 = MAKEFOURCC('D', 'X', 'T', '3'); - static const uint FOURCC_DXT4 = MAKEFOURCC('D', 'X', 'T', '4'); - static const uint FOURCC_DXT5 = MAKEFOURCC('D', 'X', 'T', '5'); - static const uint FOURCC_RXGB = MAKEFOURCC('R', 'X', 'G', 'B'); - static const uint FOURCC_ATI1 = MAKEFOURCC('A', 'T', 'I', '1'); - static const uint FOURCC_ATI2 = MAKEFOURCC('A', 'T', 'I', '2'); - - static const uint FOURCC_A2XY = MAKEFOURCC('A', '2', 'X', 'Y'); - - static const uint FOURCC_DX10 = MAKEFOURCC('D', 'X', '1', '0'); - - // 32 bit RGB formats. - static const uint D3DFMT_R8G8B8 = 20; - static const uint D3DFMT_A8R8G8B8 = 21; - static const uint D3DFMT_X8R8G8B8 = 22; - static const uint D3DFMT_R5G6B5 = 23; - static const uint D3DFMT_X1R5G5B5 = 24; - static const uint D3DFMT_A1R5G5B5 = 25; - static const uint D3DFMT_A4R4G4B4 = 26; - static const uint D3DFMT_R3G3B2 = 27; - static const uint D3DFMT_A8 = 28; - static const uint D3DFMT_A8R3G3B2 = 29; - static const uint D3DFMT_X4R4G4B4 = 30; - static const uint D3DFMT_A2B10G10R10 = 31; - static const uint D3DFMT_A8B8G8R8 = 32; - static const uint D3DFMT_X8B8G8R8 = 33; - static const uint D3DFMT_G16R16 = 34; - static const uint D3DFMT_A2R10G10B10 = 35; - - static const uint D3DFMT_A16B16G16R16 = 36; - - // Palette formats. - static const uint D3DFMT_A8P8 = 40; - static const uint D3DFMT_P8 = 41; - - // Luminance formats.
- static const uint D3DFMT_L8 = 50; - static const uint D3DFMT_A8L8 = 51; - static const uint D3DFMT_A4L4 = 52; - static const uint D3DFMT_L16 = 81; - - // Floating point formats - static const uint D3DFMT_R16F = 111; - static const uint D3DFMT_G16R16F = 112; - static const uint D3DFMT_A16B16G16R16F = 113; - static const uint D3DFMT_R32F = 114; - static const uint D3DFMT_G32R32F = 115; - static const uint D3DFMT_A32B32G32R32F = 116; - - static const uint DDSD_CAPS = 0x00000001U; - static const uint DDSD_PIXELFORMAT = 0x00001000U; - static const uint DDSD_WIDTH = 0x00000004U; - static const uint DDSD_HEIGHT = 0x00000002U; - static const uint DDSD_PITCH = 0x00000008U; - static const uint DDSD_MIPMAPCOUNT = 0x00020000U; - static const uint DDSD_LINEARSIZE = 0x00080000U; - static const uint DDSD_DEPTH = 0x00800000U; - - static const uint DDSCAPS_COMPLEX = 0x00000008U; - static const uint DDSCAPS_TEXTURE = 0x00001000U; - static const uint DDSCAPS_MIPMAP = 0x00400000U; - static const uint DDSCAPS2_VOLUME = 0x00200000U; - static const uint DDSCAPS2_CUBEMAP = 0x00000200U; - - static const uint DDSCAPS2_CUBEMAP_POSITIVEX = 0x00000400U; - static const uint DDSCAPS2_CUBEMAP_NEGATIVEX = 0x00000800U; - static const uint DDSCAPS2_CUBEMAP_POSITIVEY = 0x00001000U; - static const uint DDSCAPS2_CUBEMAP_NEGATIVEY = 0x00002000U; - static const uint DDSCAPS2_CUBEMAP_POSITIVEZ = 0x00004000U; - static const uint DDSCAPS2_CUBEMAP_NEGATIVEZ = 0x00008000U; - static const uint DDSCAPS2_CUBEMAP_ALL_FACES = 0x0000FC00U; - - static const uint DDPF_ALPHAPIXELS = 0x00000001U; - static const uint DDPF_ALPHA = 0x00000002U; - static const uint DDPF_FOURCC = 0x00000004U; - static const uint DDPF_RGB = 0x00000040U; - static const uint DDPF_PALETTEINDEXED1 = 0x00000800U; - static const uint DDPF_PALETTEINDEXED2 = 0x00001000U; - static const uint DDPF_PALETTEINDEXED4 = 0x00000008U; - static const uint DDPF_PALETTEINDEXED8 = 0x00000020U; - static const uint DDPF_LUMINANCE = 0x00020000U; - static const uint DDPF_ALPHAPREMULT = 0x00008000U; - static const uint DDPF_NORMAL = 0x80000000U; // @@ Custom nv flag. - - // DX10 formats. 
- enum DXGI_FORMAT - { - DXGI_FORMAT_UNKNOWN = 0, - - DXGI_FORMAT_R32G32B32A32_TYPELESS = 1, - DXGI_FORMAT_R32G32B32A32_FLOAT = 2, - DXGI_FORMAT_R32G32B32A32_UINT = 3, - DXGI_FORMAT_R32G32B32A32_SINT = 4, - - DXGI_FORMAT_R32G32B32_TYPELESS = 5, - DXGI_FORMAT_R32G32B32_FLOAT = 6, - DXGI_FORMAT_R32G32B32_UINT = 7, - DXGI_FORMAT_R32G32B32_SINT = 8, - - DXGI_FORMAT_R16G16B16A16_TYPELESS = 9, - DXGI_FORMAT_R16G16B16A16_FLOAT = 10, - DXGI_FORMAT_R16G16B16A16_UNORM = 11, - DXGI_FORMAT_R16G16B16A16_UINT = 12, - DXGI_FORMAT_R16G16B16A16_SNORM = 13, - DXGI_FORMAT_R16G16B16A16_SINT = 14, - - DXGI_FORMAT_R32G32_TYPELESS = 15, - DXGI_FORMAT_R32G32_FLOAT = 16, - DXGI_FORMAT_R32G32_UINT = 17, - DXGI_FORMAT_R32G32_SINT = 18, - - DXGI_FORMAT_R32G8X24_TYPELESS = 19, - DXGI_FORMAT_D32_FLOAT_S8X24_UINT = 20, - DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS = 21, - DXGI_FORMAT_X32_TYPELESS_G8X24_UINT = 22, - - DXGI_FORMAT_R10G10B10A2_TYPELESS = 23, - DXGI_FORMAT_R10G10B10A2_UNORM = 24, - DXGI_FORMAT_R10G10B10A2_UINT = 25, - - DXGI_FORMAT_R11G11B10_FLOAT = 26, - - DXGI_FORMAT_R8G8B8A8_TYPELESS = 27, - DXGI_FORMAT_R8G8B8A8_UNORM = 28, - DXGI_FORMAT_R8G8B8A8_UNORM_SRGB = 29, - DXGI_FORMAT_R8G8B8A8_UINT = 30, - DXGI_FORMAT_R8G8B8A8_SNORM = 31, - DXGI_FORMAT_R8G8B8A8_SINT = 32, - - DXGI_FORMAT_R16G16_TYPELESS = 33, - DXGI_FORMAT_R16G16_FLOAT = 34, - DXGI_FORMAT_R16G16_UNORM = 35, - DXGI_FORMAT_R16G16_UINT = 36, - DXGI_FORMAT_R16G16_SNORM = 37, - DXGI_FORMAT_R16G16_SINT = 38, - - DXGI_FORMAT_R32_TYPELESS = 39, - DXGI_FORMAT_D32_FLOAT = 40, - DXGI_FORMAT_R32_FLOAT = 41, - DXGI_FORMAT_R32_UINT = 42, - DXGI_FORMAT_R32_SINT = 43, - - DXGI_FORMAT_R24G8_TYPELESS = 44, - DXGI_FORMAT_D24_UNORM_S8_UINT = 45, - DXGI_FORMAT_R24_UNORM_X8_TYPELESS = 46, - DXGI_FORMAT_X24_TYPELESS_G8_UINT = 47, - - DXGI_FORMAT_R8G8_TYPELESS = 48, - DXGI_FORMAT_R8G8_UNORM = 49, - DXGI_FORMAT_R8G8_UINT = 50, - DXGI_FORMAT_R8G8_SNORM = 51, - DXGI_FORMAT_R8G8_SINT = 52, - - DXGI_FORMAT_R16_TYPELESS = 53, - DXGI_FORMAT_R16_FLOAT = 54, - DXGI_FORMAT_D16_UNORM = 55, - DXGI_FORMAT_R16_UNORM = 56, - DXGI_FORMAT_R16_UINT = 57, - DXGI_FORMAT_R16_SNORM = 58, - DXGI_FORMAT_R16_SINT = 59, - - DXGI_FORMAT_R8_TYPELESS = 60, - DXGI_FORMAT_R8_UNORM = 61, - DXGI_FORMAT_R8_UINT = 62, - DXGI_FORMAT_R8_SNORM = 63, - DXGI_FORMAT_R8_SINT = 64, - DXGI_FORMAT_A8_UNORM = 65, - - DXGI_FORMAT_R1_UNORM = 66, - - DXGI_FORMAT_R9G9B9E5_SHAREDEXP = 67, - - DXGI_FORMAT_R8G8_B8G8_UNORM = 68, - DXGI_FORMAT_G8R8_G8B8_UNORM = 69, - - DXGI_FORMAT_BC1_TYPELESS = 70, - DXGI_FORMAT_BC1_UNORM = 71, - DXGI_FORMAT_BC1_UNORM_SRGB = 72, - - DXGI_FORMAT_BC2_TYPELESS = 73, - DXGI_FORMAT_BC2_UNORM = 74, - DXGI_FORMAT_BC2_UNORM_SRGB = 75, - - DXGI_FORMAT_BC3_TYPELESS = 76, - DXGI_FORMAT_BC3_UNORM = 77, - DXGI_FORMAT_BC3_UNORM_SRGB = 78, - - DXGI_FORMAT_BC4_TYPELESS = 79, - DXGI_FORMAT_BC4_UNORM = 80, - DXGI_FORMAT_BC4_SNORM = 81, - - DXGI_FORMAT_BC5_TYPELESS = 82, - DXGI_FORMAT_BC5_UNORM = 83, - DXGI_FORMAT_BC5_SNORM = 84, - - DXGI_FORMAT_B5G6R5_UNORM = 85, - DXGI_FORMAT_B5G5R5A1_UNORM = 86, - DXGI_FORMAT_B8G8R8A8_UNORM = 87, - DXGI_FORMAT_B8G8R8X8_UNORM = 88, - }; - - enum D3D10_RESOURCE_DIMENSION - { - D3D10_RESOURCE_DIMENSION_UNKNOWN = 0, - D3D10_RESOURCE_DIMENSION_BUFFER = 1, - D3D10_RESOURCE_DIMENSION_TEXTURE1D = 2, - D3D10_RESOURCE_DIMENSION_TEXTURE2D = 3, - D3D10_RESOURCE_DIMENSION_TEXTURE3D = 4, - }; + + static const uint DDSD_CAPS = 0x00000001U; + static const uint DDSD_PIXELFORMAT = 0x00001000U; + static const uint DDSD_WIDTH = 0x00000004U; + static const uint DDSD_HEIGHT = 0x00000002U; + static 
const uint DDSD_PITCH = 0x00000008U; + static const uint DDSD_MIPMAPCOUNT = 0x00020000U; + static const uint DDSD_LINEARSIZE = 0x00080000U; + static const uint DDSD_DEPTH = 0x00800000U; + + static const uint DDSCAPS_COMPLEX = 0x00000008U; + static const uint DDSCAPS_TEXTURE = 0x00001000U; + static const uint DDSCAPS_MIPMAP = 0x00400000U; + static const uint DDSCAPS2_VOLUME = 0x00200000U; + static const uint DDSCAPS2_CUBEMAP = 0x00000200U; + + static const uint DDSCAPS2_CUBEMAP_POSITIVEX = 0x00000400U; + static const uint DDSCAPS2_CUBEMAP_NEGATIVEX = 0x00000800U; + static const uint DDSCAPS2_CUBEMAP_POSITIVEY = 0x00001000U; + static const uint DDSCAPS2_CUBEMAP_NEGATIVEY = 0x00002000U; + static const uint DDSCAPS2_CUBEMAP_POSITIVEZ = 0x00004000U; + static const uint DDSCAPS2_CUBEMAP_NEGATIVEZ = 0x00008000U; + static const uint DDSCAPS2_CUBEMAP_ALL_FACES = 0x0000FC00U; - const char * getDxgiFormatString(DXGI_FORMAT dxgiFormat) - { + const char * getDxgiFormatString(DXGI_FORMAT dxgiFormat) + { #define CASE(format) case DXGI_FORMAT_##format: return #format - switch(dxgiFormat) - { - CASE(UNKNOWN); - - CASE(R32G32B32A32_TYPELESS); - CASE(R32G32B32A32_FLOAT); - CASE(R32G32B32A32_UINT); - CASE(R32G32B32A32_SINT); - - CASE(R32G32B32_TYPELESS); - CASE(R32G32B32_FLOAT); - CASE(R32G32B32_UINT); - CASE(R32G32B32_SINT); - - CASE(R16G16B16A16_TYPELESS); - CASE(R16G16B16A16_FLOAT); - CASE(R16G16B16A16_UNORM); - CASE(R16G16B16A16_UINT); - CASE(R16G16B16A16_SNORM); - CASE(R16G16B16A16_SINT); - - CASE(R32G32_TYPELESS); - CASE(R32G32_FLOAT); - CASE(R32G32_UINT); - CASE(R32G32_SINT); - - CASE(R32G8X24_TYPELESS); - CASE(D32_FLOAT_S8X24_UINT); - CASE(R32_FLOAT_X8X24_TYPELESS); - CASE(X32_TYPELESS_G8X24_UINT); - - CASE(R10G10B10A2_TYPELESS); - CASE(R10G10B10A2_UNORM); - CASE(R10G10B10A2_UINT); - - CASE(R11G11B10_FLOAT); - - CASE(R8G8B8A8_TYPELESS); - CASE(R8G8B8A8_UNORM); - CASE(R8G8B8A8_UNORM_SRGB); - CASE(R8G8B8A8_UINT); - CASE(R8G8B8A8_SNORM); - CASE(R8G8B8A8_SINT); - - CASE(R16G16_TYPELESS); - CASE(R16G16_FLOAT); - CASE(R16G16_UNORM); - CASE(R16G16_UINT); - CASE(R16G16_SNORM); - CASE(R16G16_SINT); - - CASE(R32_TYPELESS); - CASE(D32_FLOAT); - CASE(R32_FLOAT); - CASE(R32_UINT); - CASE(R32_SINT); - - CASE(R24G8_TYPELESS); - CASE(D24_UNORM_S8_UINT); - CASE(R24_UNORM_X8_TYPELESS); - CASE(X24_TYPELESS_G8_UINT); - - CASE(R8G8_TYPELESS); - CASE(R8G8_UNORM); - CASE(R8G8_UINT); - CASE(R8G8_SNORM); - CASE(R8G8_SINT); - - CASE(R16_TYPELESS); - CASE(R16_FLOAT); - CASE(D16_UNORM); - CASE(R16_UNORM); - CASE(R16_UINT); - CASE(R16_SNORM); - CASE(R16_SINT); - - CASE(R8_TYPELESS); - CASE(R8_UNORM); - CASE(R8_UINT); - CASE(R8_SNORM); - CASE(R8_SINT); - CASE(A8_UNORM); - - CASE(R1_UNORM); - - CASE(R9G9B9E5_SHAREDEXP); - - CASE(R8G8_B8G8_UNORM); - CASE(G8R8_G8B8_UNORM); - - CASE(BC1_TYPELESS); - CASE(BC1_UNORM); - CASE(BC1_UNORM_SRGB); - - CASE(BC2_TYPELESS); - CASE(BC2_UNORM); - CASE(BC2_UNORM_SRGB); - - CASE(BC3_TYPELESS); - CASE(BC3_UNORM); - CASE(BC3_UNORM_SRGB); - - CASE(BC4_TYPELESS); - CASE(BC4_UNORM); - CASE(BC4_SNORM); - - CASE(BC5_TYPELESS); - CASE(BC5_UNORM); - CASE(BC5_SNORM); - - CASE(B5G6R5_UNORM); - CASE(B5G5R5A1_UNORM); - CASE(B8G8R8A8_UNORM); - CASE(B8G8R8X8_UNORM); - - default: - return "UNKNOWN"; - } + switch(dxgiFormat) + { + CASE(UNKNOWN); + + CASE(R32G32B32A32_TYPELESS); + CASE(R32G32B32A32_FLOAT); + CASE(R32G32B32A32_UINT); + CASE(R32G32B32A32_SINT); + + CASE(R32G32B32_TYPELESS); + CASE(R32G32B32_FLOAT); + CASE(R32G32B32_UINT); + CASE(R32G32B32_SINT); + + CASE(R16G16B16A16_TYPELESS); + 
CASE(R16G16B16A16_FLOAT); + CASE(R16G16B16A16_UNORM); + CASE(R16G16B16A16_UINT); + CASE(R16G16B16A16_SNORM); + CASE(R16G16B16A16_SINT); + + CASE(R32G32_TYPELESS); + CASE(R32G32_FLOAT); + CASE(R32G32_UINT); + CASE(R32G32_SINT); + + CASE(R32G8X24_TYPELESS); + CASE(D32_FLOAT_S8X24_UINT); + CASE(R32_FLOAT_X8X24_TYPELESS); + CASE(X32_TYPELESS_G8X24_UINT); + + CASE(R10G10B10A2_TYPELESS); + CASE(R10G10B10A2_UNORM); + CASE(R10G10B10A2_UINT); + + CASE(R11G11B10_FLOAT); + + CASE(R8G8B8A8_TYPELESS); + CASE(R8G8B8A8_UNORM); + CASE(R8G8B8A8_UNORM_SRGB); + CASE(R8G8B8A8_UINT); + CASE(R8G8B8A8_SNORM); + CASE(R8G8B8A8_SINT); + + CASE(R16G16_TYPELESS); + CASE(R16G16_FLOAT); + CASE(R16G16_UNORM); + CASE(R16G16_UINT); + CASE(R16G16_SNORM); + CASE(R16G16_SINT); + + CASE(R32_TYPELESS); + CASE(D32_FLOAT); + CASE(R32_FLOAT); + CASE(R32_UINT); + CASE(R32_SINT); + + CASE(R24G8_TYPELESS); + CASE(D24_UNORM_S8_UINT); + CASE(R24_UNORM_X8_TYPELESS); + CASE(X24_TYPELESS_G8_UINT); + + CASE(R8G8_TYPELESS); + CASE(R8G8_UNORM); + CASE(R8G8_UINT); + CASE(R8G8_SNORM); + CASE(R8G8_SINT); + + CASE(R16_TYPELESS); + CASE(R16_FLOAT); + CASE(D16_UNORM); + CASE(R16_UNORM); + CASE(R16_UINT); + CASE(R16_SNORM); + CASE(R16_SINT); + + CASE(R8_TYPELESS); + CASE(R8_UNORM); + CASE(R8_UINT); + CASE(R8_SNORM); + CASE(R8_SINT); + CASE(A8_UNORM); + + CASE(R1_UNORM); + + CASE(R9G9B9E5_SHAREDEXP); + + CASE(R8G8_B8G8_UNORM); + CASE(G8R8_G8B8_UNORM); + + CASE(BC1_TYPELESS); + CASE(BC1_UNORM); + CASE(BC1_UNORM_SRGB); + + CASE(BC2_TYPELESS); + CASE(BC2_UNORM); + CASE(BC2_UNORM_SRGB); + + CASE(BC3_TYPELESS); + CASE(BC3_UNORM); + CASE(BC3_UNORM_SRGB); + + CASE(BC4_TYPELESS); + CASE(BC4_UNORM); + CASE(BC4_SNORM); + + CASE(BC5_TYPELESS); + CASE(BC5_UNORM); + CASE(BC5_SNORM); + + CASE(B5G6R5_UNORM); + CASE(B5G5R5A1_UNORM); + CASE(B8G8R8A8_UNORM); + CASE(B8G8R8X8_UNORM); + + default: + return "UNKNOWN"; + } #undef CASE - } - - const char * getD3d10ResourceDimensionString(D3D10_RESOURCE_DIMENSION resourceDimension) - { - switch(resourceDimension) - { - default: - case D3D10_RESOURCE_DIMENSION_UNKNOWN: return "UNKNOWN"; - case D3D10_RESOURCE_DIMENSION_BUFFER: return "BUFFER"; - case D3D10_RESOURCE_DIMENSION_TEXTURE1D: return "TEXTURE1D"; - case D3D10_RESOURCE_DIMENSION_TEXTURE2D: return "TEXTURE2D"; - case D3D10_RESOURCE_DIMENSION_TEXTURE3D: return "TEXTURE3D"; - } - } + } + + const char * getD3d10ResourceDimensionString(DDS_DIMENSION resourceDimension) + { + switch(resourceDimension) + { + default: + case DDS_DIMENSION_UNKNOWN: return "UNKNOWN"; + case DDS_DIMENSION_BUFFER: return "BUFFER"; + case DDS_DIMENSION_TEXTURE1D: return "TEXTURE1D"; + case DDS_DIMENSION_TEXTURE2D: return "TEXTURE2D"; + case DDS_DIMENSION_TEXTURE3D: return "TEXTURE3D"; + } + } + + static uint pixelSize(D3DFORMAT format) { + if (format == D3DFMT_R16F) return 8*2; + if (format == D3DFMT_G16R16F) return 8*4; + if (format == D3DFMT_A16B16G16R16F) return 8*8; + if (format == D3DFMT_R32F) return 8*4; + if (format == D3DFMT_G32R32F) return 8*8; + if (format == D3DFMT_A32B32G32R32F) return 8*16; + + if (format == D3DFMT_R8G8B8) return 8*3; + if (format == D3DFMT_A8R8G8B8) return 8*4; + if (format == D3DFMT_X8R8G8B8) return 8*4; + if (format == D3DFMT_R5G6B5) return 8*2; + if (format == D3DFMT_X1R5G5B5) return 8*2; + if (format == D3DFMT_A1R5G5B5) return 8*2; + if (format == D3DFMT_A4R4G4B4) return 8*2; + if (format == D3DFMT_R3G3B2) return 8*1; + if (format == D3DFMT_A8) return 8*1; + if (format == D3DFMT_A8R3G3B2) return 8*2; + if (format == D3DFMT_X4R4G4B4) return 8*2; + if (format == 
D3DFMT_A2B10G10R10) return 8*4; + if (format == D3DFMT_A8B8G8R8) return 8*4; + if (format == D3DFMT_X8B8G8R8) return 8*4; + if (format == D3DFMT_G16R16) return 8*4; + if (format == D3DFMT_A2R10G10B10) return 8*4; + if (format == D3DFMT_A2B10G10R10) return 8*4; + + if (format == D3DFMT_L8) return 8*1; + if (format == D3DFMT_L16) return 8*2; + + return 0; + } + + static uint pixelSize(DXGI_FORMAT format) { + switch(format) { + case DXGI_FORMAT_R32G32B32A32_TYPELESS: + case DXGI_FORMAT_R32G32B32A32_FLOAT: + case DXGI_FORMAT_R32G32B32A32_UINT: + case DXGI_FORMAT_R32G32B32A32_SINT: + return 8*16; + + case DXGI_FORMAT_R32G32B32_TYPELESS: + case DXGI_FORMAT_R32G32B32_FLOAT: + case DXGI_FORMAT_R32G32B32_UINT: + case DXGI_FORMAT_R32G32B32_SINT: + return 8*12; + + case DXGI_FORMAT_R16G16B16A16_TYPELESS: + case DXGI_FORMAT_R16G16B16A16_FLOAT: + case DXGI_FORMAT_R16G16B16A16_UNORM: + case DXGI_FORMAT_R16G16B16A16_UINT: + case DXGI_FORMAT_R16G16B16A16_SNORM: + case DXGI_FORMAT_R16G16B16A16_SINT: + + case DXGI_FORMAT_R32G32_TYPELESS: + case DXGI_FORMAT_R32G32_FLOAT: + case DXGI_FORMAT_R32G32_UINT: + case DXGI_FORMAT_R32G32_SINT: + + case DXGI_FORMAT_R32G8X24_TYPELESS: + case DXGI_FORMAT_D32_FLOAT_S8X24_UINT: + case DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS: + case DXGI_FORMAT_X32_TYPELESS_G8X24_UINT: + return 8*8; + + case DXGI_FORMAT_R10G10B10A2_TYPELESS: + case DXGI_FORMAT_R10G10B10A2_UNORM: + case DXGI_FORMAT_R10G10B10A2_UINT: + + case DXGI_FORMAT_R11G11B10_FLOAT: + + case DXGI_FORMAT_R8G8B8A8_TYPELESS: + case DXGI_FORMAT_R8G8B8A8_UNORM: + case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB: + case DXGI_FORMAT_R8G8B8A8_UINT: + case DXGI_FORMAT_R8G8B8A8_SNORM: + case DXGI_FORMAT_R8G8B8A8_SINT: + + case DXGI_FORMAT_R16G16_TYPELESS: + case DXGI_FORMAT_R16G16_FLOAT: + case DXGI_FORMAT_R16G16_UNORM: + case DXGI_FORMAT_R16G16_UINT: + case DXGI_FORMAT_R16G16_SNORM: + case DXGI_FORMAT_R16G16_SINT: + + case DXGI_FORMAT_R32_TYPELESS: + case DXGI_FORMAT_D32_FLOAT: + case DXGI_FORMAT_R32_FLOAT: + case DXGI_FORMAT_R32_UINT: + case DXGI_FORMAT_R32_SINT: + + case DXGI_FORMAT_R24G8_TYPELESS: + case DXGI_FORMAT_D24_UNORM_S8_UINT: + case DXGI_FORMAT_R24_UNORM_X8_TYPELESS: + case DXGI_FORMAT_X24_TYPELESS_G8_UINT: + return 8*4; + + case DXGI_FORMAT_R8G8_TYPELESS: + case DXGI_FORMAT_R8G8_UNORM: + case DXGI_FORMAT_R8G8_UINT: + case DXGI_FORMAT_R8G8_SNORM: + case DXGI_FORMAT_R8G8_SINT: + + case DXGI_FORMAT_R16_TYPELESS: + case DXGI_FORMAT_R16_FLOAT: + case DXGI_FORMAT_D16_UNORM: + case DXGI_FORMAT_R16_UNORM: + case DXGI_FORMAT_R16_UINT: + case DXGI_FORMAT_R16_SNORM: + case DXGI_FORMAT_R16_SINT: + return 8*2; + + case DXGI_FORMAT_R8_TYPELESS: + case DXGI_FORMAT_R8_UNORM: + case DXGI_FORMAT_R8_UINT: + case DXGI_FORMAT_R8_SNORM: + case DXGI_FORMAT_R8_SINT: + case DXGI_FORMAT_A8_UNORM: + return 8*1; + + case DXGI_FORMAT_R1_UNORM: + return 1; + + case DXGI_FORMAT_R9G9B9E5_SHAREDEXP: + return 8*4; + + case DXGI_FORMAT_R8G8_B8G8_UNORM: + case DXGI_FORMAT_G8R8_G8B8_UNORM: + return 8*4; + + case DXGI_FORMAT_B5G6R5_UNORM: + case DXGI_FORMAT_B5G5R5A1_UNORM: + return 8*2; + + case DXGI_FORMAT_B8G8R8A8_UNORM: + case DXGI_FORMAT_B8G8R8X8_UNORM: + return 8*4; + + case DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM: + case DXGI_FORMAT_B8G8R8A8_TYPELESS: + case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB: + case DXGI_FORMAT_B8G8R8X8_TYPELESS: + case DXGI_FORMAT_B8G8R8X8_UNORM_SRGB: + return 8*4; + + default: + return 0; + } + nvUnreachable(); + } } // namespace namespace nv { - static Stream & operator<< (Stream & s, DDSPixelFormat & pf) - { - 
nvStaticCheck(sizeof(DDSPixelFormat) == 32); - s << pf.size; - s << pf.flags; - s << pf.fourcc; - s << pf.bitcount; - s << pf.rmask; - s << pf.gmask; - s << pf.bmask; - s << pf.amask; - return s; - } - - static Stream & operator<< (Stream & s, DDSCaps & caps) - { - nvStaticCheck(sizeof(DDSCaps) == 16); - s << caps.caps1; - s << caps.caps2; - s << caps.caps3; - s << caps.caps4; - return s; - } - - static Stream & operator<< (Stream & s, DDSHeader10 & header) - { - nvStaticCheck(sizeof(DDSHeader10) == 20); - s << header.dxgiFormat; - s << header.resourceDimension; - s << header.miscFlag; - s << header.arraySize; - s << header.reserved; - return s; - } - - Stream & operator<< (Stream & s, DDSHeader & header) - { - nvStaticCheck(sizeof(DDSHeader) == 148); - s << header.fourcc; - s << header.size; - s << header.flags; - s << header.height; - s << header.width; - s << header.pitch; - s << header.depth; - s << header.mipmapcount; - s.serialize(header.reserved, 11 * sizeof(uint)); - s << header.pf; - s << header.caps; - s << header.notused; - - if (header.hasDX10Header()) - { - s << header.header10; - } - - return s; - } + static Stream & operator<< (Stream & s, DDSPixelFormat & pf) + { + nvStaticCheck(sizeof(DDSPixelFormat) == 32); + s << pf.size; + s << pf.flags; + s << pf.fourcc; + s << pf.bitcount; + s.serialize(&pf.rmask, sizeof(pf.rmask)); + s.serialize(&pf.gmask, sizeof(pf.gmask)); + s.serialize(&pf.bmask, sizeof(pf.bmask)); + s.serialize(&pf.amask, sizeof(pf.amask)); + // s << pf.rmask; + // s << pf.gmask; + // s << pf.bmask; + // s << pf.amask; + return s; + } + + static Stream & operator<< (Stream & s, DDSCaps & caps) + { + nvStaticCheck(sizeof(DDSCaps) == 16); + s << caps.caps1; + s << caps.caps2; + s << caps.caps3; + s << caps.caps4; + return s; + } + + static Stream & operator<< (Stream & s, DDSHeader10 & header) + { + nvStaticCheck(sizeof(DDSHeader10) == 20); + s << header.dxgiFormat; + s << header.resourceDimension; + s << header.miscFlag; + s << header.arraySize; + s << header.reserved; + return s; + } + + Stream & operator<< (Stream & s, DDSHeader & header) + { + nvStaticCheck(sizeof(DDSHeader) == 148); + s << header.fourcc; + s << header.size; + s << header.flags; + s << header.height; + s << header.width; + s << header.pitch; + s << header.depth; + s << header.mipmapcount; + for (int i = 0; i < 11; i++) { + s << header.reserved[i]; + } + s << header.pf; + s << header.caps; + s << header.notused; + + if (header.hasDX10Header()) + { + s << header.header10; + } + + return s; + } } // nv namespace -/* Not used! 
namespace { - struct FormatDescriptor - { - uint format; - uint bitcount; - uint rmask; - uint gmask; - uint bmask; - uint amask; - }; - - static const FormatDescriptor s_d3dFormats[] = - { - { D3DFMT_R8G8B8, 24, 0xFF0000, 0xFF00, 0xFF, 0 }, - { D3DFMT_A8R8G8B8, 32, 0xFF0000, 0xFF00, 0xFF, 0xFF000000 }, // DXGI_FORMAT_B8G8R8A8_UNORM - { D3DFMT_X8R8G8B8, 32, 0xFF0000, 0xFF00, 0xFF, 0 }, // DXGI_FORMAT_B8G8R8X8_UNORM - { D3DFMT_R5G6B5, 16, 0xF800, 0x7E0, 0x1F, 0 }, // DXGI_FORMAT_B5G6R5_UNORM - { D3DFMT_X1R5G5B5, 16, 0x7C00, 0x3E0, 0x1F, 0 }, - { D3DFMT_A1R5G5B5, 16, 0x7C00, 0x3E0, 0x1F, 0x8000 }, // DXGI_FORMAT_B5G5R5A1_UNORM - { D3DFMT_A4R4G4B4, 16, 0xF00, 0xF0, 0xF, 0xF000 }, - { D3DFMT_R3G3B2, 8, 0xE0, 0x1C, 0x3, 0 }, - { D3DFMT_A8, 8, 0, 0, 0, 8 }, // DXGI_FORMAT_A8_UNORM - { D3DFMT_A8R3G3B2, 16, 0xE0, 0x1C, 0x3, 0xFF00 }, - { D3DFMT_X4R4G4B4, 16, 0xF00, 0xF0, 0xF, 0 }, - { D3DFMT_A2B10G10R10, 32, 0x3FF, 0xFFC00, 0x3FF00000, 0xC0000000 }, // DXGI_FORMAT_R10G10B10A2 - { D3DFMT_A8B8G8R8, 32, 0xFF, 0xFF00, 0xFF0000, 0xFF000000 }, // DXGI_FORMAT_R8G8B8A8_UNORM - { D3DFMT_X8B8G8R8, 32, 0xFF, 0xFF00, 0xFF0000, 0 }, - { D3DFMT_G16R16, 32, 0xFFFF, 0xFFFF0000, 0, 0 }, // DXGI_FORMAT_R16G16_UNORM - { D3DFMT_A2R10G10B10, 32, 0x3FF00000, 0xFFC00, 0x3FF, 0xC0000000 }, - - { D3DFMT_L8, 8, 8, 0, 0, 0 }, // DXGI_FORMAT_R8_UNORM - { D3DFMT_L16, 16, 16, 0, 0, 0 }, // DXGI_FORMAT_R16_UNORM - }; - - static const uint s_d3dFormatCount = sizeof(s_d3dFormats) / sizeof(s_d3dFormats[0]); - - static uint findD3D9Format(uint bitcount, uint rmask, uint gmask, uint bmask, uint amask) - { - for (int i = 0; i < s_d3dFormatCount; i++) - { - if (s_d3dFormats[i].bitcount == bitcount && - s_d3dFormats[i].rmask == rmask && - s_d3dFormats[i].gmask == gmask && - s_d3dFormats[i].bmask == bmask && - s_d3dFormats[i].amask == amask) - { - return s_d3dFormats[i].format; - } - } + struct FormatDescriptor + { + uint d3d9Format; + uint dxgiFormat; + RGBAPixelFormat pixelFormat; + }; + + static const FormatDescriptor s_formats[] = + { + { D3DFMT_R8G8B8, DXGI_FORMAT_UNKNOWN, { 24, 0xFF0000, 0xFF00, 0xFF, 0 } }, + { D3DFMT_A8R8G8B8, DXGI_FORMAT_B8G8R8A8_UNORM, { 32, 0xFF0000, 0xFF00, 0xFF, 0xFF000000 } }, + { D3DFMT_X8R8G8B8, DXGI_FORMAT_B8G8R8X8_UNORM, { 32, 0xFF0000, 0xFF00, 0xFF, 0 } }, + { D3DFMT_R5G6B5, DXGI_FORMAT_B5G6R5_UNORM, { 16, 0xF800, 0x7E0, 0x1F, 0 } }, + { D3DFMT_X1R5G5B5, DXGI_FORMAT_UNKNOWN, { 16, 0x7C00, 0x3E0, 0x1F, 0 } }, + { D3DFMT_A1R5G5B5, DXGI_FORMAT_B5G5R5A1_UNORM, { 16, 0x7C00, 0x3E0, 0x1F, 0x8000 } }, + { D3DFMT_A4R4G4B4, DXGI_FORMAT_UNKNOWN, { 16, 0xF00, 0xF0, 0xF, 0xF000 } }, + { D3DFMT_R3G3B2, DXGI_FORMAT_UNKNOWN, { 8, 0xE0, 0x1C, 0x3, 0 } }, + { D3DFMT_A8, DXGI_FORMAT_A8_UNORM, { 8, 0, 0, 0, 8 } }, + { D3DFMT_A8R3G3B2, DXGI_FORMAT_UNKNOWN, { 16, 0xE0, 0x1C, 0x3, 0xFF00 } }, + { D3DFMT_X4R4G4B4, DXGI_FORMAT_UNKNOWN, { 16, 0xF00, 0xF0, 0xF, 0 } }, + { D3DFMT_A2B10G10R10, DXGI_FORMAT_R10G10B10A2_UNORM, { 32, 0x3FF, 0xFFC00, 0x3FF00000, 0xC0000000 } }, + { D3DFMT_A8B8G8R8, DXGI_FORMAT_R8G8B8A8_UNORM, { 32, 0xFF, 0xFF00, 0xFF0000, 0xFF000000 } }, + { D3DFMT_X8B8G8R8, DXGI_FORMAT_UNKNOWN, { 32, 0xFF, 0xFF00, 0xFF0000, 0 } }, + { D3DFMT_G16R16, DXGI_FORMAT_R16G16_UNORM, { 32, 0xFFFF, 0xFFFF0000, 0, 0 } }, + { D3DFMT_A2R10G10B10, DXGI_FORMAT_UNKNOWN, { 32, 0x3FF00000, 0xFFC00, 0x3FF, 0xC0000000 } }, + { D3DFMT_A2B10G10R10, DXGI_FORMAT_UNKNOWN, { 32, 0x3FF, 0xFFC00, 0x3FF00000, 0xC0000000 } }, + + { D3DFMT_L8, DXGI_FORMAT_R8_UNORM , { 8, 0xFF, 0, 0, 0 } }, + { D3DFMT_L16, DXGI_FORMAT_R16_UNORM, { 16, 0xFFFF, 0, 
0, 0 } }, + { D3DFMT_A8L8, DXGI_FORMAT_R8G8_UNORM, { 16, 0xFF, 0, 0, 0xFF00 } }, + }; - return 0; - } + static const uint s_formatCount = NV_ARRAY_SIZE(s_formats); -} // nv namespace -*/ +} // namespace + +NVIMAGE_API uint nv::findD3D9Format(uint bitcount, uint rmask, uint gmask, uint bmask, uint amask) +{ + for (int i = 0; i < s_formatCount; i++) + { + if (s_formats[i].pixelFormat.bitcount == bitcount && + s_formats[i].pixelFormat.rmask == rmask && + s_formats[i].pixelFormat.gmask == gmask && + s_formats[i].pixelFormat.bmask == bmask && + s_formats[i].pixelFormat.amask == amask) + { + return s_formats[i].d3d9Format; + } + } + + return 0; +} + +NVIMAGE_API uint nv::findDXGIFormat(uint bitcount, uint rmask, uint gmask, uint bmask, uint amask) +{ + for (int i = 0; i < s_formatCount; i++) + { + if (s_formats[i].pixelFormat.bitcount == bitcount && + s_formats[i].pixelFormat.rmask == rmask && + s_formats[i].pixelFormat.gmask == gmask && + s_formats[i].pixelFormat.bmask == bmask && + s_formats[i].pixelFormat.amask == amask) + { + return s_formats[i].dxgiFormat; + } + } + + return DXGI_FORMAT_UNKNOWN; +} + +const RGBAPixelFormat *nv::findDXGIPixelFormat(uint dxgiFormat) +{ + for (int i = 0; i < s_formatCount; i++) + { + if (s_formats[i].dxgiFormat == dxgiFormat) { + return &s_formats[i].pixelFormat; + } + } + + return NULL; +} DDSHeader::DDSHeader() { - this->fourcc = FOURCC_DDS; - this->size = 124; - this->flags = (DDSD_CAPS|DDSD_PIXELFORMAT); - this->height = 0; - this->width = 0; - this->pitch = 0; - this->depth = 0; - this->mipmapcount = 0; - memset(this->reserved, 0, sizeof(this->reserved)); - - // Store version information on the reserved header attributes. - this->reserved[9] = MAKEFOURCC('N', 'V', 'T', 'T'); - this->reserved[10] = (2 << 16) | (0 << 8) | (8); // major.minor.revision - - this->pf.size = 32; - this->pf.flags = 0; - this->pf.fourcc = 0; - this->pf.bitcount = 0; - this->pf.rmask = 0; - this->pf.gmask = 0; - this->pf.bmask = 0; - this->pf.amask = 0; - this->caps.caps1 = DDSCAPS_TEXTURE; - this->caps.caps2 = 0; - this->caps.caps3 = 0; - this->caps.caps4 = 0; - this->notused = 0; - - this->header10.dxgiFormat = DXGI_FORMAT_UNKNOWN; - this->header10.resourceDimension = D3D10_RESOURCE_DIMENSION_UNKNOWN; - this->header10.miscFlag = 0; - this->header10.arraySize = 0; - this->header10.reserved = 0; + this->fourcc = FOURCC_DDS; + this->size = 124; + this->flags = (DDSD_CAPS|DDSD_PIXELFORMAT); + this->height = 0; + this->width = 0; + this->pitch = 0; + this->depth = 0; + this->mipmapcount = 0; + memset(this->reserved, 0, sizeof(this->reserved)); + + // Store version information on the reserved header attributes. 
+ this->reserved[9] = FOURCC_NVTT; + this->reserved[10] = (2 << 16) | (1 << 8) | (0); // major.minor.revision + + this->pf.size = 32; + this->pf.flags = 0; + this->pf.fourcc = 0; + this->pf.bitcount = 0; + this->pf.rmask = 0; + this->pf.gmask = 0; + this->pf.bmask = 0; + this->pf.amask = 0; + this->caps.caps1 = DDSCAPS_TEXTURE; + this->caps.caps2 = 0; + this->caps.caps3 = 0; + this->caps.caps4 = 0; + this->notused = 0; + + this->header10.dxgiFormat = DXGI_FORMAT_UNKNOWN; + this->header10.resourceDimension = DDS_DIMENSION_UNKNOWN; + this->header10.miscFlag = 0; + this->header10.arraySize = 0; + this->header10.reserved = 0; } void DDSHeader::setWidth(uint w) { - this->flags |= DDSD_WIDTH; - this->width = w; + this->flags |= DDSD_WIDTH; + this->width = w; } void DDSHeader::setHeight(uint h) { - this->flags |= DDSD_HEIGHT; - this->height = h; + this->flags |= DDSD_HEIGHT; + this->height = h; } void DDSHeader::setDepth(uint d) { - this->flags |= DDSD_DEPTH; - this->height = d; + this->flags |= DDSD_DEPTH; + this->depth = d; } void DDSHeader::setMipmapCount(uint count) { - if (count == 0 || count == 1) - { - this->flags &= ~DDSD_MIPMAPCOUNT; - this->mipmapcount = 0; - - if (this->caps.caps2 == 0) { - this->caps.caps1 = DDSCAPS_TEXTURE; - } - else { - this->caps.caps1 = DDSCAPS_TEXTURE | DDSCAPS_COMPLEX; - } - } - else - { - this->flags |= DDSD_MIPMAPCOUNT; - this->mipmapcount = count; + if (count == 0 || count == 1) + { + this->flags &= ~DDSD_MIPMAPCOUNT; + this->mipmapcount = 1; + + if (this->caps.caps2 == 0) { + this->caps.caps1 = DDSCAPS_TEXTURE; + } + else { + this->caps.caps1 = DDSCAPS_TEXTURE | DDSCAPS_COMPLEX; + } + } + else + { + this->flags |= DDSD_MIPMAPCOUNT; + this->mipmapcount = count; - this->caps.caps1 |= DDSCAPS_COMPLEX | DDSCAPS_MIPMAP; - } + this->caps.caps1 |= DDSCAPS_COMPLEX | DDSCAPS_MIPMAP; + } } void DDSHeader::setTexture2D() { - this->header10.resourceDimension = D3D10_RESOURCE_DIMENSION_TEXTURE2D; + this->header10.resourceDimension = DDS_DIMENSION_TEXTURE2D; + this->header10.miscFlag = 0; + this->header10.arraySize = 1; } void DDSHeader::setTexture3D() { - this->caps.caps2 = DDSCAPS2_VOLUME; - - this->header10.resourceDimension = D3D10_RESOURCE_DIMENSION_TEXTURE3D; + this->caps.caps2 = DDSCAPS2_VOLUME; + + this->header10.resourceDimension = DDS_DIMENSION_TEXTURE3D; + this->header10.miscFlag = 0; + this->header10.arraySize = 1; } void DDSHeader::setTextureCube() { - this->caps.caps1 |= DDSCAPS_COMPLEX; - this->caps.caps2 = DDSCAPS2_CUBEMAP | DDSCAPS2_CUBEMAP_ALL_FACES; - - this->header10.resourceDimension = D3D10_RESOURCE_DIMENSION_TEXTURE2D; - this->header10.arraySize = 6; + this->caps.caps1 |= DDSCAPS_COMPLEX; + this->caps.caps2 = DDSCAPS2_CUBEMAP | DDSCAPS2_CUBEMAP_ALL_FACES; + + this->header10.resourceDimension = DDS_DIMENSION_TEXTURE2D; + this->header10.miscFlag = DDS_MISC_TEXTURECUBE; + this->header10.arraySize = 1; +} + +void DDSHeader::setTextureArray(int imageCount) +{ + this->header10.resourceDimension = DDS_DIMENSION_TEXTURE2D; + this->header10.arraySize = imageCount; } void DDSHeader::setLinearSize(uint size) { - this->flags &= ~DDSD_PITCH; - this->flags |= DDSD_LINEARSIZE; - this->pitch = size; + this->flags &= ~DDSD_PITCH; + this->flags |= DDSD_LINEARSIZE; + this->pitch = size; } void DDSHeader::setPitch(uint pitch) { - this->flags &= ~DDSD_LINEARSIZE; - this->flags |= DDSD_PITCH; - this->pitch = pitch; + this->flags &= ~DDSD_LINEARSIZE; + this->flags |= DDSD_PITCH; + this->pitch = pitch; } void DDSHeader::setFourCC(uint8 c0, uint8 c1, uint8 c2, uint8 c3) { 
- // set fourcc pixel format. - this->pf.flags = DDPF_FOURCC; - this->pf.fourcc = MAKEFOURCC(c0, c1, c2, c3); - - if (this->pf.fourcc == FOURCC_ATI2) - { - this->pf.bitcount = FOURCC_A2XY; - } - else - { - this->pf.bitcount = 0; - } - - this->pf.rmask = 0; - this->pf.gmask = 0; - this->pf.bmask = 0; - this->pf.amask = 0; + // set fourcc pixel format. + this->pf.flags = DDPF_FOURCC; + this->pf.fourcc = MAKEFOURCC(c0, c1, c2, c3); + + this->pf.bitcount = 0; + this->pf.rmask = 0; + this->pf.gmask = 0; + this->pf.bmask = 0; + this->pf.amask = 0; +} + +void DDSHeader::setFormatCode(uint32 code) +{ + // set fourcc pixel format. + this->pf.flags = DDPF_FOURCC; + this->pf.fourcc = code; + + this->pf.bitcount = 0; + this->pf.rmask = 0; + this->pf.gmask = 0; + this->pf.bmask = 0; + this->pf.amask = 0; +} + +void DDSHeader::setSwizzleCode(uint8 c0, uint8 c1, uint8 c2, uint8 c3) +{ + this->pf.bitcount = MAKEFOURCC(c0, c1, c2, c3); } + void DDSHeader::setPixelFormat(uint bitcount, uint rmask, uint gmask, uint bmask, uint amask) { - // Make sure the masks are correct. - nvCheck((rmask & gmask) == 0); - nvCheck((rmask & bmask) == 0); - nvCheck((rmask & amask) == 0); - nvCheck((gmask & bmask) == 0); - nvCheck((gmask & amask) == 0); - nvCheck((bmask & amask) == 0); - - this->pf.flags = DDPF_RGB; - - if (amask != 0) { - this->pf.flags |= DDPF_ALPHAPIXELS; - } - - if (bitcount == 0) - { - // Compute bit count from the masks. - uint total = rmask | gmask | bmask | amask; - while(total != 0) { - bitcount++; - total >>= 1; - } - } - - nvCheck(bitcount > 0 && bitcount <= 32); - - // Align to 8. - if (bitcount <= 8) bitcount = 8; - else if (bitcount <= 16) bitcount = 16; - else if (bitcount <= 24) bitcount = 24; - else bitcount = 32; - - this->pf.fourcc = 0; //findD3D9Format(bitcount, rmask, gmask, bmask, amask); - this->pf.bitcount = bitcount; - this->pf.rmask = rmask; - this->pf.gmask = gmask; - this->pf.bmask = bmask; - this->pf.amask = amask; + // Make sure the masks are correct. + nvCheck((rmask & gmask) == 0); + nvCheck((rmask & bmask) == 0); + nvCheck((rmask & amask) == 0); + nvCheck((gmask & bmask) == 0); + nvCheck((gmask & amask) == 0); + nvCheck((bmask & amask) == 0); + + if (rmask != 0 || gmask != 0 || bmask != 0) + { + if (gmask == 0 && bmask == 0) + { + this->pf.flags = DDPF_LUMINANCE; + } + else + { + this->pf.flags = DDPF_RGB; + } + + if (amask != 0) { + this->pf.flags |= DDPF_ALPHAPIXELS; + } + } + else if (amask != 0) + { + this->pf.flags |= DDPF_ALPHA; + } + + if (bitcount == 0) + { + // Compute bit count from the masks. 
+ uint total = rmask | gmask | bmask | amask; + while(total != 0) { + bitcount++; + total >>= 1; + } + } + + // D3DX functions do not like this: + this->pf.fourcc = 0; //findD3D9Format(bitcount, rmask, gmask, bmask, amask); + /*if (this->pf.fourcc) { + this->pf.flags |= DDPF_FOURCC; + }*/ + + nvCheck(bitcount > 0 && bitcount <= 32); + this->pf.bitcount = bitcount; + this->pf.rmask = rmask; + this->pf.gmask = gmask; + this->pf.bmask = bmask; + this->pf.amask = amask; } void DDSHeader::setDX10Format(uint format) { - //this->pf.flags = 0; - this->pf.fourcc = FOURCC_DX10; - this->header10.dxgiFormat = format; + this->pf.flags = DDPF_FOURCC; + this->pf.fourcc = FOURCC_DX10; + this->header10.dxgiFormat = format; } void DDSHeader::setNormalFlag(bool b) { - if (b) this->pf.flags |= DDPF_NORMAL; - else this->pf.flags &= ~DDPF_NORMAL; + if (b) this->pf.flags |= DDPF_NORMAL; + else this->pf.flags &= ~DDPF_NORMAL; +} + +void DDSHeader::setSrgbFlag(bool b) +{ + if (b) this->pf.flags |= DDPF_SRGB; + else this->pf.flags &= ~DDPF_SRGB; +} + +void DDSHeader::setHasAlphaFlag(bool b) +{ + if (b) this->pf.flags |= DDPF_ALPHAPIXELS; + else this->pf.flags &= ~DDPF_ALPHAPIXELS; +} + +void DDSHeader::setUserVersion(int version) +{ + this->reserved[7] = FOURCC_UVER; + this->reserved[8] = version; } void DDSHeader::swapBytes() { - this->fourcc = POSH_LittleU32(this->fourcc); - this->size = POSH_LittleU32(this->size); - this->flags = POSH_LittleU32(this->flags); - this->height = POSH_LittleU32(this->height); - this->width = POSH_LittleU32(this->width); - this->pitch = POSH_LittleU32(this->pitch); - this->depth = POSH_LittleU32(this->depth); - this->mipmapcount = POSH_LittleU32(this->mipmapcount); - - for(int i = 0; i < 11; i++) { - this->reserved[i] = POSH_LittleU32(this->reserved[i]); - } - - this->pf.size = POSH_LittleU32(this->pf.size); - this->pf.flags = POSH_LittleU32(this->pf.flags); - this->pf.fourcc = POSH_LittleU32(this->pf.fourcc); - this->pf.bitcount = POSH_LittleU32(this->pf.bitcount); - this->pf.rmask = POSH_LittleU32(this->pf.rmask); - this->pf.gmask = POSH_LittleU32(this->pf.gmask); - this->pf.bmask = POSH_LittleU32(this->pf.bmask); - this->pf.amask = POSH_LittleU32(this->pf.amask); - this->caps.caps1 = POSH_LittleU32(this->caps.caps1); - this->caps.caps2 = POSH_LittleU32(this->caps.caps2); - this->caps.caps3 = POSH_LittleU32(this->caps.caps3); - this->caps.caps4 = POSH_LittleU32(this->caps.caps4); - this->notused = POSH_LittleU32(this->notused); - - this->header10.dxgiFormat = POSH_LittleU32(this->header10.dxgiFormat); - this->header10.resourceDimension = POSH_LittleU32(this->header10.resourceDimension); - this->header10.miscFlag = POSH_LittleU32(this->header10.miscFlag); - this->header10.arraySize = POSH_LittleU32(this->header10.arraySize); - this->header10.reserved = POSH_LittleU32(this->header10.reserved); + this->fourcc = POSH_LittleU32(this->fourcc); + this->size = POSH_LittleU32(this->size); + this->flags = POSH_LittleU32(this->flags); + this->height = POSH_LittleU32(this->height); + this->width = POSH_LittleU32(this->width); + this->pitch = POSH_LittleU32(this->pitch); + this->depth = POSH_LittleU32(this->depth); + this->mipmapcount = POSH_LittleU32(this->mipmapcount); + + for(int i = 0; i < 11; i++) { + this->reserved[i] = POSH_LittleU32(this->reserved[i]); + } + + this->pf.size = POSH_LittleU32(this->pf.size); + this->pf.flags = POSH_LittleU32(this->pf.flags); + this->pf.fourcc = POSH_LittleU32(this->pf.fourcc); + this->pf.bitcount = POSH_LittleU32(this->pf.bitcount); + this->pf.rmask = 
POSH_LittleU32(this->pf.rmask); + this->pf.gmask = POSH_LittleU32(this->pf.gmask); + this->pf.bmask = POSH_LittleU32(this->pf.bmask); + this->pf.amask = POSH_LittleU32(this->pf.amask); + this->caps.caps1 = POSH_LittleU32(this->caps.caps1); + this->caps.caps2 = POSH_LittleU32(this->caps.caps2); + this->caps.caps3 = POSH_LittleU32(this->caps.caps3); + this->caps.caps4 = POSH_LittleU32(this->caps.caps4); + this->notused = POSH_LittleU32(this->notused); + + this->header10.dxgiFormat = POSH_LittleU32(this->header10.dxgiFormat); + this->header10.resourceDimension = POSH_LittleU32(this->header10.resourceDimension); + this->header10.miscFlag = POSH_LittleU32(this->header10.miscFlag); + this->header10.arraySize = POSH_LittleU32(this->header10.arraySize); + this->header10.reserved = POSH_LittleU32(this->header10.reserved); } bool DDSHeader::hasDX10Header() const { - return this->pf.fourcc == FOURCC_DX10; // @@ This is according to AMD - //return this->pf.flags == 0; // @@ This is according to MS + //if (pf.flags & DDPF_FOURCC) { + return this->pf.fourcc == FOURCC_DX10; + //} + //return false; } +uint DDSHeader::signature() const +{ + return this->reserved[9]; +} +uint DDSHeader::toolVersion() const +{ + return this->reserved[10]; +} -DirectDrawSurface::DirectDrawSurface(const char * name) : stream(new StdInputStream(name)) +uint DDSHeader::userVersion() const { - if (!stream->isError()) - { - (*stream) << header; - } + if (this->reserved[7] == FOURCC_UVER) { + return this->reserved[8]; + } + return 0; +} + +bool DDSHeader::isNormalMap() const +{ + return (pf.flags & DDPF_NORMAL) != 0; +} + +bool DDSHeader::isSrgb() const +{ + return (pf.flags & DDPF_SRGB) != 0; +} + +bool DDSHeader::hasAlpha() const +{ + return (pf.flags & DDPF_ALPHAPIXELS) != 0; +} + +uint DDSHeader::d3d9Format() const +{ + if (pf.flags & DDPF_FOURCC) { + return pf.fourcc; + } + else { + return findD3D9Format(pf.bitcount, pf.rmask, pf.gmask, pf.bmask, pf.amask); + } +} + +uint DDSHeader::pixelSize() const +{ + if (hasDX10Header()) { + return ::pixelSize((DXGI_FORMAT)header10.dxgiFormat); + } + else { + if (pf.flags & DDPF_FOURCC) { + return ::pixelSize((D3DFORMAT)pf.fourcc); + } + else { + nvDebugCheck((pf.flags & DDPF_RGB) || (pf.flags & DDPF_LUMINANCE)); + return pf.bitcount; + } + } +} + +uint DDSHeader::blockSize() const +{ + switch(pf.fourcc) + { + case FOURCC_DXT1: + case FOURCC_ATI1: + return 8; + case FOURCC_DXT2: + case FOURCC_DXT3: + case FOURCC_DXT4: + case FOURCC_DXT5: + case FOURCC_RXGB: + case FOURCC_ATI2: + return 16; + case FOURCC_DX10: + switch(header10.dxgiFormat) + { + case DXGI_FORMAT_BC1_TYPELESS: + case DXGI_FORMAT_BC1_UNORM: + case DXGI_FORMAT_BC1_UNORM_SRGB: + case DXGI_FORMAT_BC4_TYPELESS: + case DXGI_FORMAT_BC4_UNORM: + case DXGI_FORMAT_BC4_SNORM: + return 8; + case DXGI_FORMAT_BC2_TYPELESS: + case DXGI_FORMAT_BC2_UNORM: + case DXGI_FORMAT_BC2_UNORM_SRGB: + case DXGI_FORMAT_BC3_TYPELESS: + case DXGI_FORMAT_BC3_UNORM: + case DXGI_FORMAT_BC3_UNORM_SRGB: + case DXGI_FORMAT_BC5_TYPELESS: + case DXGI_FORMAT_BC5_UNORM: + case DXGI_FORMAT_BC5_SNORM: + case DXGI_FORMAT_BC6H_TYPELESS: + case DXGI_FORMAT_BC6H_SF16: + case DXGI_FORMAT_BC6H_UF16: + case DXGI_FORMAT_BC7_TYPELESS: + case DXGI_FORMAT_BC7_UNORM: + case DXGI_FORMAT_BC7_UNORM_SRGB: + return 16; + }; + }; + + // Not a block image. 
+ return 0; +} + +bool DDSHeader::isBlockFormat() const +{ + return blockSize() != 0; +} + + + + + +DirectDrawSurface::DirectDrawSurface() : stream(NULL) +{ +} + +DirectDrawSurface::DirectDrawSurface(const char * name) : stream(NULL) +{ + load(name); +} + +DirectDrawSurface::DirectDrawSurface(Stream * s) : stream(NULL) +{ + load(s); } DirectDrawSurface::~DirectDrawSurface() { - delete stream; + delete stream; +} + +bool DirectDrawSurface::load(const char * filename) +{ + return load(new StdInputStream(filename)); +} + +bool DirectDrawSurface::load(Stream * stream) +{ + delete this->stream; + this->stream = stream; + + if (!stream->isError()) + { + (*stream) << header; + return true; + } + + return false; } bool DirectDrawSurface::isValid() const { - if (stream->isError()) - { - return false; - } - - if (header.fourcc != FOURCC_DDS || header.size != 124) - { - return false; - } - - const uint required = (DDSD_WIDTH|DDSD_HEIGHT/*|DDSD_CAPS|DDSD_PIXELFORMAT*/); - if( (header.flags & required) != required ) { - return false; - } - - if (header.pf.size != 32) { - return false; - } - - if( !(header.caps.caps1 & DDSCAPS_TEXTURE) ) { - return false; - } - - return true; + if (stream == NULL || stream->isError()) + { + return false; + } + + if (header.fourcc != FOURCC_DDS || header.size != 124) + { + return false; + } + + const uint required = (DDSD_WIDTH|DDSD_HEIGHT/*|DDSD_CAPS|DDSD_PIXELFORMAT*/); + if( (header.flags & required) != required ) { + return false; + } + + if (header.pf.size != 32) { + return false; + } + + if( !(header.caps.caps1 & DDSCAPS_TEXTURE) ) { + return false; + } + + return true; } bool DirectDrawSurface::isSupported() const { - nvDebugCheck(isValid()); - - if (header.hasDX10Header()) - { - } - else - { - if (header.pf.flags & DDPF_FOURCC) - { - if (header.pf.fourcc != FOURCC_DXT1 && - header.pf.fourcc != FOURCC_DXT2 && - header.pf.fourcc != FOURCC_DXT3 && - header.pf.fourcc != FOURCC_DXT4 && - header.pf.fourcc != FOURCC_DXT5 && - header.pf.fourcc != FOURCC_RXGB && - header.pf.fourcc != FOURCC_ATI1 && - header.pf.fourcc != FOURCC_ATI2) - { - // Unknown fourcc code. - return false; - } - } - else if (header.pf.flags & DDPF_RGB) - { - // All RGB formats are supported now. - } - else - { - return false; - } - - if (isTextureCube() && (header.caps.caps2 & DDSCAPS2_CUBEMAP_ALL_FACES) != DDSCAPS2_CUBEMAP_ALL_FACES) - { - // Cubemaps must contain all faces. - return false; - } - - if (isTexture3D()) - { - // @@ 3D textures not supported yet. - return false; - } - } - - return true; -} + nvDebugCheck(isValid()); + if (header.hasDX10Header()) + { + if (header.header10.dxgiFormat == DXGI_FORMAT_BC1_UNORM || + header.header10.dxgiFormat == DXGI_FORMAT_BC2_UNORM || + header.header10.dxgiFormat == DXGI_FORMAT_BC3_UNORM || + header.header10.dxgiFormat == DXGI_FORMAT_BC4_UNORM || + header.header10.dxgiFormat == DXGI_FORMAT_BC5_UNORM || + header.header10.dxgiFormat == DXGI_FORMAT_BC6H_UF16 || + header.header10.dxgiFormat == DXGI_FORMAT_BC7_UNORM) + { + return true; + } + else { + return findDXGIPixelFormat(header.header10.dxgiFormat) != NULL; + } + } + else + { + if (header.pf.flags & DDPF_FOURCC) + { + if (header.pf.fourcc != FOURCC_DXT1 && + header.pf.fourcc != FOURCC_DXT2 && + header.pf.fourcc != FOURCC_DXT3 && + header.pf.fourcc != FOURCC_DXT4 && + header.pf.fourcc != FOURCC_DXT5 && + header.pf.fourcc != FOURCC_RXGB && + header.pf.fourcc != FOURCC_ATI1 && + header.pf.fourcc != FOURCC_ATI2) + { + // Unknown fourcc code. 
+ return false; + } + } + else if ((header.pf.flags & DDPF_RGB) || (header.pf.flags & DDPF_LUMINANCE)) + { + // All RGB and luminance formats are supported now. + } + else + { + return false; + } + + if (isTextureCube()) { + if (header.width != header.height) return false; + + if ((header.caps.caps2 & DDSCAPS2_CUBEMAP_ALL_FACES) != DDSCAPS2_CUBEMAP_ALL_FACES) + { + // Cubemaps must contain all faces. + return false; + } + } + } + + return true; +} + +bool DirectDrawSurface::hasAlpha() const +{ + if (header.hasDX10Header()) + { +#pragma NV_MESSAGE("TODO: Update hasAlpha to handle all DX10 formats.") + return + header.header10.dxgiFormat == DXGI_FORMAT_BC1_UNORM || + header.header10.dxgiFormat == DXGI_FORMAT_BC2_UNORM || + header.header10.dxgiFormat == DXGI_FORMAT_BC3_UNORM; + } + else + { + if (header.pf.flags & DDPF_RGB) + { + return header.pf.amask != 0; + } + else if (header.pf.flags & DDPF_FOURCC) + { + if (header.pf.fourcc == FOURCC_RXGB || + header.pf.fourcc == FOURCC_ATI1 || + header.pf.fourcc == FOURCC_ATI2 || + header.pf.flags & DDPF_NORMAL) + { + return false; + } + else + { + // @@ Here we could check the ALPHA_PIXELS flag, but nobody sets it. (except us?) + return true; + } + } + + return false; + } +} uint DirectDrawSurface::mipmapCount() const { - nvDebugCheck(isValid()); - if (header.flags & DDSD_MIPMAPCOUNT) return header.mipmapcount; - else return 1; + nvDebugCheck(isValid()); + if (header.flags & DDSD_MIPMAPCOUNT) return header.mipmapcount; + else return 1; } uint DirectDrawSurface::width() const { - nvDebugCheck(isValid()); - if (header.flags & DDSD_WIDTH) return header.width; - else return 1; + nvDebugCheck(isValid()); + if (header.flags & DDSD_WIDTH) return header.width; + else return 1; } uint DirectDrawSurface::height() const { - nvDebugCheck(isValid()); - if (header.flags & DDSD_HEIGHT) return header.height; - else return 1; + nvDebugCheck(isValid()); + if (header.flags & DDSD_HEIGHT) return header.height; + else return 1; } uint DirectDrawSurface::depth() const { - nvDebugCheck(isValid()); - if (header.flags & DDSD_DEPTH) return header.depth; - else return 1; + nvDebugCheck(isValid()); + if (header.flags & DDSD_DEPTH) return header.depth; + else return 1; +} + +uint DirectDrawSurface::arraySize() const +{ + nvDebugCheck(isValid()); + if (header.hasDX10Header()) return header.header10.arraySize; + else return 1; } bool DirectDrawSurface::isTexture1D() const { - nvDebugCheck(isValid()); - if (header.hasDX10Header()) - { - return header.header10.resourceDimension == D3D10_RESOURCE_DIMENSION_TEXTURE1D; - } - return false; + nvDebugCheck(isValid()); + if (header.hasDX10Header()) + { + return header.header10.resourceDimension == DDS_DIMENSION_TEXTURE1D; + } + return false; } bool DirectDrawSurface::isTexture2D() const { - nvDebugCheck(isValid()); - if (header.hasDX10Header()) - { - return header.header10.resourceDimension == D3D10_RESOURCE_DIMENSION_TEXTURE2D; - } - else - { - return !isTexture3D() && !isTextureCube(); - } + nvDebugCheck(isValid()); + if (header.hasDX10Header()) + { + return header.header10.resourceDimension == DDS_DIMENSION_TEXTURE2D && header.header10.arraySize == 1; + } + else + { + return !isTexture3D() && !isTextureCube(); + } } bool DirectDrawSurface::isTexture3D() const { - nvDebugCheck(isValid()); - if (header.hasDX10Header()) - { - return header.header10.resourceDimension == D3D10_RESOURCE_DIMENSION_TEXTURE3D; - } - else - { - return (header.caps.caps2 & DDSCAPS2_VOLUME) != 0; - } + nvDebugCheck(isValid()); + if (header.hasDX10Header()) + { + 
return header.header10.resourceDimension == DDS_DIMENSION_TEXTURE3D; + } + else + { + return (header.caps.caps2 & DDSCAPS2_VOLUME) != 0; + } } bool DirectDrawSurface::isTextureCube() const { - nvDebugCheck(isValid()); - return (header.caps.caps2 & DDSCAPS2_CUBEMAP) != 0; + nvDebugCheck(isValid()); + return (header.caps.caps2 & DDSCAPS2_CUBEMAP) != 0; +} + +bool DirectDrawSurface::isTextureArray() const +{ + nvDebugCheck(isValid()); + return header.hasDX10Header() && header.header10.arraySize > 1; } void DirectDrawSurface::setNormalFlag(bool b) { - nvDebugCheck(isValid()); - header.setNormalFlag(b); + nvDebugCheck(isValid()); + header.setNormalFlag(b); +} + +void DirectDrawSurface::setHasAlphaFlag(bool b) +{ + nvDebugCheck(isValid()); + header.setHasAlphaFlag(b); +} + +void DirectDrawSurface::setUserVersion(int version) +{ + nvDebugCheck(isValid()); + header.setUserVersion(version); } void DirectDrawSurface::mipmap(Image * img, uint face, uint mipmap) { - nvDebugCheck(isValid()); - - stream->seek(offset(face, mipmap)); - - uint w = width(); - uint h = height(); - - // Compute width and height. - for (uint m = 0; m < mipmap; m++) - { - w = max(1U, w / 2); - h = max(1U, h / 2); - } - - img->allocate(w, h); - - if (header.pf.flags & DDPF_RGB) - { - readLinearImage(img); - } - else if (header.pf.flags & DDPF_FOURCC) - { - readBlockImage(img); - } -} - -void DirectDrawSurface::readLinearImage(Image * img) -{ - nvDebugCheck(stream != NULL); - nvDebugCheck(img != NULL); - - const uint w = img->width(); - const uint h = img->height(); - - uint rshift, rsize; - PixelFormat::maskShiftAndSize(header.pf.rmask, &rshift, &rsize); - - uint gshift, gsize; - PixelFormat::maskShiftAndSize(header.pf.gmask, &gshift, &gsize); - - uint bshift, bsize; - PixelFormat::maskShiftAndSize(header.pf.bmask, &bshift, &bsize); - - uint ashift, asize; - PixelFormat::maskShiftAndSize(header.pf.amask, &ashift, &asize); - - uint byteCount = (header.pf.bitcount + 7) / 8; - - // set image format: RGB or ARGB - // alpha channel exists if and only if the alpha mask is non-zero - if (header.pf.amask == 0) - { - img->setFormat(Image::Format_RGB); - } - else - { - img->setFormat(Image::Format_ARGB); - } - - // Read linear RGB images. - for (uint y = 0; y < h; y++) - { - for (uint x = 0; x < w; x++) - { - uint c = 0; - stream->serialize(&c, byteCount); - - Color32 pixel(0, 0, 0, 0xFF); - pixel.r = PixelFormat::convert((c & header.pf.rmask) >> rshift, rsize, 8); - pixel.g = PixelFormat::convert((c & header.pf.gmask) >> gshift, gsize, 8); - pixel.b = PixelFormat::convert((c & header.pf.bmask) >> bshift, bsize, 8); - pixel.a = PixelFormat::convert((c & header.pf.amask) >> ashift, asize, 8); - - img->pixel(x, y) = pixel; - } - } + nvDebugCheck(isValid()); + + stream->seek(offset(face, mipmap)); + + uint w = width(); + uint h = height(); + uint d = depth(); + + // Compute width and height. 
+ for (uint m = 0; m < mipmap; m++) + { + w = max(1U, w / 2); + h = max(1U, h / 2); + d = max(1U, d / 2); + } + + img->allocate(w, h, d); + + if (hasAlpha()) + { + img->setFormat(Image::Format_ARGB); + } + else + { + img->setFormat(Image::Format_RGB); + } + + if (header.hasDX10Header()) + { + if (const RGBAPixelFormat *format = findDXGIPixelFormat(header.header10.dxgiFormat)) { + readLinearImage(img, format->bitcount, format->rmask, format->gmask, format->bmask, format->amask); + } + else { + readBlockImage(img); + } + } + else + { + if (header.pf.flags & DDPF_RGB) + { + readLinearImage(img, header.pf.bitcount, header.pf.rmask, header.pf.gmask, header.pf.bmask, header.pf.amask); + } + else if (header.pf.flags & DDPF_FOURCC) + { + readBlockImage(img); + } + } +} + +/*void * DirectDrawSurface::readData(uint * sizePtr) +{ + uint header_size = 128; // sizeof(DDSHeader); + + if (header.hasDX10Header()) + { + header_size += 20; // sizeof(DDSHeader10); + } + + stream->seek(header_size); + + int size = stream->size() - header_size; + *sizePtr = size; + + void * data = new unsigned char [size]; + + size = stream->serialize(data, size); + nvDebugCheck(size == *sizePtr); + + return data; +}*/ + +/*uint DirectDrawSurface::surfaceSize(uint mipmap) const +{ + uint w = header.width(); + uint h = header.height(); + uint d = header.depth(); + for (int m = 0; m < mipmap; m++) { + w = (w + 1) / 2; + h = (h + 1) / 2; + d = (d + 1) / 2; + } + + bool isBlockFormat; + uint blockOrPixelSize; + + if (header.hasDX10Header()) { + blockOrPixelSize = blockSize(header10.dxgiFormat); + isBlockFormat = (blockOrPixelSize != 0); + if (isBlockFormat) { + blockOrPixelSize = pixelSize(header10.dxgiFormat); + } + } + else { + header.pf.flags + } + + if (isBlockFormat) { + w = (w + 3) / 4; + h = (h + 3) / 4; + d = (d + 3) / 4; // @@ Is it necessary to align the depths? + } + + uint blockOrPixelCount = w * h * d; + + return blockCount = blockOrPixelSize; +}*/ + +bool DirectDrawSurface::readSurface(uint face, uint mipmap, void * data, uint size) +{ + if (size != surfaceSize(mipmap)) return false; + + stream->seek(offset(face, mipmap)); + if (stream->isError()) return false; + + return stream->serialize(data, size) == size; +} + + +void DirectDrawSurface::readLinearImage(Image * img, uint bitcount, uint rmask, uint gmask, uint bmask, uint amask) +{ + nvDebugCheck(stream != NULL); + nvDebugCheck(img != NULL); + + const uint w = img->width(); + const uint h = img->height(); + const uint d = img->depth(); + + uint rshift, rsize; + PixelFormat::maskShiftAndSize(rmask, &rshift, &rsize); + + uint gshift, gsize; + PixelFormat::maskShiftAndSize(gmask, &gshift, &gsize); + + uint bshift, bsize; + PixelFormat::maskShiftAndSize(bmask, &bshift, &bsize); + + uint ashift, asize; + PixelFormat::maskShiftAndSize(amask, &ashift, &asize); + + uint byteCount = (bitcount + 7) / 8; + +#pragma NV_MESSAGE("TODO: Support floating point linear images and other FOURCC codes.") + + // Read linear RGB images. 
+ for (uint z = 0; z < d; z++) + { + for (uint y = 0; y < h; y++) + { + for (uint x = 0; x < w; x++) + { + uint c = 0; + stream->serialize(&c, byteCount); + + Color32 pixel(0, 0, 0, 0xFF); + pixel.r = PixelFormat::convert((c & rmask) >> rshift, rsize, 8); + pixel.g = PixelFormat::convert((c & gmask) >> gshift, gsize, 8); + pixel.b = PixelFormat::convert((c & bmask) >> bshift, bsize, 8); + pixel.a = PixelFormat::convert((c & amask) >> ashift, asize, 8); + + img->pixel(x, y, z) = pixel; + } + } + } } void DirectDrawSurface::readBlockImage(Image * img) { - nvDebugCheck(stream != NULL); - nvDebugCheck(img != NULL); + nvDebugCheck(stream != NULL); + nvDebugCheck(img != NULL); + + const uint w = img->width(); + const uint h = img->height(); + + const uint bw = (w + 3) / 4; + const uint bh = (h + 3) / 4; - // set image format: RGB or ARGB - if (header.pf.fourcc == FOURCC_RXGB || - header.pf.fourcc == FOURCC_ATI1 || - header.pf.fourcc == FOURCC_ATI2 || - header.pf.flags & DDPF_NORMAL) - { - img->setFormat(Image::Format_RGB); - } - else - { - img->setFormat(Image::Format_ARGB); - } - - const uint w = img->width(); - const uint h = img->height(); - - const uint bw = (w + 3) / 4; - const uint bh = (h + 3) / 4; - - for (uint by = 0; by < bh; by++) - { - for (uint bx = 0; bx < bw; bx++) - { - ColorBlock block; - - // Read color block. - readBlock(&block); - - // Write color block. - for (uint y = 0; y < min(4U, h-4*by); y++) - { - for (uint x = 0; x < min(4U, w-4*bx); x++) - { - img->pixel(4*bx+x, 4*by+y) = block.color(x, y); - } - } - } - } + for (uint by = 0; by < bh; by++) + { + for (uint bx = 0; bx < bw; bx++) + { + ColorBlock block; + + // Read color block. + readBlock(&block); + + // Write color block. + for (uint y = 0; y < min(4U, h-4*by); y++) + { + for (uint x = 0; x < min(4U, w-4*bx); x++) + { + img->pixel(4*bx+x, 4*by+y) = block.color(x, y); + } + } + } + } } static Color32 buildNormal(uint8 x, uint8 y) { - float nx = 2 * (x / 255.0f) - 1; - float ny = 2 * (y / 255.0f) - 1; - float nz = 0.0f; - if (1 - nx*nx - ny*ny > 0) nz = sqrtf(1 - nx*nx - ny*ny); - uint8 z = clamp(int(255.0f * (nz + 1) / 2.0f), 0, 255); - - return Color32(x, y, z); + float nx = 2 * (x / 255.0f) - 1; + float ny = 2 * (y / 255.0f) - 1; + float nz = 0.0f; + if (1 - nx*nx - ny*ny > 0) nz = sqrtf(1 - nx*nx - ny*ny); + uint8 z = clamp(int(255.0f * (nz + 1) / 2.0f), 0, 255); + + return Color32(x, y, z); } void DirectDrawSurface::readBlock(ColorBlock * rgba) { - nvDebugCheck(stream != NULL); - nvDebugCheck(rgba != NULL); - - if (header.pf.fourcc == FOURCC_DXT1) - { - BlockDXT1 block; - *stream << block; - block.decodeBlock(rgba); - } - else if (header.pf.fourcc == FOURCC_DXT2 || - header.pf.fourcc == FOURCC_DXT3) - { - BlockDXT3 block; - *stream << block; - block.decodeBlock(rgba); - } - else if (header.pf.fourcc == FOURCC_DXT4 || - header.pf.fourcc == FOURCC_DXT5 || - header.pf.fourcc == FOURCC_RXGB) - { - BlockDXT5 block; - *stream << block; - block.decodeBlock(rgba); - - if (header.pf.fourcc == FOURCC_RXGB) - { - // Swap R & A. - for (int i = 0; i < 16; i++) - { - Color32 & c = rgba->color(i); - uint tmp = c.r; - c.r = c.a; - c.a = tmp; - } - } - } - else if (header.pf.fourcc == FOURCC_ATI1) - { - BlockATI1 block; - *stream << block; - block.decodeBlock(rgba); - } - else if (header.pf.fourcc == FOURCC_ATI2) - { - BlockATI2 block; - *stream << block; - block.decodeBlock(rgba); - } - - // If normal flag set, convert to normal. 
- if (header.pf.flags & DDPF_NORMAL) - { - if (header.pf.fourcc == FOURCC_ATI2) - { - for (int i = 0; i < 16; i++) - { - Color32 & c = rgba->color(i); - c = buildNormal(c.r, c.g); - } - } - else if (header.pf.fourcc == FOURCC_DXT5) - { - for (int i = 0; i < 16; i++) - { - Color32 & c = rgba->color(i); - c = buildNormal(c.a, c.g); - } - } - } -} - - -uint DirectDrawSurface::blockSize() const -{ - switch(header.pf.fourcc) - { - case FOURCC_DXT1: - case FOURCC_ATI1: - return 8; - case FOURCC_DXT2: - case FOURCC_DXT3: - case FOURCC_DXT4: - case FOURCC_DXT5: - case FOURCC_RXGB: - case FOURCC_ATI2: - return 16; - }; - - // Not a block image. - return 0; -} + nvDebugCheck(stream != NULL); + nvDebugCheck(rgba != NULL); -uint DirectDrawSurface::mipmapSize(uint mipmap) const -{ - uint w = width(); - uint h = height(); - uint d = depth(); - - for (uint m = 0; m < mipmap; m++) - { - w = max(1U, w / 2); - h = max(1U, h / 2); - d = max(1U, d / 2); - } + uint fourcc = header.pf.fourcc; - if (header.pf.flags & DDPF_FOURCC) - { - // @@ How are 3D textures aligned? - w = (w + 3) / 4; - h = (h + 3) / 4; - return blockSize() * w * h; - } - else - { - nvDebugCheck(header.pf.flags & DDPF_RGB); - - // Align pixels to bytes. - uint byteCount = (header.pf.bitcount + 7) / 8; - - // Align pitch to 4 bytes. - uint pitch = 4 * ((w * byteCount + 3) / 4); - - return pitch * h * d; - } + // Map DX10 block formats to fourcc codes. + if (header.hasDX10Header()) + { + if (header.header10.dxgiFormat == DXGI_FORMAT_BC1_UNORM) fourcc = FOURCC_DXT1; + else if (header.header10.dxgiFormat == DXGI_FORMAT_BC2_UNORM) fourcc = FOURCC_DXT3; + else if (header.header10.dxgiFormat == DXGI_FORMAT_BC3_UNORM) fourcc = FOURCC_DXT5; + else if (header.header10.dxgiFormat == DXGI_FORMAT_BC4_UNORM) fourcc = FOURCC_ATI1; + else if (header.header10.dxgiFormat == DXGI_FORMAT_BC5_UNORM) fourcc = FOURCC_ATI2; + } + + if (fourcc == FOURCC_DXT1) + { + BlockDXT1 block; + *stream << block; + block.decodeBlock(rgba); + } + else if (fourcc == FOURCC_DXT2 || fourcc == FOURCC_DXT3) + { + BlockDXT3 block; + *stream << block; + block.decodeBlock(rgba); + } + else if (fourcc == FOURCC_DXT4 || fourcc == FOURCC_DXT5 || fourcc == FOURCC_RXGB) + { + BlockDXT5 block; + *stream << block; + block.decodeBlock(rgba); + + if (fourcc == FOURCC_RXGB) + { + // Swap R & A. + for (int i = 0; i < 16; i++) + { + Color32 & c = rgba->color(i); + uint tmp = c.r; + c.r = c.a; + c.a = tmp; + } + } + } + else if (fourcc == FOURCC_ATI1) + { + BlockATI1 block; + *stream << block; + block.decodeBlock(rgba); + } + else if (fourcc == FOURCC_ATI2) + { + BlockATI2 block; + *stream << block; + block.decodeBlock(rgba); + } + else if (header.hasDX10Header() && header.header10.dxgiFormat == DXGI_FORMAT_BC6H_UF16) + { + BlockBC6 block; + *stream << block; + Vector3 colors[16]; + block.decodeBlock(colors); + + // Clamp to [0, 1] and round to 8-bit + for (int y = 0; y < 4; ++y) + { + for (int x = 0; x < 4; ++x) + { + Vector3 px = colors[y*4 + x]; + rgba->color(x, y).setRGBA( + ftoi_round(clamp(px.x, 0.0f, 1.0f) * 255.0f), + ftoi_round(clamp(px.y, 0.0f, 1.0f) * 255.0f), + ftoi_round(clamp(px.z, 0.0f, 1.0f) * 255.0f), + 0xFF); + } + } + } + else if (header.hasDX10Header() && header.header10.dxgiFormat == DXGI_FORMAT_BC7_UNORM) + { + BlockBC7 block; + *stream << block; + block.decodeBlock(rgba); + } + else + { + nvDebugCheck(false); + } + + // If normal flag set, convert to normal. 
+ if (header.pf.flags & DDPF_NORMAL) + { + if (fourcc == FOURCC_ATI2) + { + for (int i = 0; i < 16; i++) + { + Color32 & c = rgba->color(i); + c = buildNormal(c.r, c.g); + } + } + else if (fourcc == FOURCC_DXT5) + { + for (int i = 0; i < 16; i++) + { + Color32 & c = rgba->color(i); + c = buildNormal(c.a, c.g); + } + } + } +} + + +static uint mipmapExtent(uint mipmap, uint x) +{ + for (uint m = 0; m < mipmap; m++) { + x = max(1U, x / 2); + } + return x; +} + +uint DirectDrawSurface::surfaceWidth(uint mipmap) const +{ + return mipmapExtent(mipmap, width()); +} + +uint DirectDrawSurface::surfaceHeight(uint mipmap) const +{ + return mipmapExtent(mipmap, height()); +} + +uint DirectDrawSurface::surfaceDepth(uint mipmap) const +{ + return mipmapExtent(mipmap, depth()); +} + +uint DirectDrawSurface::surfaceSize(uint mipmap) const +{ + uint w = surfaceWidth(mipmap); + uint h = surfaceHeight(mipmap); + uint d = surfaceDepth(mipmap); + + uint blockSize = header.blockSize(); + + if (blockSize == 0) { + uint bitCount = header.pixelSize(); + uint pitch = computeBytePitch(w, bitCount, 1); // Asuming 1 byte alignment, which is the same D3DX expects. + return pitch * h * d; + } + else { + w = (w + 3) / 4; + h = (h + 3) / 4; + d = d; // @@ How are 3D textures aligned? + return blockSize * w * h * d; + } } uint DirectDrawSurface::faceSize() const { - const uint count = mipmapCount(); - uint size = 0; - - for (uint m = 0; m < count; m++) - { - size += mipmapSize(m); - } - - return size; + const uint count = mipmapCount(); + uint size = 0; + + for (uint m = 0; m < count; m++) + { + size += surfaceSize(m); + } + + return size; } uint DirectDrawSurface::offset(const uint face, const uint mipmap) { - uint size = 128; // sizeof(DDSHeader); - - if (header.hasDX10Header()) - { - size += 20; // sizeof(DDSHeader10); - } - - if (face != 0) - { - size += face * faceSize(); - } - - for (uint m = 0; m < mipmap; m++) - { - size += mipmapSize(m); - } - - return size; + uint size = 128; // sizeof(DDSHeader); + + if (header.hasDX10Header()) + { + size += 20; // sizeof(DDSHeader10); + } + + if (face != 0) + { + size += face * faceSize(); + } + + for (uint m = 0; m < mipmap; m++) + { + size += surfaceSize(m); + } + + return size; } void DirectDrawSurface::printInfo() const { - printf("Flags: 0x%.8X\n", header.flags); - if (header.flags & DDSD_CAPS) printf("\tDDSD_CAPS\n"); - if (header.flags & DDSD_PIXELFORMAT) printf("\tDDSD_PIXELFORMAT\n"); - if (header.flags & DDSD_WIDTH) printf("\tDDSD_WIDTH\n"); - if (header.flags & DDSD_HEIGHT) printf("\tDDSD_HEIGHT\n"); - if (header.flags & DDSD_DEPTH) printf("\tDDSD_DEPTH\n"); - if (header.flags & DDSD_PITCH) printf("\tDDSD_PITCH\n"); - if (header.flags & DDSD_LINEARSIZE) printf("\tDDSD_LINEARSIZE\n"); - if (header.flags & DDSD_MIPMAPCOUNT) printf("\tDDSD_MIPMAPCOUNT\n"); - - printf("Height: %d\n", header.height); - printf("Width: %d\n", header.width); - printf("Depth: %d\n", header.depth); - if (header.flags & DDSD_PITCH) printf("Pitch: %d\n", header.pitch); - else if (header.flags & DDSD_LINEARSIZE) printf("Linear size: %d\n", header.pitch); - printf("Mipmap count: %d\n", header.mipmapcount); - - printf("Pixel Format:\n"); - printf("\tFlags: 0x%.8X\n", header.pf.flags); - if (header.pf.flags & DDPF_RGB) printf("\t\tDDPF_RGB\n"); - if (header.pf.flags & DDPF_FOURCC) printf("\t\tDDPF_FOURCC\n"); - if (header.pf.flags & DDPF_ALPHAPIXELS) printf("\t\tDDPF_ALPHAPIXELS\n"); - if (header.pf.flags & DDPF_ALPHA) printf("\t\tDDPF_ALPHA\n"); - if (header.pf.flags & DDPF_PALETTEINDEXED1) 
printf("\t\tDDPF_PALETTEINDEXED1\n"); - if (header.pf.flags & DDPF_PALETTEINDEXED2) printf("\t\tDDPF_PALETTEINDEXED2\n"); - if (header.pf.flags & DDPF_PALETTEINDEXED4) printf("\t\tDDPF_PALETTEINDEXED4\n"); - if (header.pf.flags & DDPF_PALETTEINDEXED8) printf("\t\tDDPF_PALETTEINDEXED8\n"); - if (header.pf.flags & DDPF_ALPHAPREMULT) printf("\t\tDDPF_ALPHAPREMULT\n"); - if (header.pf.flags & DDPF_NORMAL) printf("\t\tDDPF_NORMAL\n"); - - printf("\tFourCC: '%c%c%c%c'\n", - ((header.pf.fourcc >> 0) & 0xFF), - ((header.pf.fourcc >> 8) & 0xFF), - ((header.pf.fourcc >> 16) & 0xFF), - ((header.pf.fourcc >> 24) & 0xFF)); - if ((header.pf.fourcc & DDPF_FOURCC) && (header.pf.bitcount != 0)) - { - printf("\tSwizzle: '%c%c%c%c'\n", - (header.pf.bitcount >> 0) & 0xFF, - (header.pf.bitcount >> 8) & 0xFF, - (header.pf.bitcount >> 16) & 0xFF, - (header.pf.bitcount >> 24) & 0xFF); - } - else - { - printf("\tBit count: %d\n", header.pf.bitcount); - } - printf("\tRed mask: 0x%.8X\n", header.pf.rmask); - printf("\tGreen mask: 0x%.8X\n", header.pf.gmask); - printf("\tBlue mask: 0x%.8X\n", header.pf.bmask); - printf("\tAlpha mask: 0x%.8X\n", header.pf.amask); - - printf("Caps:\n"); - printf("\tCaps 1: 0x%.8X\n", header.caps.caps1); - if (header.caps.caps1 & DDSCAPS_COMPLEX) printf("\t\tDDSCAPS_COMPLEX\n"); - if (header.caps.caps1 & DDSCAPS_TEXTURE) printf("\t\tDDSCAPS_TEXTURE\n"); - if (header.caps.caps1 & DDSCAPS_MIPMAP) printf("\t\tDDSCAPS_MIPMAP\n"); - - printf("\tCaps 2: 0x%.8X\n", header.caps.caps2); - if (header.caps.caps2 & DDSCAPS2_VOLUME) printf("\t\tDDSCAPS2_VOLUME\n"); - else if (header.caps.caps2 & DDSCAPS2_CUBEMAP) - { - printf("\t\tDDSCAPS2_CUBEMAP\n"); - if ((header.caps.caps2 & DDSCAPS2_CUBEMAP_ALL_FACES) == DDSCAPS2_CUBEMAP_ALL_FACES) printf("\t\tDDSCAPS2_CUBEMAP_ALL_FACES\n"); - else { - if (header.caps.caps2 & DDSCAPS2_CUBEMAP_POSITIVEX) printf("\t\tDDSCAPS2_CUBEMAP_POSITIVEX\n"); - if (header.caps.caps2 & DDSCAPS2_CUBEMAP_NEGATIVEX) printf("\t\tDDSCAPS2_CUBEMAP_NEGATIVEX\n"); - if (header.caps.caps2 & DDSCAPS2_CUBEMAP_POSITIVEY) printf("\t\tDDSCAPS2_CUBEMAP_POSITIVEY\n"); - if (header.caps.caps2 & DDSCAPS2_CUBEMAP_NEGATIVEY) printf("\t\tDDSCAPS2_CUBEMAP_NEGATIVEY\n"); - if (header.caps.caps2 & DDSCAPS2_CUBEMAP_POSITIVEZ) printf("\t\tDDSCAPS2_CUBEMAP_POSITIVEZ\n"); - if (header.caps.caps2 & DDSCAPS2_CUBEMAP_NEGATIVEZ) printf("\t\tDDSCAPS2_CUBEMAP_NEGATIVEZ\n"); - } - } - - printf("\tCaps 3: 0x%.8X\n", header.caps.caps3); - printf("\tCaps 4: 0x%.8X\n", header.caps.caps4); - - if (header.hasDX10Header()) - { - printf("DX10 Header:\n"); - printf("\tDXGI Format: %u (%s)\n", header.header10.dxgiFormat, getDxgiFormatString((DXGI_FORMAT)header.header10.dxgiFormat)); - printf("\tResource dimension: %u (%s)\n", header.header10.resourceDimension, getD3d10ResourceDimensionString((D3D10_RESOURCE_DIMENSION)header.header10.resourceDimension)); - printf("\tMisc flag: %u\n", header.header10.miscFlag); - printf("\tArray size: %u\n", header.header10.arraySize); - } - - if (header.reserved[9] == MAKEFOURCC('N', 'V', 'T', 'T')) - { - int major = (header.reserved[10] >> 16) & 0xFF; - int minor = (header.reserved[10] >> 8) & 0xFF; - int revision= header.reserved[10] & 0xFF; - - printf("Version:\n"); - printf("\tNVIDIA Texture Tools %d.%d.%d\n", major, minor, revision); - } + printf("Flags: 0x%.8X\n", header.flags); + if (header.flags & DDSD_CAPS) printf("\tDDSD_CAPS\n"); + if (header.flags & DDSD_PIXELFORMAT) printf("\tDDSD_PIXELFORMAT\n"); + if (header.flags & DDSD_WIDTH) printf("\tDDSD_WIDTH\n"); + if (header.flags 
& DDSD_HEIGHT) printf("\tDDSD_HEIGHT\n"); + if (header.flags & DDSD_DEPTH) printf("\tDDSD_DEPTH\n"); + if (header.flags & DDSD_PITCH) printf("\tDDSD_PITCH\n"); + if (header.flags & DDSD_LINEARSIZE) printf("\tDDSD_LINEARSIZE\n"); + if (header.flags & DDSD_MIPMAPCOUNT) printf("\tDDSD_MIPMAPCOUNT\n"); + + printf("Height: %d\n", header.height); + printf("Width: %d\n", header.width); + printf("Depth: %d\n", header.depth); + if (header.flags & DDSD_PITCH) printf("Pitch: %d\n", header.pitch); + else if (header.flags & DDSD_LINEARSIZE) printf("Linear size: %d\n", header.pitch); + printf("Mipmap count: %d\n", header.mipmapcount); + + printf("Pixel Format:\n"); + printf("\tFlags: 0x%.8X\n", header.pf.flags); + if (header.pf.flags & DDPF_RGB) printf("\t\tDDPF_RGB\n"); + if (header.pf.flags & DDPF_LUMINANCE) printf("\t\tDDPF_LUMINANCE\n"); + if (header.pf.flags & DDPF_FOURCC) printf("\t\tDDPF_FOURCC\n"); + if (header.pf.flags & DDPF_ALPHAPIXELS) printf("\t\tDDPF_ALPHAPIXELS\n"); + if (header.pf.flags & DDPF_ALPHA) printf("\t\tDDPF_ALPHA\n"); + if (header.pf.flags & DDPF_PALETTEINDEXED1) printf("\t\tDDPF_PALETTEINDEXED1\n"); + if (header.pf.flags & DDPF_PALETTEINDEXED2) printf("\t\tDDPF_PALETTEINDEXED2\n"); + if (header.pf.flags & DDPF_PALETTEINDEXED4) printf("\t\tDDPF_PALETTEINDEXED4\n"); + if (header.pf.flags & DDPF_PALETTEINDEXED8) printf("\t\tDDPF_PALETTEINDEXED8\n"); + if (header.pf.flags & DDPF_ALPHAPREMULT) printf("\t\tDDPF_ALPHAPREMULT\n"); + if (header.pf.flags & DDPF_NORMAL) printf("\t\tDDPF_NORMAL\n"); + + if (header.pf.fourcc != 0) { + // Display fourcc code even when DDPF_FOURCC flag not set. + printf("\tFourCC: '%c%c%c%c' (0x%.8X)\n", + ((header.pf.fourcc >> 0) & 0xFF), + ((header.pf.fourcc >> 8) & 0xFF), + ((header.pf.fourcc >> 16) & 0xFF), + ((header.pf.fourcc >> 24) & 0xFF), + header.pf.fourcc); + } + + if ((header.pf.flags & DDPF_FOURCC) && (header.pf.bitcount != 0)) + { + printf("\tSwizzle: '%c%c%c%c' (0x%.8X)\n", + (header.pf.bitcount >> 0) & 0xFF, + (header.pf.bitcount >> 8) & 0xFF, + (header.pf.bitcount >> 16) & 0xFF, + (header.pf.bitcount >> 24) & 0xFF, + header.pf.bitcount); + } + else + { + printf("\tBit count: %d\n", header.pf.bitcount); + } + + printf("\tRed mask: 0x%.8X\n", header.pf.rmask); + printf("\tGreen mask: 0x%.8X\n", header.pf.gmask); + printf("\tBlue mask: 0x%.8X\n", header.pf.bmask); + printf("\tAlpha mask: 0x%.8X\n", header.pf.amask); + + printf("Caps:\n"); + printf("\tCaps 1: 0x%.8X\n", header.caps.caps1); + if (header.caps.caps1 & DDSCAPS_COMPLEX) printf("\t\tDDSCAPS_COMPLEX\n"); + if (header.caps.caps1 & DDSCAPS_TEXTURE) printf("\t\tDDSCAPS_TEXTURE\n"); + if (header.caps.caps1 & DDSCAPS_MIPMAP) printf("\t\tDDSCAPS_MIPMAP\n"); + + printf("\tCaps 2: 0x%.8X\n", header.caps.caps2); + if (header.caps.caps2 & DDSCAPS2_VOLUME) printf("\t\tDDSCAPS2_VOLUME\n"); + else if (header.caps.caps2 & DDSCAPS2_CUBEMAP) + { + printf("\t\tDDSCAPS2_CUBEMAP\n"); + if ((header.caps.caps2 & DDSCAPS2_CUBEMAP_ALL_FACES) == DDSCAPS2_CUBEMAP_ALL_FACES) printf("\t\tDDSCAPS2_CUBEMAP_ALL_FACES\n"); + else { + if (header.caps.caps2 & DDSCAPS2_CUBEMAP_POSITIVEX) printf("\t\tDDSCAPS2_CUBEMAP_POSITIVEX\n"); + if (header.caps.caps2 & DDSCAPS2_CUBEMAP_NEGATIVEX) printf("\t\tDDSCAPS2_CUBEMAP_NEGATIVEX\n"); + if (header.caps.caps2 & DDSCAPS2_CUBEMAP_POSITIVEY) printf("\t\tDDSCAPS2_CUBEMAP_POSITIVEY\n"); + if (header.caps.caps2 & DDSCAPS2_CUBEMAP_NEGATIVEY) printf("\t\tDDSCAPS2_CUBEMAP_NEGATIVEY\n"); + if (header.caps.caps2 & DDSCAPS2_CUBEMAP_POSITIVEZ) printf("\t\tDDSCAPS2_CUBEMAP_POSITIVEZ\n"); + 
            if (header.caps.caps2 & DDSCAPS2_CUBEMAP_NEGATIVEZ) printf("\t\tDDSCAPS2_CUBEMAP_NEGATIVEZ\n");
+        }
+    }
+
+    printf("\tCaps 3: 0x%.8X\n", header.caps.caps3);
+    printf("\tCaps 4: 0x%.8X\n", header.caps.caps4);
+
+    if (header.hasDX10Header())
+    {
+        printf("DX10 Header:\n");
+        printf("\tDXGI Format: %u (%s)\n", header.header10.dxgiFormat, getDxgiFormatString((DXGI_FORMAT)header.header10.dxgiFormat));
+        printf("\tResource dimension: %u (%s)\n", header.header10.resourceDimension, getD3d10ResourceDimensionString((DDS_DIMENSION)header.header10.resourceDimension));
+        printf("\tMisc flag: %u\n", header.header10.miscFlag);
+        printf("\tArray size: %u\n", header.header10.arraySize);
+    }
+
+    if (header.reserved[9] == FOURCC_NVTT)
+    {
+        int major = (header.reserved[10] >> 16) & 0xFF;
+        int minor = (header.reserved[10] >> 8) & 0xFF;
+        int revision= header.reserved[10] & 0xFF;
+
+        printf("Version:\n");
+        printf("\tNVIDIA Texture Tools %d.%d.%d\n", major, minor, revision);
+    }
+
+    if (header.reserved[7] == FOURCC_UVER)
+    {
+        printf("User Version: %d\n", header.reserved[8]);
+    }
 }
Index: ps/trunk/libraries/source/nvtt/src/src/nvimage/ErrorMetric.h
===================================================================
--- ps/trunk/libraries/source/nvtt/src/src/nvimage/ErrorMetric.h
+++ ps/trunk/libraries/source/nvtt/src/src/nvimage/ErrorMetric.h
@@ -0,0 +1,22 @@
+
+#include "nvimage.h"
+
+
+namespace nv
+{
+    class FloatImage;
+
+    NVIMAGE_API float rmsColorError(const FloatImage * ref, const FloatImage * img, bool alphaWeight);
+    NVIMAGE_API float rmsAlphaError(const FloatImage * ref, const FloatImage * img);
+
+    NVIMAGE_API float cieLabError(const FloatImage * ref, const FloatImage * img);
+    float cieLab94Error(const FloatImage * ref, const FloatImage * img);
+    float spatialCieLabError(const FloatImage * ref, const FloatImage * img);
+
+    float averageColorError(const FloatImage * ref, const FloatImage * img, bool alphaWeight);
+    float averageAlphaError(const FloatImage * ref, const FloatImage * img);
+
+    float averageAngularError(const FloatImage * img0, const FloatImage * img1);
+    NVIMAGE_API float rmsAngularError(const FloatImage * img0, const FloatImage * img1);
+
+} // nv namespace
Index: ps/trunk/libraries/source/nvtt/src/src/nvimage/ErrorMetric.cpp
===================================================================
--- ps/trunk/libraries/source/nvtt/src/src/nvimage/ErrorMetric.cpp
+++ ps/trunk/libraries/source/nvtt/src/src/nvimage/ErrorMetric.cpp
@@ -0,0 +1,460 @@
+
+#include "ErrorMetric.h"
+#include "FloatImage.h"
+#include "Filter.h"
+
+#include "nvmath/Matrix.h"
+#include "nvmath/Vector.inl"
+
+#include <float.h> // FLT_MAX
+
+using namespace nv;
+
+float nv::rmsColorError(const FloatImage * ref, const FloatImage * img, bool alphaWeight)
+{
+    if (!sameLayout(img, ref)) {
+        return FLT_MAX;
+    }
+    nvDebugCheck(img->componentCount() == 4);
+    nvDebugCheck(ref->componentCount() == 4);
+
+    double mse = 0;
+
+    const uint count = img->pixelCount();
+    for (uint i = 0; i < count; i++)
+    {
+        float r0 = ref->pixel(i + count * 0);
+        float g0 = ref->pixel(i + count * 1);
+        float b0 = ref->pixel(i + count * 2);
+        float a0 = ref->pixel(i + count * 3);
+        float r1 = img->pixel(i + count * 0);
+        float g1 = img->pixel(i + count * 1);
+        float b1 = img->pixel(i + count * 2);
+        //float a1 = img->pixel(i + count * 3);
+
+        float r = r0 - r1;
+        float g = g0 - g1;
+        float b = b0 - b1;
+
+        float a = 1;
+        if (alphaWeight) a = a0 * a0; // @@ a0*a1 or a0*a0 ?
+ + mse += (r * r) * a; + mse += (g * g) * a; + mse += (b * b) * a; + } + + return float(sqrt(mse / count)); +} + +float nv::rmsAlphaError(const FloatImage * ref, const FloatImage * img) +{ + if (!sameLayout(img, ref)) { + return FLT_MAX; + } + nvDebugCheck(img->componentCount() == 4 && ref->componentCount() == 4); + + double mse = 0; + + const uint count = img->pixelCount(); + for (uint i = 0; i < count; i++) + { + float a0 = img->pixel(i + count * 3); + float a1 = ref->pixel(i + count * 3); + + float a = a0 - a1; + + mse += a * a; + } + + return float(sqrt(mse / count)); +} + + +float nv::averageColorError(const FloatImage * ref, const FloatImage * img, bool alphaWeight) +{ + if (!sameLayout(img, ref)) { + return FLT_MAX; + } + nvDebugCheck(img->componentCount() == 4); + nvDebugCheck(ref->componentCount() == 4); + + double mae = 0; + + const uint count = img->pixelCount(); + for (uint i = 0; i < count; i++) + { + float r0 = img->pixel(i + count * 0); + float g0 = img->pixel(i + count * 1); + float b0 = img->pixel(i + count * 2); + //float a0 = img->pixel(i + count * 3); + float r1 = ref->pixel(i + count * 0); + float g1 = ref->pixel(i + count * 1); + float b1 = ref->pixel(i + count * 2); + float a1 = ref->pixel(i + count * 3); + + float r = fabs(r0 - r1); + float g = fabs(g0 - g1); + float b = fabs(b0 - b1); + + float a = 1; + if (alphaWeight) a = a1; + + mae += r * a; + mae += g * a; + mae += b * a; + } + + return float(mae / count); +} + +float nv::averageAlphaError(const FloatImage * ref, const FloatImage * img) +{ + if (img == NULL || ref == NULL || img->width() != ref->width() || img->height() != ref->height()) { + return FLT_MAX; + } + nvDebugCheck(img->componentCount() == 4 && ref->componentCount() == 4); + + double mae = 0; + + const uint count = img->width() * img->height(); + for (uint i = 0; i < count; i++) + { + float a0 = img->pixel(i + count * 3); + float a1 = ref->pixel(i + count * 3); + + float a = a0 - a1; + + mae += fabs(a); + } + + return float(mae / count); +} + + +// Color space conversions based on: +// http://www.brucelindbloom.com/ + +// Assumes input is in *linear* sRGB color space. +static Vector3 rgbToXyz(Vector3::Arg c) +{ + Vector3 xyz; + xyz.x = 0.412453f * c.x + 0.357580f * c.y + 0.180423f * c.z; + xyz.y = 0.212671f * c.x + 0.715160f * c.y + 0.072169f * c.z; + xyz.z = 0.019334f * c.x + 0.119193f * c.y + 0.950227f * c.z; + return xyz; +} + +static Vector3 xyzToRgb(Vector3::Arg c) +{ + Vector3 rgb; + rgb.x = 3.2404542f * c.x - 1.5371385f * c.y - 0.4985314f * c.z; + rgb.y = -0.9692660f * c.x + 1.8760108f * c.y + 0.0415560f * c.z; + rgb.z = 0.0556434f * c.x - 0.2040259f * c.y + 1.0572252f * c.z; + return rgb; +} + +static float toLinear(float f) +{ + return powf(f, 2.2f); +} + +static float toGamma(float f) +{ + // @@ Use sRGB space? + return powf(f, 1.0f/2.2f); +} + +static Vector3 toLinear(Vector3::Arg c) +{ + return Vector3(toLinear(c.x), toLinear(c.y), toLinear(c.z)); +} + +static Vector3 toGamma(Vector3::Arg c) +{ + return Vector3(toGamma(c.x), toGamma(c.y), toGamma(c.z)); +} + +static float f(float t) +{ + const float epsilon = powf(6.0f/29.0f, 3); + + if (t > epsilon) { + return powf(t, 1.0f/3.0f); + } + else { + return 1.0f/3.0f * powf(29.0f/6.0f, 2) * t + 4.0f / 29.0f; + } +} + +static float finv(float t) +{ + if (t > 6.0f / 29.0f) { + return 3.0f * powf(6.0f / 29.0f, 2) * (t - 4.0f / 29.0f); + } + else { + return powf(t, 3.0f); + } +} + +static Vector3 xyzToCieLab(Vector3::Arg c) +{ + // Normalized white point. 
+ const float Xn = 0.950456f; + const float Yn = 1.0f; + const float Zn = 1.088754f; + + float Xr = c.x / Xn; + float Yr = c.y / Yn; + float Zr = c.z / Zn; + + float fx = f(Xr); + float fy = f(Yr); + float fz = f(Zr); + + float L = 116 * fx - 16; + float a = 500 * (fx - fy); + float b = 200 * (fy - fz); + + return Vector3(L, a, b); +} + +static Vector3 rgbToCieLab(Vector3::Arg c) +{ + return xyzToCieLab(rgbToXyz(toLinear(c))); +} + +// h is hue-angle in radians +static Vector3 cieLabToLCh(Vector3::Arg c) +{ + return Vector3(c.x, sqrtf(c.y*c.y + c.z*c.z), atan2f(c.y, c.z)); +} + +static void rgbToCieLab(const FloatImage * rgbImage, FloatImage * LabImage) +{ + nvDebugCheck(rgbImage != NULL && LabImage != NULL); + nvDebugCheck(rgbImage->width() == LabImage->width() && rgbImage->height() == LabImage->height()); + nvDebugCheck(rgbImage->componentCount() >= 3 && LabImage->componentCount() >= 3); + + const uint w = rgbImage->width(); + const uint h = LabImage->height(); + + const float * R = rgbImage->channel(0); + const float * G = rgbImage->channel(1); + const float * B = rgbImage->channel(2); + + float * L = LabImage->channel(0); + float * a = LabImage->channel(1); + float * b = LabImage->channel(2); + + const uint count = w*h; + for (uint i = 0; i < count; i++) + { + Vector3 Lab = rgbToCieLab(Vector3(R[i], G[i], B[i])); + L[i] = Lab.x; + a[i] = Lab.y; + b[i] = Lab.z; + } +} + + +// Assumes input images are in linear sRGB space. +float nv::cieLabError(const FloatImage * img0, const FloatImage * img1) +{ + if (!sameLayout(img0, img1)) return FLT_MAX; + nvDebugCheck(img0->componentCount() == 4 && img1->componentCount() == 4); + + const float * r0 = img0->channel(0); + const float * g0 = img0->channel(1); + const float * b0 = img0->channel(2); + + const float * r1 = img1->channel(0); + const float * g1 = img1->channel(1); + const float * b1 = img1->channel(2); + + double error = 0.0f; + + const uint count = img0->pixelCount(); + for (uint i = 0; i < count; i++) + { + Vector3 lab0 = rgbToCieLab(Vector3(r0[i], g0[i], b0[i])); + Vector3 lab1 = rgbToCieLab(Vector3(r1[i], g1[i], b1[i])); + + // @@ Measure Delta E. + Vector3 delta = lab0 - lab1; + + error += length(delta); + } + + return float(error / count); +} + +// Assumes input images are in linear sRGB space. 
+float nv::cieLab94Error(const FloatImage * img0, const FloatImage * img1) +{ + if (!sameLayout(img0, img1)) return FLT_MAX; + nvDebugCheck(img0->componentCount() == 4 && img1->componentCount() == 4); + + const float kL = 1; + const float kC = 1; + const float kH = 1; + const float k1 = 0.045f; + const float k2 = 0.015f; + + const float sL = 1; + + const float * r0 = img0->channel(0); + const float * g0 = img0->channel(1); + const float * b0 = img0->channel(2); + + const float * r1 = img1->channel(0); + const float * g1 = img1->channel(1); + const float * b1 = img1->channel(2); + + double error = 0.0f; + + const uint count = img0->pixelCount(); + for (uint i = 0; i < count; ++i) + { + Vector3 lab0 = rgbToCieLab(Vector3(r0[i], g0[i], b0[i])); + Vector3 lch0 = cieLabToLCh(lab0); + Vector3 lab1 = rgbToCieLab(Vector3(r1[i], g1[i], b1[i])); + Vector3 lch1 = cieLabToLCh(lab1); + + const float sC = 1 + k1*lch0.x; + const float sH = 1 + k2*lch0.x; + + // @@ Measure Delta E using the 1994 definition + Vector3 labDelta = lab0 - lab1; + Vector3 lchDelta = lch0 - lch1; + + double deltaLsq = powf(lchDelta.x / (kL*sL), 2); + double deltaCsq = powf(lchDelta.y / (kC*sC), 2); + + // avoid possible sqrt of negative value by computing (deltaH/(kH*sH))^2 + double deltaHsq = powf(labDelta.y, 2) + powf(labDelta.z, 2) - powf(lchDelta.y, 2); + deltaHsq /= powf(kH*sH, 2); + + error += sqrt(deltaLsq + deltaCsq + deltaHsq); + } + + return float(error / count); +} + +float nv::spatialCieLabError(const FloatImage * img0, const FloatImage * img1) +{ + if (img0 == NULL || img1 == NULL || img0->width() != img1->width() || img0->height() != img1->height()) { + return FLT_MAX; + } + nvDebugCheck(img0->componentCount() == 4 && img1->componentCount() == 4); + + uint w = img0->width(); + uint h = img0->height(); + uint d = img0->depth(); + + FloatImage lab0, lab1; // Original images in CIE-Lab space. + lab0.allocate(3, w, h, d); + lab1.allocate(3, w, h, d); + + // Convert input images to CIE-Lab. + rgbToCieLab(img0, &lab0); + rgbToCieLab(img1, &lab1); + + // @@ Convolve each channel by the corresponding filter. + /* + GaussianFilter LFilter(5); + GaussianFilter aFilter(5); + GaussianFilter bFilter(5); + + lab0.convolve(0, LFilter); + lab0.convolve(1, aFilter); + lab0.convolve(2, bFilter); + + lab1.convolve(0, LFilter); + lab1.convolve(1, aFilter); + lab1.convolve(2, bFilter); + */ + // @@ Measure Delta E between lab0 and lab1. + + return 0.0f; +} + + +// Assumes input images are normal maps. 
+float nv::averageAngularError(const FloatImage * img0, const FloatImage * img1) +{ + if (img0 == NULL || img1 == NULL || img0->width() != img1->width() || img0->height() != img1->height()) { + return FLT_MAX; + } + nvDebugCheck(img0->componentCount() == 4 && img1->componentCount() == 4); + + uint w = img0->width(); + uint h = img0->height(); + + const float * x0 = img0->channel(0); + const float * y0 = img0->channel(1); + const float * z0 = img0->channel(2); + + const float * x1 = img1->channel(0); + const float * y1 = img1->channel(1); + const float * z1 = img1->channel(2); + + double error = 0.0f; + + const uint count = w*h; + for (uint i = 0; i < count; i++) + { + Vector3 n0 = Vector3(x0[i], y0[i], z0[i]); + Vector3 n1 = Vector3(x1[i], y1[i], z1[i]); + + n0 = 2.0f * n0 - Vector3(1); + n1 = 2.0f * n1 - Vector3(1); + + n0 = normalizeSafe(n0, Vector3(0), 0.0f); + n1 = normalizeSafe(n1, Vector3(0), 0.0f); + + error += acos(clamp(dot(n0, n1), -1.0f, 1.0f)); + } + + return float(error / count); +} + +float nv::rmsAngularError(const FloatImage * img0, const FloatImage * img1) +{ + if (img0 == NULL || img1 == NULL || img0->width() != img1->width() || img0->height() != img1->height()) { + return FLT_MAX; + } + nvDebugCheck(img0->componentCount() == 4 && img1->componentCount() == 4); + + uint w = img0->width(); + uint h = img0->height(); + + const float * x0 = img0->channel(0); + const float * y0 = img0->channel(1); + const float * z0 = img0->channel(2); + + const float * x1 = img1->channel(0); + const float * y1 = img1->channel(1); + const float * z1 = img1->channel(2); + + double error = 0.0f; + + const uint count = w*h; + for (uint i = 0; i < count; i++) + { + Vector3 n0 = Vector3(x0[i], y0[i], z0[i]); + Vector3 n1 = Vector3(x1[i], y1[i], z1[i]); + + n0 = 2.0f * n0 - Vector3(1); + n1 = 2.0f * n1 - Vector3(1); + + n0 = normalizeSafe(n0, Vector3(0), 0.0f); + n1 = normalizeSafe(n1, Vector3(0), 0.0f); + + float angle = acosf(clamp(dot(n0, n1), -1.0f, 1.0f)); + error += angle * angle; + } + + return float(sqrt(error / count)); +} + Index: ps/trunk/libraries/source/nvtt/src/src/nvimage/Filter.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvimage/Filter.h +++ ps/trunk/libraries/source/nvtt/src/src/nvimage/Filter.h @@ -1,218 +1,233 @@ // This code is in the public domain -- castanyo@yahoo.es +#pragma once #ifndef NV_IMAGE_FILTER_H #define NV_IMAGE_FILTER_H -#include -#include +#include "nvimage.h" +#include "nvcore/Debug.h" namespace nv { - class Vector4; + class Vector4; - /// Base filter class. - class NVIMAGE_CLASS Filter - { - public: - Filter(float width); - virtual ~Filter(); - - float width() const { return m_width; } - float sampleDelta(float x, float scale) const; - float sampleBox(float x, float scale, int samples) const; - float sampleTriangle(float x, float scale, int samples) const; - - virtual float evaluate(float x) const = 0; - - protected: - const float m_width; - }; - - // Box filter. - class NVIMAGE_CLASS BoxFilter : public Filter - { - public: - BoxFilter(); - BoxFilter(float width); - virtual float evaluate(float x) const; - }; - - // Triangle (bilinear/tent) filter. - class NVIMAGE_CLASS TriangleFilter : public Filter - { - public: - TriangleFilter(); - TriangleFilter(float width); - virtual float evaluate(float x) const; - }; - - // Quadratic (bell) filter. 
- class NVIMAGE_CLASS QuadraticFilter : public Filter - { - public: - QuadraticFilter(); - virtual float evaluate(float x) const; - }; - - // Cubic filter from Thatcher Ulrich. - class NVIMAGE_CLASS CubicFilter : public Filter - { - public: - CubicFilter(); - virtual float evaluate(float x) const; - }; - - // Cubic b-spline filter from Paul Heckbert. - class NVIMAGE_CLASS BSplineFilter : public Filter - { - public: - BSplineFilter(); - virtual float evaluate(float x) const; - }; - - /// Mitchell & Netravali's two-param cubic - /// @see "Reconstruction Filters in Computer Graphics", SIGGRAPH 88 - class NVIMAGE_CLASS MitchellFilter : public Filter - { - public: - MitchellFilter(); - virtual float evaluate(float x) const; - - void setParameters(float b, float c); - - private: - float p0, p2, p3; - float q0, q1, q2, q3; - }; - - // Lanczos3 filter. - class NVIMAGE_CLASS LanczosFilter : public Filter - { - public: - LanczosFilter(); - virtual float evaluate(float x) const; - }; - - // Sinc filter. - class NVIMAGE_CLASS SincFilter : public Filter - { - public: - SincFilter(float w); - virtual float evaluate(float x) const; - }; - - // Kaiser filter. - class NVIMAGE_CLASS KaiserFilter : public Filter - { - public: - KaiserFilter(float w); - virtual float evaluate(float x) const; - - void setParameters(float a, float stretch); - - private: - float alpha; - float stretch; - }; - - - - /// A 1D kernel. Used to precompute filter weights. - class NVIMAGE_CLASS Kernel1 - { - NV_FORBID_COPY(Kernel1); - public: - Kernel1(const Filter & f, int iscale, int samples = 32); - ~Kernel1(); - - float valueAt(uint x) const { - nvDebugCheck(x < (uint)m_windowSize); - return m_data[x]; - } - - int windowSize() const { - return m_windowSize; - } - - float width() const { - return m_width; - } - - void debugPrint(); - - private: - int m_windowSize; - float m_width; - float * m_data; - }; - - - /// A 2D kernel. - class NVIMAGE_CLASS Kernel2 - { - public: - Kernel2(uint width); - Kernel2(const Kernel2 & k); - ~Kernel2(); - - void normalize(); - void transpose(); - - float valueAt(uint x, uint y) const { - return m_data[y * m_windowSize + x]; - } - - uint windowSize() const { - return m_windowSize; - } - - void initLaplacian(); - void initEdgeDetection(); - void initSobel(); - void initPrewitt(); - - void initBlendedSobel(const Vector4 & scale); - - private: - const uint m_windowSize; - float * m_data; - }; - - - /// A 1D polyphase kernel - class NVIMAGE_CLASS PolyphaseKernel - { - NV_FORBID_COPY(PolyphaseKernel); - public: - PolyphaseKernel(const Filter & f, uint srcLength, uint dstLength, int samples = 32); - ~PolyphaseKernel(); - - int windowSize() const { - return m_windowSize; - } - - uint length() const { - return m_length; - } - - float width() const { - return m_width; - } - - float valueAt(uint column, uint x) const { - nvDebugCheck(column < m_length); - nvDebugCheck(x < (uint)m_windowSize); - return m_data[column * m_windowSize + x]; - } - - void debugPrint() const; - - private: - int m_windowSize; - uint m_length; - float m_width; - float * m_data; - }; + /// Base filter class. + class NVIMAGE_CLASS Filter + { + public: + Filter(float width); + virtual ~Filter(); + + float width() const { return m_width; } + float sampleDelta(float x, float scale) const; + float sampleBox(float x, float scale, int samples) const; + float sampleTriangle(float x, float scale, int samples) const; + + virtual float evaluate(float x) const = 0; + + protected: + const float m_width; + }; + + // Box filter. 
+ class NVIMAGE_CLASS BoxFilter : public Filter + { + public: + BoxFilter(); + BoxFilter(float width); + virtual float evaluate(float x) const; + }; + + // Triangle (bilinear/tent) filter. + class NVIMAGE_CLASS TriangleFilter : public Filter + { + public: + TriangleFilter(); + TriangleFilter(float width); + virtual float evaluate(float x) const; + }; + + // Quadratic (bell) filter. + class NVIMAGE_CLASS QuadraticFilter : public Filter + { + public: + QuadraticFilter(); + virtual float evaluate(float x) const; + }; + + // Cubic filter from Thatcher Ulrich. + class NVIMAGE_CLASS CubicFilter : public Filter + { + public: + CubicFilter(); + virtual float evaluate(float x) const; + }; + + // Cubic b-spline filter from Paul Heckbert. + class NVIMAGE_CLASS BSplineFilter : public Filter + { + public: + BSplineFilter(); + virtual float evaluate(float x) const; + }; + + /// Mitchell & Netravali's two-param cubic + /// @see "Reconstruction Filters in Computer Graphics", SIGGRAPH 88 + class NVIMAGE_CLASS MitchellFilter : public Filter + { + public: + MitchellFilter(); + virtual float evaluate(float x) const; + + void setParameters(float b, float c); + + private: + float p0, p2, p3; + float q0, q1, q2, q3; + }; + + // Lanczos3 filter. + class NVIMAGE_CLASS LanczosFilter : public Filter + { + public: + LanczosFilter(); + virtual float evaluate(float x) const; + }; + + // Sinc filter. + class NVIMAGE_CLASS SincFilter : public Filter + { + public: + SincFilter(float w); + virtual float evaluate(float x) const; + }; + + // Kaiser filter. + class NVIMAGE_CLASS KaiserFilter : public Filter + { + public: + KaiserFilter(float w); + virtual float evaluate(float x) const; + + void setParameters(float a, float stretch); + + private: + float alpha; + float stretch; + }; + + // Gaussian filter. + class GaussianFilter : public Filter + { + public: + GaussianFilter(float w); + virtual float evaluate(float x) const; + + void setParameters(float variance); + + private: + float variance; + }; + + + + /// A 1D kernel. Used to precompute filter weights. + class NVIMAGE_CLASS Kernel1 + { + NV_FORBID_COPY(Kernel1); + public: + Kernel1(const Filter & f, int iscale, int samples = 32); + ~Kernel1(); + + float valueAt(uint x) const { + nvDebugCheck(x < (uint)m_windowSize); + return m_data[x]; + } + + int windowSize() const { + return m_windowSize; + } + + float width() const { + return m_width; + } + + void debugPrint(); + + private: + int m_windowSize; + float m_width; + float * m_data; + }; + + + /// A 2D kernel. 
+ class NVIMAGE_CLASS Kernel2 + { + public: + Kernel2(uint width); + Kernel2(uint width, const float * data); + Kernel2(const Kernel2 & k); + ~Kernel2(); + + void normalize(); + void transpose(); + + float valueAt(uint x, uint y) const { + return m_data[y * m_windowSize + x]; + } + + uint windowSize() const { + return m_windowSize; + } + + void initLaplacian(); + void initEdgeDetection(); + void initSobel(); + void initPrewitt(); + + void initBlendedSobel(const Vector4 & scale); + + private: + const uint m_windowSize; + float * m_data; + }; + + + /// A 1D polyphase kernel + class NVIMAGE_CLASS PolyphaseKernel + { + NV_FORBID_COPY(PolyphaseKernel); + public: + PolyphaseKernel(const Filter & f, uint srcLength, uint dstLength, int samples = 32); + ~PolyphaseKernel(); + + int windowSize() const { + return m_windowSize; + } + + uint length() const { + return m_length; + } + + float width() const { + return m_width; + } + + float valueAt(uint column, uint x) const { + nvDebugCheck(column < m_length); + nvDebugCheck(x < (uint)m_windowSize); + return m_data[column * m_windowSize + x]; + } + + void debugPrint() const; + + private: + int m_windowSize; + uint m_length; + float m_width; + float * m_data; + }; } // nv namespace Index: ps/trunk/libraries/source/nvtt/src/src/nvimage/Filter.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvimage/Filter.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvimage/Filter.cpp @@ -35,62 +35,64 @@ #include "Filter.h" -#include // Vector4 -#include // swap +#include "nvmath/Vector.h" // Vector4 +#include "nvcore/Utils.h" // swap + +#include // memset using namespace nv; namespace { - // Sinc function. - inline static float sincf(const float x) - { - if (fabs(x) < NV_EPSILON) { - //return 1.0; - return 1.0f + x*x*(-1.0f/6.0f + x*x*1.0f/120.0f); - } - else { - return sin(x) / x; - } - } - - // Bessel function of the first kind from Jon Blow's article. - // http://mathworld.wolfram.com/BesselFunctionoftheFirstKind.html - // http://en.wikipedia.org/wiki/Bessel_function - inline static float bessel0(float x) - { - const float EPSILON_RATIO = 1e-6f; - float xh, sum, pow, ds; - int k; - - xh = 0.5f * x; - sum = 1.0f; - pow = 1.0f; - k = 0; - ds = 1.0; - while (ds > sum * EPSILON_RATIO) { - ++k; - pow = pow * (xh / k); - ds = pow * pow; - sum = sum + ds; - } - - return sum; - } - - /*// Alternative bessel function from Paul Heckbert. - static float _bessel0(float x) - { - const float EPSILON_RATIO = 1E-6; - float sum = 1.0f; - float y = x * x / 4.0f; - float t = y; - for(int i = 2; t > EPSILON_RATIO; i++) { - sum += t; - t *= y / float(i * i); - } - return sum; - }*/ + // Sinc function. + inline static float sincf(const float x) + { + if (fabs(x) < NV_EPSILON) { + //return 1.0; + return 1.0f + x*x*(-1.0f/6.0f + x*x*1.0f/120.0f); + } + else { + return sin(x) / x; + } + } + + // Bessel function of the first kind from Jon Blow's article. + // http://mathworld.wolfram.com/BesselFunctionoftheFirstKind.html + // http://en.wikipedia.org/wiki/Bessel_function + inline static float bessel0(float x) + { + const float EPSILON_RATIO = 1e-6f; + float xh, sum, pow, ds; + int k; + + xh = 0.5f * x; + sum = 1.0f; + pow = 1.0f; + k = 0; + ds = 1.0; + while (ds > sum * EPSILON_RATIO) { + ++k; + pow = pow * (xh / k); + ds = pow * pow; + sum = sum + ds; + } + + return sum; + } + + /*// Alternative bessel function from Paul Heckbert. 
+ static float _bessel0(float x) + { + const float EPSILON_RATIO = 1E-6; + float sum = 1.0f; + float y = x * x / 4.0f; + float t = y; + for(int i = 2; t > EPSILON_RATIO; i++) { + sum += t; + t *= y / float(i * i); + } + return sum; + }*/ } // namespace @@ -105,42 +107,45 @@ float Filter::sampleDelta(float x, float scale) const { - return evaluate((x + 0.5f)* scale); + return evaluate((x + 0.5f)* scale); } float Filter::sampleBox(float x, float scale, int samples) const { - float sum = 0; - float isamples = 1.0f / float(samples); + double sum = 0; + float isamples = 1.0f / float(samples); + + for(int s = 0; s < samples; s++) + { + float p = (x + (float(s) + 0.5f) * isamples) * scale; + float value = evaluate(p); + + //printf("%f: %.8f (%X)\n", p, value, *(uint32 *)&value); - for(int s = 0; s < samples; s++) - { - float p = (x + (float(s) + 0.5f) * isamples) * scale; - float value = evaluate(p); - sum += value; - } - - return sum * isamples; + sum += value; + } + + return float(sum * isamples); } float Filter::sampleTriangle(float x, float scale, int samples) const { - float sum = 0; - float isamples = 1.0f / float(samples); + double sum = 0; + float isamples = 1.0f / float(samples); + + for(int s = 0; s < samples; s++) + { + float offset = (2 * float(s) + 1.0f) * isamples; + float p = (x + offset - 0.5f) * scale; + float value = evaluate(p); + + float weight = offset; + if (weight > 1.0f) weight = 2.0f - weight; - for(int s = 0; s < samples; s++) - { - float offset = (2 * float(s) + 1.0f) * isamples; - float p = (x + offset - 0.5f) * scale; - float value = evaluate(p); - - float weight = offset; - if (weight > 1.0f) weight = 2.0f - weight; - - sum += value * weight; - } - - return 2 * sum * isamples; + sum += value * weight; + } + + return float(2 * sum * isamples); } @@ -152,8 +157,8 @@ float BoxFilter::evaluate(float x) const { - if (fabs(x) <= m_width) return 1.0f; - else return 0.0f; + if (fabs(x) <= m_width) return 1.0f; + else return 0.0f; } @@ -162,7 +167,7 @@ float TriangleFilter::evaluate(float x) const { - x = fabs(x); + x = fabs(x); if( x < m_width ) return m_width - x; return 0.0f; } @@ -172,11 +177,11 @@ float QuadraticFilter::evaluate(float x) const { - x = fabs(x); + x = fabs(x); if( x < 0.5f ) return 0.75f - x * x; if( x < 1.5f ) { - float t = x - 1.5f; - return 0.5f * t * t; + float t = x - 1.5f; + return 0.5f * t * t; } return 0.0f; } @@ -186,10 +191,10 @@ float CubicFilter::evaluate(float x) const { - // f(t) = 2|t|^3 - 3|t|^2 + 1, -1 <= t <= 1 - x = fabs(x); - if( x < 1.0f ) return((2.0f * x - 3.0f) * x * x + 1.0f); - return 0.0f; + // f(t) = 2|t|^3 - 3|t|^2 + 1, -1 <= t <= 1 + x = fabs(x); + if( x < 1.0f ) return((2.0f * x - 3.0f) * x * x + 1.0f); + return 0.0f; } @@ -197,11 +202,11 @@ float BSplineFilter::evaluate(float x) const { - x = fabs(x); + x = fabs(x); if( x < 1.0f ) return (4.0f + x * x * (-6.0f + x * 3.0f)) / 6.0f; if( x < 2.0f ) { - float t = 2.0f - x; - return t * t * t / 6.0f; + float t = 2.0f - x; + return t * t * t / 6.0f; } return 0.0f; } @@ -211,21 +216,21 @@ float MitchellFilter::evaluate(float x) const { - x = fabs(x); - if( x < 1.0f ) return p0 + x * x * (p2 + x * p3); - if( x < 2.0f ) return q0 + x * (q1 + x * (q2 + x * q3)); - return 0.0f; + x = fabs(x); + if( x < 1.0f ) return p0 + x * x * (p2 + x * p3); + if( x < 2.0f ) return q0 + x * (q1 + x * (q2 + x * q3)); + return 0.0f; } void MitchellFilter::setParameters(float b, float c) { - p0 = (6.0f - 2.0f * b) / 6.0f; - p2 = (-18.0f + 12.0f * b + 6.0f * c) / 6.0f; - p3 = (12.0f - 9.0f * b - 6.0f * 
c) / 6.0f; - q0 = (8.0f * b + 24.0f * c) / 6.0f; - q1 = (-12.0f * b - 48.0f * c) / 6.0f; - q2 = (6.0f * b + 30.0f * c) / 6.0f; - q3 = (-b - 6.0f * c) / 6.0f; + p0 = (6.0f - 2.0f * b) / 6.0f; + p2 = (-18.0f + 12.0f * b + 6.0f * c) / 6.0f; + p3 = (12.0f - 9.0f * b - 6.0f * c) / 6.0f; + q0 = (8.0f * b + 24.0f * c) / 6.0f; + q1 = (-12.0f * b - 48.0f * c) / 6.0f; + q2 = (6.0f * b + 30.0f * c) / 6.0f; + q3 = (-b - 6.0f * c) / 6.0f; } @@ -233,9 +238,9 @@ float LanczosFilter::evaluate(float x) const { - x = fabs(x); - if( x < 3.0f ) return sincf(PI * x) * sincf(PI * x / 3.0f); - return 0.0f; + x = fabs(x); + if( x < 3.0f ) return sincf(PI * x) * sincf(PI * x / 3.0f); + return 0.0f; } @@ -243,172 +248,187 @@ float SincFilter::evaluate(float x) const { - return sincf(PI * x); + return sincf(PI * x); } -KaiserFilter::KaiserFilter(float w) : Filter(w) { setParameters(4.0f, 1.0f); } +KaiserFilter::KaiserFilter(float w) : Filter(w) { setParameters(/*alpha=*/4.0f, /*stretch=*/1.0f); } float KaiserFilter::evaluate(float x) const { - const float sinc_value = sincf(PI * x * stretch); - const float t = x / m_width; - if ((1 - t * t) >= 0) return sinc_value * bessel0(alpha * sqrtf(1 - t * t)) / bessel0(alpha); - else return 0; + const float sinc_value = sincf(PI * x * stretch); + const float t = x / m_width; + if ((1 - t * t) >= 0) return sinc_value * bessel0(alpha * sqrtf(1 - t * t)) / bessel0(alpha); + else return 0; } void KaiserFilter::setParameters(float alpha, float stretch) { - this->alpha = alpha; - this->stretch = stretch; + this->alpha = alpha; + this->stretch = stretch; +} + +GaussianFilter::GaussianFilter(float w) : Filter(w) { setParameters(1); } + +float GaussianFilter::evaluate(float x) const +{ + // variance = sigma^2 + return (1.0f / sqrtf(2 * PI * variance)) * expf(-x*x / (2 * variance)); +} + +void GaussianFilter::setParameters(float variance) +{ + this->variance = variance; } -/// Ctor. Kernel1::Kernel1(const Filter & f, int iscale, int samples/*= 32*/) { - nvDebugCheck(iscale > 1); - nvDebugCheck(samples > 0); - - const float scale = 1.0f / iscale; - - m_width = f.width() * iscale; - m_windowSize = (int)ceilf(2 * m_width); - m_data = new float[m_windowSize]; - - const float offset = float(m_windowSize) / 2; - - float total = 0.0f; - for (int i = 0; i < m_windowSize; i++) - { - const float sample = f.sampleBox(i - offset, scale, samples); - m_data[i] = sample; - total += sample; - } - - const float inv = 1.0f / total; - for (int i = 0; i < m_windowSize; i++) - { - m_data[i] *= inv; - } + nvDebugCheck(iscale > 1); + nvDebugCheck(samples > 0); + + const float scale = 1.0f / iscale; + + m_width = f.width() * iscale; + m_windowSize = (int)ceilf(2 * m_width); + m_data = new float[m_windowSize]; + + const float offset = float(m_windowSize) / 2; + + float total = 0.0f; + for (int i = 0; i < m_windowSize; i++) + { + const float sample = f.sampleBox(i - offset, scale, samples); + m_data[i] = sample; + total += sample; + } + + const float inv = 1.0f / total; + for (int i = 0; i < m_windowSize; i++) + { + m_data[i] *= inv; + } } -/// Dtor. Kernel1::~Kernel1() { - delete m_data; + delete [] m_data; } -/// Print the kernel for debugging purposes. +// Print the kernel for debugging purposes. void Kernel1::debugPrint() { - for (int i = 0; i < m_windowSize; i++) { - nvDebug("%d: %f\n", i, m_data[i]); - } + for (int i = 0; i < m_windowSize; i++) { + nvDebug("%d: %f\n", i, m_data[i]); + } } -/// Ctor. 
Kernel2::Kernel2(uint ws) : m_windowSize(ws) { - m_data = new float[m_windowSize * m_windowSize]; + m_data = new float[m_windowSize * m_windowSize]; +} + +Kernel2::Kernel2(uint ws, const float * data) : m_windowSize(ws) +{ + m_data = new float[m_windowSize * m_windowSize]; + + memcpy(m_data, data, sizeof(float) * m_windowSize * m_windowSize); } -/// Copy ctor. Kernel2::Kernel2(const Kernel2 & k) : m_windowSize(k.m_windowSize) { - m_data = new float[m_windowSize * m_windowSize]; - for (uint i = 0; i < m_windowSize * m_windowSize; i++) { - m_data[i] = k.m_data[i]; - } + m_data = new float[m_windowSize * m_windowSize]; + for (uint i = 0; i < m_windowSize * m_windowSize; i++) { + m_data[i] = k.m_data[i]; + } } -/// Dtor. Kernel2::~Kernel2() { - delete m_data; + delete [] m_data; } -/// Normalize the filter. +// Normalize the filter. void Kernel2::normalize() { - float total = 0.0f; - for(uint i = 0; i < m_windowSize*m_windowSize; i++) { - total += fabs(m_data[i]); - } - - float inv = 1.0f / total; - for(uint i = 0; i < m_windowSize*m_windowSize; i++) { - m_data[i] *= inv; - } + float total = 0.0f; + for(uint i = 0; i < m_windowSize*m_windowSize; i++) { + total += fabs(m_data[i]); + } + + float inv = 1.0f / total; + for(uint i = 0; i < m_windowSize*m_windowSize; i++) { + m_data[i] *= inv; + } } -/// Transpose the kernel. +// Transpose the kernel. void Kernel2::transpose() { - for(uint i = 0; i < m_windowSize; i++) { - for(uint j = i+1; j < m_windowSize; j++) { - swap(m_data[i*m_windowSize + j], m_data[j*m_windowSize + i]); - } - } + for(uint i = 0; i < m_windowSize; i++) { + for(uint j = i+1; j < m_windowSize; j++) { + swap(m_data[i*m_windowSize + j], m_data[j*m_windowSize + i]); + } + } } -/// Init laplacian filter, usually used for sharpening. +// Init laplacian filter, usually used for sharpening. void Kernel2::initLaplacian() { - nvDebugCheck(m_windowSize == 3); -// m_data[0] = -1; m_data[1] = -1; m_data[2] = -1; -// m_data[3] = -1; m_data[4] = +8; m_data[5] = -1; -// m_data[6] = -1; m_data[7] = -1; m_data[8] = -1; - - m_data[0] = +0; m_data[1] = -1; m_data[2] = +0; - m_data[3] = -1; m_data[4] = +4; m_data[5] = -1; - m_data[6] = +0; m_data[7] = -1; m_data[8] = +0; - -// m_data[0] = +1; m_data[1] = -2; m_data[2] = +1; -// m_data[3] = -2; m_data[4] = +4; m_data[5] = -2; -// m_data[6] = +1; m_data[7] = -2; m_data[8] = +1; + nvDebugCheck(m_windowSize == 3); + // m_data[0] = -1; m_data[1] = -1; m_data[2] = -1; + // m_data[3] = -1; m_data[4] = +8; m_data[5] = -1; + // m_data[6] = -1; m_data[7] = -1; m_data[8] = -1; + + m_data[0] = +0; m_data[1] = -1; m_data[2] = +0; + m_data[3] = -1; m_data[4] = +4; m_data[5] = -1; + m_data[6] = +0; m_data[7] = -1; m_data[8] = +0; + + // m_data[0] = +1; m_data[1] = -2; m_data[2] = +1; + // m_data[3] = -2; m_data[4] = +4; m_data[5] = -2; + // m_data[6] = +1; m_data[7] = -2; m_data[8] = +1; } -/// Init simple edge detection filter. +// Init simple edge detection filter. void Kernel2::initEdgeDetection() { - nvCheck(m_windowSize == 3); - m_data[0] = 0; m_data[1] = 0; m_data[2] = 0; - m_data[3] =-1; m_data[4] = 0; m_data[5] = 1; - m_data[6] = 0; m_data[7] = 0; m_data[8] = 0; + nvCheck(m_windowSize == 3); + m_data[0] = 0; m_data[1] = 0; m_data[2] = 0; + m_data[3] =-1; m_data[4] = 0; m_data[5] = 1; + m_data[6] = 0; m_data[7] = 0; m_data[8] = 0; } -/// Init sobel filter. +// Init sobel filter. 
void Kernel2::initSobel() { - if (m_windowSize == 3) - { - m_data[0] = -1; m_data[1] = 0; m_data[2] = 1; - m_data[3] = -2; m_data[4] = 0; m_data[5] = 2; - m_data[6] = -1; m_data[7] = 0; m_data[8] = 1; - } - else if (m_windowSize == 5) - { - float elements[] = { + if (m_windowSize == 3) + { + m_data[0] = -1; m_data[1] = 0; m_data[2] = 1; + m_data[3] = -2; m_data[4] = 0; m_data[5] = 2; + m_data[6] = -1; m_data[7] = 0; m_data[8] = 1; + } + else if (m_windowSize == 5) + { + float elements[] = { -1, -2, 0, 2, 1, -2, -3, 0, 3, 2, -3, -4, 0, 4, 3, -2, -3, 0, 3, 2, -1, -2, 0, 2, 1 - }; + }; - for (int i = 0; i < 5*5; i++) { - m_data[i] = elements[i]; - } - } - else if (m_windowSize == 7) - { - float elements[] = { + for (int i = 0; i < 5*5; i++) { + m_data[i] = elements[i]; + } + } + else if (m_windowSize == 7) + { + float elements[] = { -1, -2, -3, 0, 3, 2, 1, -2, -3, -4, 0, 4, 3, 2, -3, -4, -5, 0, 5, 4, 3, @@ -416,15 +436,15 @@ -3, -4, -5, 0, 5, 4, 3, -2, -3, -4, 0, 4, 3, 2, -1, -2, -3, 0, 3, 2, 1 - }; + }; - for (int i = 0; i < 7*7; i++) { - m_data[i] = elements[i]; - } - } - else if (m_windowSize == 9) - { - float elements[] = { + for (int i = 0; i < 7*7; i++) { + m_data[i] = elements[i]; + } + } + else if (m_windowSize == 9) + { + float elements[] = { -1, -2, -3, -4, 0, 4, 3, 2, 1, -2, -3, -4, -5, 0, 5, 4, 3, 2, -3, -4, -5, -6, 0, 6, 5, 4, 3, @@ -434,47 +454,47 @@ -3, -4, -5, -6, 0, 6, 5, 4, 3, -2, -3, -4, -5, 0, 5, 4, 3, 2, -1, -2, -3, -4, 0, 4, 3, 2, 1 - }; - - for (int i = 0; i < 9*9; i++) { - m_data[i] = elements[i]; - } - } + }; + + for (int i = 0; i < 9*9; i++) { + m_data[i] = elements[i]; + } + } } -/// Init prewitt filter. +// Init prewitt filter. void Kernel2::initPrewitt() { - if (m_windowSize == 3) - { - m_data[0] = -1; m_data[1] = 0; m_data[2] = -1; - m_data[3] = -1; m_data[4] = 0; m_data[5] = -1; - m_data[6] = -1; m_data[7] = 0; m_data[8] = -1; - } - else if (m_windowSize == 5) - { - // @@ Is this correct? - float elements[] = { + if (m_windowSize == 3) + { + m_data[0] = -1; m_data[1] = 0; m_data[2] = -1; + m_data[3] = -1; m_data[4] = 0; m_data[5] = -1; + m_data[6] = -1; m_data[7] = 0; m_data[8] = -1; + } + else if (m_windowSize == 5) + { + // @@ Is this correct? + float elements[] = { -2, -1, 0, 1, 2, -2, -1, 0, 1, 2, -2, -1, 0, 1, 2, -2, -1, 0, 1, 2, -2, -1, 0, 1, 2 - }; + }; - for (int i = 0; i < 5*5; i++) { - m_data[i] = elements[i]; - } - } + for (int i = 0; i < 5*5; i++) { + m_data[i] = elements[i]; + } + } } -/// Init blended sobel filter. +// Init blended sobel filter. 
void Kernel2::initBlendedSobel(const Vector4 & scale) { - nvCheck(m_windowSize == 9); + nvCheck(m_windowSize == 9); - { - const float elements[] = { + { + const float elements[] = { -1, -2, -3, -4, 0, 4, 3, 2, 1, -2, -3, -4, -5, 0, 5, 4, 3, 2, -3, -4, -5, -6, 0, 6, 5, 4, 3, @@ -484,14 +504,14 @@ -3, -4, -5, -6, 0, 6, 5, 4, 3, -2, -3, -4, -5, 0, 5, 4, 3, 2, -1, -2, -3, -4, 0, 4, 3, 2, 1 - }; - - for (int i = 0; i < 9*9; i++) { - m_data[i] = elements[i] * scale.w(); - } - } - { - const float elements[] = { + }; + + for (int i = 0; i < 9*9; i++) { + m_data[i] = elements[i] * scale.w; + } + } + { + const float elements[] = { -1, -2, -3, 0, 3, 2, 1, -2, -3, -4, 0, 4, 3, 2, -3, -4, -5, 0, 5, 4, 3, @@ -499,107 +519,109 @@ -3, -4, -5, 0, 5, 4, 3, -2, -3, -4, 0, 4, 3, 2, -1, -2, -3, 0, 3, 2, 1, - }; + }; - for (int i = 0; i < 7; i++) { - for (int e = 0; e < 7; e++) { - m_data[(i + 1) * 9 + e + 1] += elements[i * 7 + e] * scale.z(); - } - } - } - { - const float elements[] = { + for (int i = 0; i < 7; i++) { + for (int e = 0; e < 7; e++) { + m_data[(i + 1) * 9 + e + 1] += elements[i * 7 + e] * scale.z; + } + } + } + { + const float elements[] = { -1, -2, 0, 2, 1, -2, -3, 0, 3, 2, -3, -4, 0, 4, 3, -2, -3, 0, 3, 2, -1, -2, 0, 2, 1 - }; + }; - for (int i = 0; i < 5; i++) { - for (int e = 0; e < 5; e++) { - m_data[(i + 2) * 9 + e + 2] += elements[i * 5 + e] * scale.y(); - } - } - } - { - const float elements[] = { + for (int i = 0; i < 5; i++) { + for (int e = 0; e < 5; e++) { + m_data[(i + 2) * 9 + e + 2] += elements[i * 5 + e] * scale.y; + } + } + } + { + const float elements[] = { -1, 0, 1, -2, 0, 2, -1, 0, 1, - }; + }; - for (int i = 0; i < 3; i++) { - for (int e = 0; e < 3; e++) { - m_data[(i + 3) * 9 + e + 3] += elements[i * 3 + e] * scale.x(); - } - } - } + for (int i = 0; i < 3; i++) { + for (int e = 0; e < 3; e++) { + m_data[(i + 3) * 9 + e + 3] += elements[i * 3 + e] * scale.x; + } + } + } } PolyphaseKernel::PolyphaseKernel(const Filter & f, uint srcLength, uint dstLength, int samples/*= 32*/) { - nvDebugCheck(samples > 0); + nvDebugCheck(samples > 0); - float scale = float(dstLength) / float(srcLength); - const float iscale = 1.0f / scale; + float scale = float(dstLength) / float(srcLength); + const float iscale = 1.0f / scale; - if (scale > 1) { - // Upsampling. - samples = 1; - scale = 1; - } - - m_length = dstLength; - m_width = f.width() * iscale; - m_windowSize = (int)ceilf(m_width * 2) + 1; - - m_data = new float[m_windowSize * m_length]; - memset(m_data, 0, sizeof(float) * m_windowSize * m_length); - - for (uint i = 0; i < m_length; i++) - { - const float center = (0.5f + i) * iscale; - - const int left = (int)floorf(center - m_width); - const int right = (int)ceilf(center + m_width); - nvDebugCheck(right - left <= m_windowSize); - - float total = 0.0f; - for (int j = 0; j < m_windowSize; j++) - { - const float sample = f.sampleBox(left + j - center, scale, samples); - - m_data[i * m_windowSize + j] = sample; - total += sample; - } - - // normalize weights. - for (int j = 0; j < m_windowSize; j++) - { - m_data[i * m_windowSize + j] /= total; - } - } + if (scale > 1) { + // Upsampling. 
+ samples = 1; + scale = 1; + } + + m_length = dstLength; + m_width = f.width() * iscale; + m_windowSize = (int)ceilf(m_width * 2) + 1; + + m_data = new float[m_windowSize * m_length]; + memset(m_data, 0, sizeof(float) * m_windowSize * m_length); + + for (uint i = 0; i < m_length; i++) + { + const float center = (0.5f + i) * iscale; + + const int left = (int)floorf(center - m_width); + const int right = (int)ceilf(center + m_width); + nvDebugCheck(right - left <= m_windowSize); + + float total = 0.0f; + for (int j = 0; j < m_windowSize; j++) + { + const float sample = f.sampleBox(left + j - center, scale, samples); + + //printf("%f %X\n", sample, *(uint32 *)&sample); + + m_data[i * m_windowSize + j] = sample; + total += sample; + } + + // normalize weights. + for (int j = 0; j < m_windowSize; j++) + { + m_data[i * m_windowSize + j] /= total; + } + } } PolyphaseKernel::~PolyphaseKernel() { - delete [] m_data; + delete [] m_data; } -/// Print the kernel for debugging purposes. +// Print the kernel for debugging purposes. void PolyphaseKernel::debugPrint() const { - for (uint i = 0; i < m_length; i++) - { - nvDebug("%d: ", i); - for (int j = 0; j < m_windowSize; j++) - { - nvDebug(" %6.4f", m_data[i * m_windowSize + j]); - } - nvDebug("\n"); - } + for (uint i = 0; i < m_length; i++) + { + nvDebug("%d: ", i); + for (int j = 0; j < m_windowSize; j++) + { + nvDebug(" %6.4f", m_data[i * m_windowSize + j]); + } + nvDebug("\n"); + } } Index: ps/trunk/libraries/source/nvtt/src/src/nvimage/FloatImage.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvimage/FloatImage.h +++ ps/trunk/libraries/source/nvtt/src/src/nvimage/FloatImage.h @@ -3,265 +3,392 @@ #ifndef NV_IMAGE_FLOATIMAGE_H #define NV_IMAGE_FLOATIMAGE_H -#include +#include "nvimage.h" -#include +#include "nvmath/nvmath.h" // lerp -#include -#include // clamp +#include "nvcore/Debug.h" +#include "nvcore/Utils.h" // clamp #include // abs namespace nv { -class Vector4; -class Matrix; -class Image; -class Filter; -class Kernel1; -class Kernel2; -class PolyphaseKernel; + class Vector4; + class Matrix; + class Image; + class Filter; + class Kernel1; + class Kernel2; + class PolyphaseKernel; + + /// Multicomponent floating point image class. + class FloatImage + { + public: + + enum WrapMode { + WrapMode_Clamp, + WrapMode_Repeat, + WrapMode_Mirror + }; + + NVIMAGE_API FloatImage(); + NVIMAGE_API FloatImage(const Image * img); + NVIMAGE_API virtual ~FloatImage(); + + /** @name Conversion. */ + //@{ + NVIMAGE_API void initFrom(const Image * img); + NVIMAGE_API Image * createImage(uint base_component = 0, uint num = 4) const; + NVIMAGE_API Image * createImageGammaCorrect(float gamma = 2.2f) const; + //@} + + /** @name Allocation. */ + //@{ + NVIMAGE_API void allocate(uint c, uint w, uint h, uint d = 1); + NVIMAGE_API void free(); // Does not clear members. + NVIMAGE_API void resizeChannelCount(uint c); + //@} + + /** @name Manipulation. 
*/ + //@{ + NVIMAGE_API void clear(float f = 0.0f); + NVIMAGE_API void clear(uint component, float f = 0.0f); + NVIMAGE_API void copyChannel(uint src, uint dst); + + NVIMAGE_API void normalize(uint base_component); + + NVIMAGE_API void packNormals(uint base_component); + NVIMAGE_API void expandNormals(uint base_component); + NVIMAGE_API void scaleBias(uint base_component, uint num, float scale, float add); + + NVIMAGE_API void clamp(uint base_component, uint num, float low, float high); + + NVIMAGE_API void toLinear(uint base_component, uint num, float gamma = 2.2f); + NVIMAGE_API void toGamma(uint base_component, uint num, float gamma = 2.2f); + NVIMAGE_API void exponentiate(uint base_component, uint num, float power); + + NVIMAGE_API void transform(uint base_component, const Matrix & m, const Vector4 & offset); + NVIMAGE_API void swizzle(uint base_component, uint r, uint g, uint b, uint a); + + NVIMAGE_API FloatImage * fastDownSample() const; + NVIMAGE_API FloatImage * downSample(const Filter & filter, WrapMode wm) const; + NVIMAGE_API FloatImage * downSample(const Filter & filter, WrapMode wm, uint alpha) const; + NVIMAGE_API FloatImage * resize(const Filter & filter, uint w, uint h, WrapMode wm) const; + NVIMAGE_API FloatImage * resize(const Filter & filter, uint w, uint h, uint d, WrapMode wm) const; + NVIMAGE_API FloatImage * resize(const Filter & filter, uint w, uint h, WrapMode wm, uint alpha) const; + NVIMAGE_API FloatImage * resize(const Filter & filter, uint w, uint h, uint d, WrapMode wm, uint alpha) const; + + NVIMAGE_API void convolve(const Kernel2 & k, uint c, WrapMode wm); + + //NVIMAGE_API FloatImage * downSample(const Kernel1 & filter, WrapMode wm) const; + //NVIMAGE_API FloatImage * downSample(const Kernel1 & filter, uint w, uint h, WrapMode wm) const; + //@} + + NVIMAGE_API float applyKernelXY(const Kernel2 * k, int x, int y, int z, uint c, WrapMode wm) const; + NVIMAGE_API float applyKernelX(const Kernel1 * k, int x, int y, int z, uint c, WrapMode wm) const; + NVIMAGE_API float applyKernelY(const Kernel1 * k, int x, int y, int z, uint c, WrapMode wm) const; + NVIMAGE_API float applyKernelZ(const Kernel1 * k, int x, int y, int z, uint c, WrapMode wm) const; + NVIMAGE_API void applyKernelX(const PolyphaseKernel & k, int y, int z, uint c, WrapMode wm, float * output) const; + NVIMAGE_API void applyKernelY(const PolyphaseKernel & k, int x, int z, uint c, WrapMode wm, float * output) const; + NVIMAGE_API void applyKernelZ(const PolyphaseKernel & k, int x, int y, uint c, WrapMode wm, float * output) const; + NVIMAGE_API void applyKernelX(const PolyphaseKernel & k, int y, int z, uint c, uint a, WrapMode wm, float * output) const; + NVIMAGE_API void applyKernelY(const PolyphaseKernel & k, int x, int z, uint c, uint a, WrapMode wm, float * output) const; + NVIMAGE_API void applyKernelZ(const PolyphaseKernel & k, int x, int y, uint c, uint a, WrapMode wm, float * output) const; + + + NVIMAGE_API void flipX(); + NVIMAGE_API void flipY(); + NVIMAGE_API void flipZ(); + + NVIMAGE_API float alphaTestCoverage(float alphaRef, int alphaChannel, float alphaScale = 1.0f) const; + NVIMAGE_API void scaleAlphaToCoverage(float coverage, float alphaRef, int alphaChannel); + + + uint width() const { return m_width; } + uint height() const { return m_height; } + uint depth() const { return m_depth; } + uint componentCount() const { return m_componentCount; } + uint floatCount() const { return m_floatCount; } + uint pixelCount() const { return m_pixelCount; } + + + /** @name Pixel access. 
*/ + //@{ + const float * channel(uint c) const; + float * channel(uint c); + + const float * plane(uint c, uint z) const; + float * plane(uint c, uint z); + + const float * scanline(uint c, uint y, uint z) const; + float * scanline(uint c, uint y, uint z); + + //float pixel(uint c, uint x, uint y) const; + //float & pixel(uint c, uint x, uint y); + + float pixel(uint c, uint x, uint y, uint z) const; + float & pixel(uint c, uint x, uint y, uint z); + + float pixel(uint c, uint idx) const; + float & pixel(uint c, uint idx); + + float pixel(uint idx) const; + float & pixel(uint idx); + + float sampleNearest(uint c, float x, float y, WrapMode wm) const; + float sampleLinear(uint c, float x, float y, WrapMode wm) const; + + float sampleNearest(uint c, float x, float y, float z, WrapMode wm) const; + float sampleLinear(uint c, float x, float y, float z, WrapMode wm) const; + + float sampleNearestClamp(uint c, float x, float y) const; + float sampleNearestRepeat(uint c, float x, float y) const; + float sampleNearestMirror(uint c, float x, float y) const; + + float sampleNearestClamp(uint c, float x, float y, float z) const; + float sampleNearestRepeat(uint c, float x, float y, float z) const; + float sampleNearestMirror(uint c, float x, float y, float z) const; + + NVIMAGE_API float sampleLinearClamp(uint c, float x, float y) const; + float sampleLinearRepeat(uint c, float x, float y) const; + float sampleLinearMirror(uint c, float x, float y) const; + + float sampleLinearClamp(uint c, float x, float y, float z) const; + float sampleLinearRepeat(uint c, float x, float y, float z) const; + float sampleLinearMirror(uint c, float x, float y, float z) const; + //@} + + + NVIMAGE_API FloatImage* clone() const; + + public: + + uint index(uint x, uint y, uint z) const; + uint indexClamp(int x, int y, int z) const; + uint indexRepeat(int x, int y, int z) const; + uint indexMirror(int x, int y, int z) const; + uint index(int x, int y, int z, WrapMode wm) const; + + float bilerp(uint c, int ix0, int iy0, int ix1, int iy1, float fx, float fy) const; + float trilerp(uint c, int ix0, int iy0, int iz0, int ix1, int iy1, int iz1, float fx, float fy, float fz) const; + + public: + + uint16 m_componentCount; + uint16 m_width; + uint16 m_height; + uint16 m_depth; + uint32 m_pixelCount; + uint32 m_floatCount; + float * m_mem; + + }; + + + /// Get const channel pointer. + inline const float * FloatImage::channel(uint c) const + { + nvDebugCheck(m_mem != NULL); + nvDebugCheck(c < m_componentCount); + return m_mem + c * m_pixelCount; + } + + /// Get channel pointer. + inline float * FloatImage::channel(uint c) { + nvDebugCheck(m_mem != NULL); + nvDebugCheck(c < m_componentCount); + return m_mem + c * m_pixelCount; + } + + inline const float * FloatImage::plane(uint c, uint z) const { + nvDebugCheck(z < m_depth); + return channel(c) + z * m_width * m_height; + } + + inline float * FloatImage::plane(uint c, uint z) { + nvDebugCheck(z < m_depth); + return channel(c) + z * m_width * m_height; + } + + /// Get const scanline pointer. + inline const float * FloatImage::scanline(uint c, uint y, uint z) const + { + nvDebugCheck(y < m_height); + return plane(c, z) + y * m_width; + } + + /// Get scanline pointer. + inline float * FloatImage::scanline(uint c, uint y, uint z) + { + nvDebugCheck(y < m_height); + return plane(c, z) + y * m_width; + } + + /// Get pixel component. 
+ inline float FloatImage::pixel(uint c, uint x, uint y, uint z) const + { + nvDebugCheck(m_mem != NULL); + nvDebugCheck(c < m_componentCount); + nvDebugCheck(x < m_width); + nvDebugCheck(y < m_height); + nvDebugCheck(z < m_depth); + return m_mem[c * m_pixelCount + index(x, y, z)]; + } + + /// Get pixel component. + inline float & FloatImage::pixel(uint c, uint x, uint y, uint z) + { + nvDebugCheck(m_mem != NULL); + nvDebugCheck(c < m_componentCount); + nvDebugCheck(x < m_width); + nvDebugCheck(y < m_height); + nvDebugCheck(z < m_depth); + return m_mem[c * m_pixelCount + index(x, y, z)]; + } + + /// Get pixel component. + inline float FloatImage::pixel(uint c, uint idx) const + { + nvDebugCheck(m_mem != NULL); + nvDebugCheck(c < m_componentCount); + nvDebugCheck(idx < m_pixelCount); + return m_mem[c * m_pixelCount + idx]; + } + + /// Get pixel component. + inline float & FloatImage::pixel(uint c, uint idx) + { + nvDebugCheck(m_mem != NULL); + nvDebugCheck(c < m_componentCount); + nvDebugCheck(idx < m_pixelCount); + return m_mem[c * m_pixelCount + idx]; + } + + /// Get pixel component. + inline float FloatImage::pixel(uint idx) const + { + nvDebugCheck(m_mem != NULL); + nvDebugCheck(idx < m_floatCount); + return m_mem[idx]; + } + + /// Get pixel component. + inline float & FloatImage::pixel(uint idx) + { + nvDebugCheck(m_mem != NULL); + nvDebugCheck(idx < m_floatCount); + return m_mem[idx]; + } + + inline uint FloatImage::index(uint x, uint y, uint z) const + { + nvDebugCheck(x < m_width); + nvDebugCheck(y < m_height); + nvDebugCheck(z < m_depth); + uint idx = (z * m_height + y) * m_width + x; + nvDebugCheck(idx < m_pixelCount); + return idx; + } + + + inline int wrapClamp(int x, int w) + { + return nv::clamp(x, 0, w - 1); + } + inline int wrapRepeat(int x, int w) + { + if (x >= 0) return x % w; + else return (x + 1) % w + w - 1; + } + inline int wrapMirror(int x, int w) + { + if (w == 1) x = 0; + + x = abs(x); + while (x >= w) { + x = abs(w + w - x - 2); + } + + return x; + } + + + + inline uint FloatImage::indexClamp(int x, int y, int z) const + { + x = wrapClamp(x, m_width); + y = wrapClamp(y, m_height); + z = wrapClamp(z, m_depth); + return index(x, y, z); + } + + + inline uint FloatImage::indexRepeat(int x, int y, int z) const + { + x = wrapRepeat(x, m_width); + y = wrapRepeat(y, m_height); + z = wrapRepeat(z, m_depth); + return index(x, y, z); + } + + inline uint FloatImage::indexMirror(int x, int y, int z) const + { + x = wrapMirror(x, m_width); + y = wrapMirror(y, m_height); + z = wrapMirror(z, m_depth); + return index(x, y, z); + } + + inline uint FloatImage::index(int x, int y, int z, WrapMode wm) const + { + if (wm == WrapMode_Clamp) return indexClamp(x, y, z); + if (wm == WrapMode_Repeat) return indexRepeat(x, y, z); + /*if (wm == WrapMode_Mirror)*/ return indexMirror(x, y, z); + } + + inline float FloatImage::bilerp(uint c, int ix0, int iy0, int ix1, int iy1, float fx, float fy) const { + int iz = 0; + float f1 = pixel(c, ix0, iy0, iz); + float f2 = pixel(c, ix1, iy0, iz); + float f3 = pixel(c, ix0, iy1, iz); + float f4 = pixel(c, ix1, iy1, iz); + + float i1 = lerp(f1, f2, fx); + float i2 = lerp(f3, f4, fx); + + return lerp(i1, i2, fy); + } + + inline float FloatImage::trilerp(uint c, int ix0, int iy0, int iz0, int ix1, int iy1, int iz1, float fx, float fy, float fz) const { + float f000 = pixel(c, ix0, iy0, iz0); + float f100 = pixel(c, ix1, iy0, iz0); + float f010 = pixel(c, ix0, iy1, iz0); + float f110 = pixel(c, ix1, iy1, iz0); + float f001 = pixel(c, ix0, iy0, iz1); + 
float f101 = pixel(c, ix1, iy0, iz1); + float f011 = pixel(c, ix0, iy1, iz1); + float f111 = pixel(c, ix1, iy1, iz1); + + float i1 = lerp(f000, f001, fz); + float i2 = lerp(f010, f011, fz); + float j1 = lerp(f100, f101, fz); + float j2 = lerp(f110, f111, fz); + + float w1 = lerp(i1, i2, fy); + float w2 = lerp(j1, j2, fy); + + return lerp(w1, w2, fx); + } + + // Does not compare channel count. + inline bool sameLayout(const FloatImage * img0, const FloatImage * img1) { + if (img0 == NULL || img1 == NULL) return false; + return img0->width() == img1->width() && img0->height() == img1->height() && img0->depth() == img1->depth(); + } -/// Multicomponent floating point image class. -class FloatImage -{ -public: - - enum WrapMode { - WrapMode_Clamp, - WrapMode_Repeat, - WrapMode_Mirror - }; - - NVIMAGE_API FloatImage(); - NVIMAGE_API FloatImage(const Image * img); - NVIMAGE_API virtual ~FloatImage(); - - /** @name Conversion. */ - //@{ - NVIMAGE_API void initFrom(const Image * img); - NVIMAGE_API Image * createImage(uint base_component = 0, uint num = 4) const; - NVIMAGE_API Image * createImageGammaCorrect(float gamma = 2.2f) const; - //@} - - /** @name Allocation. */ - //@{ - NVIMAGE_API void allocate(uint c, uint w, uint h); - NVIMAGE_API void free(); // Does not clear members. - //@} - - /** @name Manipulation. */ - //@{ - NVIMAGE_API void clear(float f=0.0f); - - NVIMAGE_API void normalize(uint base_component); - - NVIMAGE_API void packNormals(uint base_component); - NVIMAGE_API void expandNormals(uint base_component); - NVIMAGE_API void scaleBias(uint base_component, uint num, float scale, float add); - - //NVIMAGE_API void clamp(uint base_component, uint num); - NVIMAGE_API void clamp(float low, float high); - - NVIMAGE_API void toLinear(uint base_component, uint num, float gamma = 2.2f); - NVIMAGE_API void toGamma(uint base_component, uint num, float gamma = 2.2f); - NVIMAGE_API void exponentiate(uint base_component, uint num, float power); - - - NVIMAGE_API FloatImage * fastDownSample() const; - NVIMAGE_API FloatImage * downSample(const Filter & filter, WrapMode wm) const; - NVIMAGE_API FloatImage * downSample(const Filter & filter, WrapMode wm, uint alpha) const; - NVIMAGE_API FloatImage * resize(const Filter & filter, uint w, uint h, WrapMode wm) const; - - NVIMAGE_API FloatImage * resize(const Filter & filter, uint w, uint h, WrapMode wm, uint alpha) const; - //@} - - NVIMAGE_API float applyKernel(const Kernel2 * k, int x, int y, uint c, WrapMode wm) const; - NVIMAGE_API float applyKernelVertical(const Kernel1 * k, int x, int y, uint c, WrapMode wm) const; - NVIMAGE_API float applyKernelHorizontal(const Kernel1 * k, int x, int y, uint c, WrapMode wm) const; - NVIMAGE_API void applyKernelVertical(const PolyphaseKernel & k, int x, uint c, WrapMode wm, float * output) const; - NVIMAGE_API void applyKernelHorizontal(const PolyphaseKernel & k, int y, uint c, WrapMode wm, float * output) const; - NVIMAGE_API void applyKernelVertical(const PolyphaseKernel & k, int x, uint c, uint a, WrapMode wm, float * output) const; - NVIMAGE_API void applyKernelHorizontal(const PolyphaseKernel & k, int y, uint c, uint a, WrapMode wm, float * output) const; - - - uint width() const { return m_width; } - uint height() const { return m_height; } - uint componentNum() const { return m_componentNum; } - uint count() const { return m_count; } - - - /** @name Pixel access. 
*/ - //@{ - const float * channel(uint c) const; - float * channel(uint c); - - const float * scanline(uint y, uint c) const; - float * scanline(uint y, uint c); - - void setPixel(float f, uint x, uint y, uint c); - void addPixel(float f, uint x, uint y, uint c); - float pixel(uint x, uint y, uint c) const; - - void setPixel(float f, uint idx); - float pixel(uint idx) const; - - float sampleNearest(float x, float y, int c, WrapMode wm) const; - float sampleLinear(float x, float y, int c, WrapMode wm) const; - - float sampleNearestClamp(float x, float y, int c) const; - float sampleNearestRepeat(float x, float y, int c) const; - float sampleNearestMirror(float x, float y, int c) const; - - float sampleLinearClamp(float x, float y, int c) const; - float sampleLinearRepeat(float x, float y, int c) const; - float sampleLinearMirror(float x, float y, int c) const; - //@} - - - FloatImage* clone() const; - -public: - - uint index(uint x, uint y) const; - uint indexClamp(int x, int y) const; - uint indexRepeat(int x, int y) const; - uint indexMirror(int x, int y) const; - uint index(int x, int y, WrapMode wm) const; - -public: - - uint16 m_width; ///< Width of the texture. - uint16 m_height; ///< Height of the texture. - uint32 m_componentNum; ///< Number of components. - uint32 m_count; ///< Image pixel count. - float * m_mem; - -}; - - -/// Get const channel pointer. -inline const float * FloatImage::channel(uint c) const -{ - nvDebugCheck(m_mem != NULL); - nvDebugCheck(c < m_componentNum); - return m_mem + c * m_width * m_height; -} - -/// Get channel pointer. -inline float * FloatImage::channel(uint c) { - nvDebugCheck(m_mem != NULL); - nvDebugCheck(c < m_componentNum); - return m_mem + c * m_width * m_height; -} - -/// Get const scanline pointer. -inline const float * FloatImage::scanline(uint y, uint c) const -{ - nvDebugCheck(y < m_height); - return channel(c) + y * m_width; -} - -/// Get scanline pointer. -inline float * FloatImage::scanline(uint y, uint c) -{ - nvDebugCheck(y < m_height); - return channel(c) + y * m_width; -} - -/// Set pixel component. -inline void FloatImage::setPixel(float f, uint x, uint y, uint c) -{ - nvDebugCheck(m_mem != NULL); - nvDebugCheck(x < m_width); - nvDebugCheck(y < m_height); - nvDebugCheck(c < m_componentNum); - m_mem[(c * m_height + y) * m_width + x] = f; -} - -/// Add to pixel component. -inline void FloatImage::addPixel(float f, uint x, uint y, uint c) -{ - nvDebugCheck(m_mem != NULL); - nvDebugCheck(x < m_width); - nvDebugCheck(y < m_height); - nvDebugCheck(c < m_componentNum); - m_mem[(c * m_height + y) * m_width + x] += f; -} - -/// Get pixel component. -inline float FloatImage::pixel(uint x, uint y, uint c) const -{ - nvDebugCheck(m_mem != NULL); - nvDebugCheck(x < m_width); - nvDebugCheck(y < m_height); - nvDebugCheck(c < m_componentNum); - return m_mem[(c * m_height + y) * m_width + x]; -} - -/// Set pixel component. -inline void FloatImage::setPixel(float f, uint idx) -{ - nvDebugCheck(idx < m_count); - m_mem[idx] = f; -} - -/// Get pixel component. 
-inline float FloatImage::pixel(uint idx) const -{ - nvDebugCheck(idx < m_count); - return m_mem[idx]; -} - -inline uint FloatImage::index(uint x, uint y) const -{ - nvDebugCheck(x < m_width); - nvDebugCheck(y < m_height); - return y * m_width + x; -} - -inline uint FloatImage::indexClamp(int x, int y) const -{ - return nv::clamp(y, int(0), int(m_height-1)) * m_width + nv::clamp(x, int(0), int(m_width-1)); -} - -inline int repeat_remainder(int a, int b) -{ - if (a >= 0) return a % b; - else return (a + 1) % b + b - 1; -} - -inline uint FloatImage::indexRepeat(int x, int y) const -{ - return repeat_remainder(y, m_height) * m_width + repeat_remainder(x, m_width); -} - -inline uint FloatImage::indexMirror(int x, int y) const -{ - if (m_width == 1) x = 0; - - x = abs(x); - while (x >= m_width) { - x = abs(m_width + m_width - x - 2); - } - - if (m_height == 1) y = 0; - - y = abs(y); - while (y >= m_height) { - y = abs(m_height + m_height - y - 2); - } - - return index(x, y); -} - -inline uint FloatImage::index(int x, int y, WrapMode wm) const -{ - if (wm == WrapMode_Clamp) return indexClamp(x, y); - if (wm == WrapMode_Repeat) return indexRepeat(x, y); - /*if (wm == WrapMode_Mirror)*/ return indexMirror(x, y); -} } // nv namespace Index: ps/trunk/libraries/source/nvtt/src/src/nvimage/FloatImage.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvimage/FloatImage.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvimage/FloatImage.cpp @@ -4,355 +4,530 @@ #include "Filter.h" #include "Image.h" -#include -#include - -#include -#include +#include "nvmath/Color.h" +#include "nvmath/Vector.inl" +#include "nvmath/Matrix.inl" +#include "nvmath/ftoi.h" +#include "nvmath/Gamma.h" + +#include "nvcore/Utils.h" // max +#include "nvcore/Ptr.h" +#include "nvcore/Memory.h" +#include "nvcore/Array.inl" #include +#include // memset, memcpy using namespace nv; -namespace -{ - static int iround(float f) - { - return int(f); - } - - static int ifloor(float f) - { - return int(floor(f)); - } - - static float frac(float f) - { - return f - floor(f); - } - - static int mirror(int x, int w) - { - x = abs(x); - while (x >= w) { - x = 2 * w - x - 2; - } - return x; - } -} - /// Ctor. -FloatImage::FloatImage() : m_width(0), m_height(0), - m_componentNum(0), m_count(0), m_mem(NULL) +FloatImage::FloatImage() : m_componentCount(0), m_width(0), m_height(0), m_depth(0), + m_pixelCount(0), m_floatCount(0), m_mem(NULL) { } /// Ctor. Init from image. -FloatImage::FloatImage(const Image * img) : m_width(0), m_height(0), - m_componentNum(0), m_count(0), m_mem(NULL) +FloatImage::FloatImage(const Image * img) : m_componentCount(0), m_width(0), m_height(0), m_depth(0), + m_pixelCount(0), m_floatCount(0), m_mem(NULL) { - initFrom(img); + initFrom(img); } /// Dtor. FloatImage::~FloatImage() { - free(); + free(); } /// Init the floating point image from a regular image. 
void FloatImage::initFrom(const Image * img) { - nvCheck(img != NULL); - - allocate(4, img->width(), img->height()); - - float * red_channel = channel(0); - float * green_channel = channel(1); - float * blue_channel = channel(2); - float * alpha_channel = channel(3); - - const uint count = m_width * m_height; - for(uint i = 0; i < count; i++) { - Color32 pixel = img->pixel(i); - red_channel[i] = float(pixel.r) / 255.0f; - green_channel[i] = float(pixel.g) / 255.0f; - blue_channel[i] = float(pixel.b) / 255.0f; - alpha_channel[i] = float(pixel.a) / 255.0f; - } + nvCheck(img != NULL); + + allocate(4, img->width(), img->height(), img->depth()); + + float * red_channel = channel(0); + float * green_channel = channel(1); + float * blue_channel = channel(2); + float * alpha_channel = channel(3); + + const uint count = m_pixelCount; + for (uint i = 0; i < count; i++) { + Color32 pixel = img->pixel(i); + red_channel[i] = float(pixel.r) / 255.0f; + green_channel[i] = float(pixel.g) / 255.0f; + blue_channel[i] = float(pixel.b) / 255.0f; + alpha_channel[i] = float(pixel.a) / 255.0f; + } } /// Convert the floating point image to a regular image. -Image * FloatImage::createImage(uint base_component/*= 0*/, uint num/*= 4*/) const +Image * FloatImage::createImage(uint baseComponent/*= 0*/, uint num/*= 4*/) const { - nvCheck(num <= 4); - nvCheck(base_component + num <= m_componentNum); - - AutoPtr img(new Image()); - img->allocate(m_width, m_height); - - const uint size = m_width * m_height; - for(uint i = 0; i < size; i++) { - - uint c; - uint8 rgba[4]= {0, 0, 0, 0xff}; - - for(c = 0; c < num; c++) { - float f = m_mem[size * (base_component + c) + i]; - rgba[c] = nv::clamp(int(255.0f * f), 0, 255); - } - - img->pixel(i) = Color32(rgba[0], rgba[1], rgba[2], rgba[3]); - } - - return img.release(); + nvCheck(num <= 4); + nvCheck(baseComponent + num <= m_componentCount); + + AutoPtr img(new Image()); + img->allocate(m_width, m_height, m_depth); + + for (uint i = 0; i < m_pixelCount; i++) { + + uint c; + uint8 rgba[4]= {0, 0, 0, 0xff}; + + for (c = 0; c < num; c++) { + float f = pixel(baseComponent + c, i); + rgba[c] = nv::clamp(int(255.0f * f), 0, 255); + } + + img->pixel(i) = Color32(rgba[0], rgba[1], rgba[2], rgba[3]); + } + + return img.release(); } /// Convert the floating point image to a regular image. Correct gamma of rgb, but not alpha. Image * FloatImage::createImageGammaCorrect(float gamma/*= 2.2f*/) const { - nvCheck(m_componentNum == 4); - - AutoPtr img(new Image()); - img->allocate(m_width, m_height); - - const float * rChannel = this->channel(0); - const float * gChannel = this->channel(1); - const float * bChannel = this->channel(2); - const float * aChannel = this->channel(3); - - const uint size = m_width * m_height; - for(uint i = 0; i < size; i++) - { - const uint8 r = nv::clamp(int(255.0f * pow(rChannel[i], 1.0f/gamma)), 0, 255); - const uint8 g = nv::clamp(int(255.0f * pow(gChannel[i], 1.0f/gamma)), 0, 255); - const uint8 b = nv::clamp(int(255.0f * pow(bChannel[i], 1.0f/gamma)), 0, 255); - const uint8 a = nv::clamp(int(255.0f * aChannel[i]), 0, 255); - - img->pixel(i) = Color32(r, g, b, a); - } - - return img.release(); -} - -/// Allocate a 2d float image of the given format and the given extents. 
-void FloatImage::allocate(uint c, uint w, uint h) -{ - free(); - - m_width = w; - m_height = h; - m_componentNum = c; - m_count = w * h * c; - m_mem = reinterpret_cast(::malloc(m_count * sizeof(float))); + nvCheck(m_componentCount == 4); + + AutoPtr img(new Image()); + img->allocate(m_width, m_height, m_depth); + + const float * rChannel = this->channel(0); + const float * gChannel = this->channel(1); + const float * bChannel = this->channel(2); + const float * aChannel = this->channel(3); + + const uint count = m_pixelCount; + for (uint i = 0; i < count; i++) + { + const uint8 r = nv::clamp(int(255.0f * pow(rChannel[i], 1.0f/gamma)), 0, 255); + const uint8 g = nv::clamp(int(255.0f * pow(gChannel[i], 1.0f/gamma)), 0, 255); + const uint8 b = nv::clamp(int(255.0f * pow(bChannel[i], 1.0f/gamma)), 0, 255); + const uint8 a = nv::clamp(int(255.0f * aChannel[i]), 0, 255); + + img->pixel(i) = Color32(r, g, b, a); + } + + return img.release(); +} + +/// Allocate a 2D float image of the given format and the given extents. +void FloatImage::allocate(uint c, uint w, uint h, uint d) +{ + if (m_componentCount != c || m_width != w || m_height != h || m_depth != d) + { + free(); + + m_width = w; + m_height = h; + m_depth = d; + m_componentCount = c; + m_pixelCount = w * h * d; + m_floatCount = m_pixelCount * c; + m_mem = malloc(m_floatCount); + } } /// Free the image, but don't clear the members. void FloatImage::free() { - ::free( reinterpret_cast(m_mem) ); - m_mem = NULL; + ::free(m_mem); + m_mem = NULL; +} + +void FloatImage::resizeChannelCount(uint c) +{ + if (m_componentCount != c) { + uint count = m_pixelCount * c; + m_mem = realloc(m_mem, count); + + if (c > m_componentCount) { + memset(m_mem + m_floatCount, 0, (count - m_floatCount) * sizeof(float)); + } + + m_componentCount = c; + m_floatCount = count; + } } void FloatImage::clear(float f/*=0.0f*/) { - for(uint i = 0; i < m_count; i++) { - m_mem[i] = f; - } + for (uint i = 0; i < m_floatCount; i++) { + m_mem[i] = f; + } } -void FloatImage::normalize(uint base_component) +void FloatImage::clear(uint c, float f/*= 0.0f*/) { - nvCheck(base_component + 3 <= m_componentNum); - - float * xChannel = this->channel(base_component + 0); - float * yChannel = this->channel(base_component + 1); - float * zChannel = this->channel(base_component + 2); + float * channel = this->channel(c); - const uint size = m_width * m_height; - for(uint i = 0; i < size; i++) { - - Vector3 normal(xChannel[i], yChannel[i], zChannel[i]); - normal = normalizeSafe(normal, Vector3(zero), 0.0f); - - xChannel[i] = normal.x(); - yChannel[i] = normal.y(); - zChannel[i] = normal.z(); - } + const uint count = m_pixelCount; + for (uint i = 0; i < count; i++) { + channel[i] = f; + } } -void FloatImage::packNormals(uint base_component) +void FloatImage::copyChannel(uint src, uint dst) { - scaleBias(base_component, 3, 0.5f, 1.0f); + nvCheck(src < m_componentCount); + nvCheck(dst < m_componentCount); + + const float * srcChannel = this->channel(src); + float * dstChannel = this->channel(dst); + + memcpy(dstChannel, srcChannel, sizeof(float)*m_pixelCount); } -void FloatImage::expandNormals(uint base_component) +void FloatImage::normalize(uint baseComponent) { - scaleBias(base_component, 3, 2, -0.5); + nvCheck(baseComponent + 3 <= m_componentCount); + + float * xChannel = this->channel(baseComponent + 0); + float * yChannel = this->channel(baseComponent + 1); + float * zChannel = this->channel(baseComponent + 2); + + const uint count = m_pixelCount; + for (uint i = 0; i < count; i++) { + + 
Vector3 normal(xChannel[i], yChannel[i], zChannel[i]); + normal = normalizeSafe(normal, Vector3(0), 0.0f); + + xChannel[i] = normal.x; + yChannel[i] = normal.y; + zChannel[i] = normal.z; + } +} + +void FloatImage::packNormals(uint baseComponent) +{ + scaleBias(baseComponent, 3, 0.5f, 0.5f); +} + +void FloatImage::expandNormals(uint baseComponent) +{ + scaleBias(baseComponent, 3, 2, -1.0); } -void FloatImage::scaleBias(uint base_component, uint num, float scale, float bias) +void FloatImage::scaleBias(uint baseComponent, uint num, float scale, float bias) { - const uint size = m_width * m_height; - - for(uint c = 0; c < num; c++) { - float * ptr = this->channel(base_component + c); - - for(uint i = 0; i < size; i++) { - ptr[i] = scale * (ptr[i] + bias); - } - } + const uint size = m_pixelCount; + + for (uint c = 0; c < num; c++) { + float * ptr = this->channel(baseComponent + c); + + for (uint i = 0; i < size; i++) { + ptr[i] = scale * ptr[i] + bias; + } + } } /// Clamp the elements of the image. -void FloatImage::clamp(float low, float high) +void FloatImage::clamp(uint baseComponent, uint num, float low, float high) { - for(uint i = 0; i < m_count; i++) { - m_mem[i] = nv::clamp(m_mem[i], low, high); - } + const uint size = m_pixelCount; + + for (uint c = 0; c < num; c++) { + float * ptr = this->channel(baseComponent + c); + + for (uint i = 0; i < size; i++) { + ptr[i] = nv::clamp(ptr[i], low, high); + } + } } /// From gamma to linear space. -void FloatImage::toLinear(uint base_component, uint num, float gamma /*= 2.2f*/) +void FloatImage::toLinear(uint baseComponent, uint num, float gamma /*= 2.2f*/) { - exponentiate(base_component, num, gamma); + if (gamma == 2.2f) { + for (uint c = 0; c < num; c++) { + float * ptr = this->channel(baseComponent + c); + + powf_11_5(ptr, ptr, m_pixelCount); + } + } else { + exponentiate(baseComponent, num, gamma); + } } /// From linear to gamma space. -void FloatImage::toGamma(uint base_component, uint num, float gamma /*= 2.2f*/) +void FloatImage::toGamma(uint baseComponent, uint num, float gamma /*= 2.2f*/) { - exponentiate(base_component, num, 1.0f/gamma); + if (gamma == 2.2f) { + for (uint c = 0; c < num; c++) { + float * ptr = this->channel(baseComponent + c); + + powf_5_11(ptr, ptr, m_pixelCount); + } + } else { + exponentiate(baseComponent, num, 1.0f/gamma); + } } /// Exponentiate the elements of the image. -void FloatImage::exponentiate(uint base_component, uint num, float power) +void FloatImage::exponentiate(uint baseComponent, uint num, float power) +{ + const uint size = m_pixelCount; + + for(uint c = 0; c < num; c++) { + float * ptr = this->channel(baseComponent + c); + + for(uint i = 0; i < size; i++) { + ptr[i] = powf(max(0.0f, ptr[i]), power); + } + } +} + +/// Apply linear transform. 
+void FloatImage::transform(uint baseComponent, const Matrix & m, Vector4::Arg offset) +{ + nvCheck(baseComponent + 4 <= m_componentCount); + + float * r = this->channel(baseComponent + 0); + float * g = this->channel(baseComponent + 1); + float * b = this->channel(baseComponent + 2); + float * a = this->channel(baseComponent + 3); + + const uint size = m_pixelCount; + for (uint i = 0; i < size; i++) + { + Vector4 color = nv::transform(m, Vector4(*r, *g, *b, *a)) + offset; + + *r++ = color.x; + *g++ = color.y; + *b++ = color.z; + *a++ = color.w; + } +} + +void FloatImage::swizzle(uint baseComponent, uint r, uint g, uint b, uint a) +{ + nvCheck(baseComponent + 4 <= m_componentCount); + nvCheck(r < 7 && g < 7 && b < 7 && a < 7); + + float consts[] = { 1.0f, 0.0f, -1.0f }; + float * c[7]; + c[0] = this->channel(baseComponent + 0); + c[1] = this->channel(baseComponent + 1); + c[2] = this->channel(baseComponent + 2); + c[3] = this->channel(baseComponent + 3); + c[4] = consts; + c[5] = consts + 1; + c[6] = consts + 2; + + const uint size = m_pixelCount; + for (uint i = 0; i < size; i++) + { + float tmp[4] = { *c[r], *c[g], *c[b], *c[a] }; + + *c[0]++ = tmp[0]; + *c[1]++ = tmp[1]; + *c[2]++ = tmp[2]; + *c[3]++ = tmp[3]; + } +} + +float FloatImage::sampleNearest(uint c, float x, float y, const WrapMode wm) const { - const uint size = m_width * m_height; + if( wm == WrapMode_Clamp ) return sampleNearestClamp(c, x, y); + else if( wm == WrapMode_Repeat ) return sampleNearestRepeat(c, x, y); + else /*if( wm == WrapMode_Mirror )*/ return sampleNearestMirror(c, x, y); +} - for(uint c = 0; c < num; c++) { - float * ptr = this->channel(base_component + c); - - for(uint i = 0; i < size; i++) { - ptr[i] = pow(ptr[i], power); - } - } +float FloatImage::sampleLinear(uint c, float x, float y, WrapMode wm) const +{ + if( wm == WrapMode_Clamp ) return sampleLinearClamp(c, x, y); + else if( wm == WrapMode_Repeat ) return sampleLinearRepeat(c, x, y); + else /*if( wm == WrapMode_Mirror )*/ return sampleLinearMirror(c, x, y); } -float FloatImage::sampleNearest(const float x, const float y, const int c, const WrapMode wm) const +float FloatImage::sampleNearest(uint c, float x, float y, float z, WrapMode wm) const { - if( wm == WrapMode_Clamp ) return sampleNearestClamp(x, y, c); - else if( wm == WrapMode_Repeat ) return sampleNearestRepeat(x, y, c); - else /*if( wm == WrapMode_Mirror )*/ return sampleNearestMirror(x, y, c); + if( wm == WrapMode_Clamp ) return sampleNearestClamp(c, x, y, z); + else if( wm == WrapMode_Repeat ) return sampleNearestRepeat(c, x, y, z); + else /*if( wm == WrapMode_Mirror )*/ return sampleNearestMirror(c, x, y, z); } -float FloatImage::sampleLinear(const float x, const float y, const int c, const WrapMode wm) const +float FloatImage::sampleLinear(uint c, float x, float y, float z, WrapMode wm) const { - if( wm == WrapMode_Clamp ) return sampleLinearClamp(x, y, c); - else if( wm == WrapMode_Repeat ) return sampleLinearRepeat(x, y, c); - else /*if( wm == WrapMode_Mirror )*/ return sampleLinearMirror(x, y, c); + if( wm == WrapMode_Clamp ) return sampleLinearClamp(c, x, y, z); + else if( wm == WrapMode_Repeat ) return sampleLinearRepeat(c, x, y, z); + else /*if( wm == WrapMode_Mirror )*/ return sampleLinearMirror(c, x, y, z); } -float FloatImage::sampleNearestClamp(const float x, const float y, const int c) const +float FloatImage::sampleNearestClamp(uint c, float x, float y) const { - int ix = ::clamp(iround(x * m_width), 0, m_width-1); - int iy = ::clamp(iround(y * m_height), 0, m_height-1); 
- return pixel(ix, iy, c); + int ix = wrapClamp(iround(x * m_width), m_width); + int iy = wrapClamp(iround(y * m_height), m_height); + return pixel(c, ix, iy, 0); } -float FloatImage::sampleNearestRepeat(const float x, const float y, const int c) const +float FloatImage::sampleNearestRepeat(uint c, float x, float y) const { - int ix = iround(frac(x) * m_width); - int iy = iround(frac(y) * m_height); - return pixel(ix, iy, c); + int ix = wrapRepeat(iround(x * m_width), m_width); + int iy = wrapRepeat(iround(y * m_height), m_height); + return pixel(c, ix, iy, 0); } -float FloatImage::sampleNearestMirror(const float x, const float y, const int c) const +float FloatImage::sampleNearestMirror(uint c, float x, float y) const { - int ix = mirror(iround(x * m_width), m_width); - int iy = mirror(iround(y * m_height), m_height); - return pixel(ix, iy, c); + int ix = wrapMirror(iround(x * m_width), m_width); + int iy = wrapMirror(iround(y * m_height), m_height); + return pixel(c, ix, iy, 0); } -float FloatImage::sampleLinearClamp(float x, float y, const int c) const +float FloatImage::sampleNearestClamp(uint c, float x, float y, float z) const { - const int w = m_width; - const int h = m_height; - - x *= w; - y *= h; - - const float fracX = frac(x); - const float fracY = frac(y); - - const int ix0 = ::clamp(ifloor(x), 0, w-1); - const int iy0 = ::clamp(ifloor(y), 0, h-1); - const int ix1 = ::clamp(ifloor(x)+1, 0, w-1); - const int iy1 = ::clamp(ifloor(y)+1, 0, h-1); + int ix = wrapClamp(iround(x * m_width), m_width); + int iy = wrapClamp(iround(y * m_height), m_height); + int iz = wrapClamp(iround(z * m_depth), m_depth); + return pixel(c, ix, iy, iz); +} - float f1 = pixel(ix0, iy0, c); - float f2 = pixel(ix1, iy0, c); - float f3 = pixel(ix0, iy1, c); - float f4 = pixel(ix1, iy1, c); - - float i1 = lerp(f1, f2, fracX); - float i2 = lerp(f3, f4, fracX); +float FloatImage::sampleNearestRepeat(uint c, float x, float y, float z) const +{ + int ix = wrapRepeat(iround(x * m_width), m_width); + int iy = wrapRepeat(iround(y * m_height), m_height); + int iz = wrapRepeat(iround(z * m_depth), m_depth); + return pixel(c, ix, iy, iz); +} - return lerp(i1, i2, fracY); +float FloatImage::sampleNearestMirror(uint c, float x, float y, float z) const +{ + int ix = wrapMirror(iround(x * m_width), m_width); + int iy = wrapMirror(iround(y * m_height), m_height); + int iz = wrapMirror(iround(z * m_depth), m_depth); + return pixel(c, ix, iy, iz); } -float FloatImage::sampleLinearRepeat(float x, float y, int c) const + +float FloatImage::sampleLinearClamp(uint c, float x, float y) const { - const int w = m_width; - const int h = m_height; - - const float fracX = frac(x * w); - const float fracY = frac(y * h); - - int ix0 = ifloor(frac(x) * w); - int iy0 = ifloor(frac(y) * h); - int ix1 = ifloor(frac(x + 1.0f/w) * w); - int iy1 = ifloor(frac(y + 1.0f/h) * h); - - float f1 = pixel(ix0, iy0, c); - float f2 = pixel(ix1, iy0, c); - float f3 = pixel(ix0, iy1, c); - float f4 = pixel(ix1, iy1, c); - - float i1 = lerp(f1, f2, fracX); - float i2 = lerp(f3, f4, fracX); + const int w = m_width; + const int h = m_height; - return lerp(i1, i2, fracY); + x *= w; + y *= h; + + const float fracX = frac(x); + const float fracY = frac(y); + + const int ix0 = ::clamp(ifloor(x), 0, w-1); + const int iy0 = ::clamp(ifloor(y), 0, h-1); + const int ix1 = ::clamp(ifloor(x)+1, 0, w-1); + const int iy1 = ::clamp(ifloor(y)+1, 0, h-1); + + return bilerp(c, ix0, iy0, ix1, iy1, fracX, fracY); } -float FloatImage::sampleLinearMirror(float x, float y, int 
c) const +float FloatImage::sampleLinearRepeat(uint c, float x, float y) const { - const int w = m_width; - const int h = m_height; + const int w = m_width; + const int h = m_height; + + const float fracX = frac(x * w); + const float fracY = frac(y * h); + + // @@ Using floor in some places, but round in others? + int ix0 = ifloor(frac(x) * w); + int iy0 = ifloor(frac(y) * h); + int ix1 = ifloor(frac(x + 1.0f/w) * w); + int iy1 = ifloor(frac(y + 1.0f/h) * h); + + return bilerp(c, ix0, iy0, ix1, iy1, fracX, fracY); +} - x *= w; - y *= h; +float FloatImage::sampleLinearMirror(uint c, float x, float y) const +{ + const int w = m_width; + const int h = m_height; - const float fracX = frac(x); - const float fracY = frac(y); + x *= w; + y *= h; - int ix0 = mirror(iround(x), w); - int iy0 = mirror(iround(y), h); - int ix1 = mirror(iround(x) + 1, w); - int iy1 = mirror(iround(y) + 1, h); + const float fracX = frac(x); + const float fracY = frac(y); - float f1 = pixel(ix0, iy0, c); - float f2 = pixel(ix1, iy0, c); - float f3 = pixel(ix0, iy1, c); - float f4 = pixel(ix1, iy1, c); - - float i1 = lerp(f1, f2, fracX); - float i2 = lerp(f3, f4, fracX); + int ix0 = wrapMirror(iround(x), w); + int iy0 = wrapMirror(iround(y), h); + int ix1 = wrapMirror(iround(x) + 1, w); + int iy1 = wrapMirror(iround(y) + 1, h); - return lerp(i1, i2, fracY); + return bilerp(c, ix0, iy0, ix1, iy1, fracX, fracY); +} + +float FloatImage::sampleLinearClamp(uint c, float x, float y, float z) const +{ + const int w = m_width; + const int h = m_height; + const int d = m_depth; + + x *= w; + y *= h; + z *= d; + + const float fracX = frac(x); + const float fracY = frac(y); + const float fracZ = frac(z); + + // @@ Using floor in some places, but round in others? + const int ix0 = ::clamp(ifloor(x), 0, w-1); + const int iy0 = ::clamp(ifloor(y), 0, h-1); + const int iz0 = ::clamp(ifloor(z), 0, h-1); + const int ix1 = ::clamp(ifloor(x)+1, 0, w-1); + const int iy1 = ::clamp(ifloor(y)+1, 0, h-1); + const int iz1 = ::clamp(ifloor(z)+1, 0, h-1); + + return trilerp(c, ix0, iy0, iz0, ix1, iy1, iz1, fracX, fracY, fracZ); +} + +float FloatImage::sampleLinearRepeat(uint c, float x, float y, float z) const +{ + const int w = m_width; + const int h = m_height; + const int d = m_depth; + + const float fracX = frac(x * w); + const float fracY = frac(y * h); + const float fracZ = frac(z * d); + + int ix0 = ifloor(frac(x) * w); + int iy0 = ifloor(frac(y) * h); + int iz0 = ifloor(frac(z) * d); + int ix1 = ifloor(frac(x + 1.0f/w) * w); + int iy1 = ifloor(frac(y + 1.0f/h) * h); + int iz1 = ifloor(frac(z + 1.0f/d) * d); + + return trilerp(c, ix0, iy0, iz0, ix1, iy1, iz1, fracX, fracY, fracZ); +} + +float FloatImage::sampleLinearMirror(uint c, float x, float y, float z) const +{ + const int w = m_width; + const int h = m_height; + const int d = m_depth; + + x *= w; + y *= h; + z *= d; + + int ix0 = wrapMirror(iround(x), w); + int iy0 = wrapMirror(iround(y), h); + int iz0 = wrapMirror(iround(z), d); + int ix1 = wrapMirror(iround(x) + 1, w); + int iy1 = wrapMirror(iround(y) + 1, h); + int iz1 = wrapMirror(iround(z) + 1, d); + + const float fracX = frac(x); + const float fracY = frac(y); + const float fracZ = frac(z); + + return trilerp(c, ix0, iy0, iz0, ix1, iy1, iz1, fracX, fracY, fracZ); } @@ -365,545 +540,930 @@ /// FloatImage * FloatImage::fastDownSample() const { - nvDebugCheck(m_width != 1 || m_height != 1); - - AutoPtr dst_image( new FloatImage() ); - - const uint w = max(1, m_width / 2); - const uint h = max(1, m_height / 2); - 
dst_image->allocate(m_componentNum, w, h); - - // 1D box filter. - if (m_width == 1 || m_height == 1) - { - const uint n = w * h; - - if ((m_width * m_height) & 1) - { - const float scale = 1.0f / (2 * n + 1); - - for(uint c = 0; c < m_componentNum; c++) - { - const float * src = this->channel(c); - float * dst = dst_image->channel(c); - - for(uint x = 0; x < n; x++) - { - const float w0 = float(n - x); - const float w1 = float(n - 0); - const float w2 = float(1 + x); - - *dst++ = scale * (w0 * src[0] + w1 * src[1] + w2 * src[2]); - src += 2; - } - } - } - else - { - for(uint c = 0; c < m_componentNum; c++) - { - const float * src = this->channel(c); - float * dst = dst_image->channel(c); - - for(uint x = 0; x < n; x++) - { - *dst = 0.5f * (src[0] + src[1]); - dst++; - src += 2; - } - } - } - } - - // Regular box filter. - else if ((m_width & 1) == 0 && (m_height & 1) == 0) - { - for(uint c = 0; c < m_componentNum; c++) - { - const float * src = this->channel(c); - float * dst = dst_image->channel(c); - - for(uint y = 0; y < h; y++) - { - for(uint x = 0; x < w; x++) - { - *dst = 0.25f * (src[0] + src[1] + src[m_width] + src[m_width + 1]); - dst++; - src += 2; - } - - src += m_width; - } - } - } - - // Polyphase filters. - else if (m_width & 1 && m_height & 1) - { - nvDebugCheck(m_width == 2 * w + 1); - nvDebugCheck(m_height == 2 * h + 1); - - const float scale = 1.0f / (m_width * m_height); - - for(uint c = 0; c < m_componentNum; c++) - { - const float * src = this->channel(c); - float * dst = dst_image->channel(c); - - for(uint y = 0; y < h; y++) - { - const float v0 = float(h - y); - const float v1 = float(h - 0); - const float v2 = float(1 + y); - - for (uint x = 0; x < w; x++) - { - const float w0 = float(w - x); - const float w1 = float(w - 0); - const float w2 = float(1 + x); - - float f = 0.0f; - f += v0 * (w0 * src[0 * m_width + 2 * x] + w1 * src[0 * m_width + 2 * x + 1] + w2 * src[0 * m_width + 2 * x + 2]); - f += v1 * (w0 * src[1 * m_width + 2 * x] + w1 * src[1 * m_width + 2 * x + 1] + w2 * src[0 * m_width + 2 * x + 2]); - f += v2 * (w0 * src[2 * m_width + 2 * x] + w1 * src[2 * m_width + 2 * x + 1] + w2 * src[0 * m_width + 2 * x + 2]); - - *dst = f * scale; - dst++; - } - - src += 2 * m_width; - } - } - } - else if (m_width & 1) - { - nvDebugCheck(m_width == 2 * w + 1); - const float scale = 1.0f / (2 * m_width); - - for(uint c = 0; c < m_componentNum; c++) - { - const float * src = this->channel(c); - float * dst = dst_image->channel(c); - - for(uint y = 0; y < h; y++) - { - for (uint x = 0; x < w; x++) - { - const float w0 = float(w - x); - const float w1 = float(w - 0); - const float w2 = float(1 + x); - - float f = 0.0f; - f += w0 * (src[2 * x + 0] + src[m_width + 2 * x + 0]); - f += w1 * (src[2 * x + 1] + src[m_width + 2 * x + 1]); - f += w2 * (src[2 * x + 2] + src[m_width + 2 * x + 2]); - - *dst = f * scale; - dst++; - } - - src += 2 * m_width; - } - } - } - else if (m_height & 1) - { - nvDebugCheck(m_height == 2 * h + 1); - - const float scale = 1.0f / (2 * m_height); - - for(uint c = 0; c < m_componentNum; c++) - { - const float * src = this->channel(c); - float * dst = dst_image->channel(c); - - for(uint y = 0; y < h; y++) - { - const float v0 = float(h - y); - const float v1 = float(h - 0); - const float v2 = float(1 + y); - - for (uint x = 0; x < w; x++) - { - float f = 0.0f; - f += v0 * (src[0 * m_width + 2 * x] + src[0 * m_width + 2 * x + 1]); - f += v1 * (src[1 * m_width + 2 * x] + src[1 * m_width + 2 * x + 1]); - f += v2 * (src[2 * m_width + 2 * x] + src[2 * 
m_width + 2 * x + 1]); - - *dst = f * scale; - dst++; - } - - src += 2 * m_width; - } - } - } - - return dst_image.release(); + nvDebugCheck(m_depth == 1); + nvDebugCheck(m_width != 1 || m_height != 1); + + AutoPtr dst_image( new FloatImage() ); + + const uint w = max(1, m_width / 2); + const uint h = max(1, m_height / 2); + dst_image->allocate(m_componentCount, w, h); + + // 1D box filter. + if (m_width == 1 || m_height == 1) + { + const uint n = w * h; + + if ((m_width * m_height) & 1) + { + const float scale = 1.0f / (2 * n + 1); + + for(uint c = 0; c < m_componentCount; c++) + { + const float * src = this->channel(c); + float * dst = dst_image->channel(c); + + for(uint x = 0; x < n; x++) + { + const float w0 = float(n - x); + const float w1 = float(n - 0); + const float w2 = float(1 + x); + + *dst++ = scale * (w0 * src[0] + w1 * src[1] + w2 * src[2]); + src += 2; + } + } + } + else + { + for(uint c = 0; c < m_componentCount; c++) + { + const float * src = this->channel(c); + float * dst = dst_image->channel(c); + + for(uint x = 0; x < n; x++) + { + *dst = 0.5f * (src[0] + src[1]); + dst++; + src += 2; + } + } + } + } + + // Regular box filter. + else if ((m_width & 1) == 0 && (m_height & 1) == 0) + { + for(uint c = 0; c < m_componentCount; c++) + { + const float * src = this->channel(c); + float * dst = dst_image->channel(c); + + for(uint y = 0; y < h; y++) + { + for(uint x = 0; x < w; x++) + { + *dst = 0.25f * (src[0] + src[1] + src[m_width] + src[m_width + 1]); + dst++; + src += 2; + } + + src += m_width; + } + } + } + + // Polyphase filters. + else if (m_width & 1 && m_height & 1) + { + nvDebugCheck(m_width == 2 * w + 1); + nvDebugCheck(m_height == 2 * h + 1); + + const float scale = 1.0f / (m_width * m_height); + + for(uint c = 0; c < m_componentCount; c++) + { + const float * src = this->channel(c); + float * dst = dst_image->channel(c); + + for(uint y = 0; y < h; y++) + { + const float v0 = float(h - y); + const float v1 = float(h - 0); + const float v2 = float(1 + y); + + for (uint x = 0; x < w; x++) + { + const float w0 = float(w - x); + const float w1 = float(w - 0); + const float w2 = float(1 + x); + + float f = 0.0f; + f += v0 * (w0 * src[0 * m_width + 2 * x] + w1 * src[0 * m_width + 2 * x + 1] + w2 * src[0 * m_width + 2 * x + 2]); + f += v1 * (w0 * src[1 * m_width + 2 * x] + w1 * src[1 * m_width + 2 * x + 1] + w2 * src[1 * m_width + 2 * x + 2]); + f += v2 * (w0 * src[2 * m_width + 2 * x] + w1 * src[2 * m_width + 2 * x + 1] + w2 * src[2 * m_width + 2 * x + 2]); + + *dst = f * scale; + dst++; + } + + src += 2 * m_width; + } + } + } + else if (m_width & 1) + { + nvDebugCheck(m_width == 2 * w + 1); + const float scale = 1.0f / (2 * m_width); + + for(uint c = 0; c < m_componentCount; c++) + { + const float * src = this->channel(c); + float * dst = dst_image->channel(c); + + for(uint y = 0; y < h; y++) + { + for (uint x = 0; x < w; x++) + { + const float w0 = float(w - x); + const float w1 = float(w - 0); + const float w2 = float(1 + x); + + float f = 0.0f; + f += w0 * (src[2 * x + 0] + src[m_width + 2 * x + 0]); + f += w1 * (src[2 * x + 1] + src[m_width + 2 * x + 1]); + f += w2 * (src[2 * x + 2] + src[m_width + 2 * x + 2]); + + *dst = f * scale; + dst++; + } + + src += 2 * m_width; + } + } + } + else if (m_height & 1) + { + nvDebugCheck(m_height == 2 * h + 1); + + const float scale = 1.0f / (2 * m_height); + + for(uint c = 0; c < m_componentCount; c++) + { + const float * src = this->channel(c); + float * dst = dst_image->channel(c); + + for(uint y = 0; y < h; y++) + { + const 
float v0 = float(h - y); + const float v1 = float(h - 0); + const float v2 = float(1 + y); + + for (uint x = 0; x < w; x++) + { + float f = 0.0f; + f += v0 * (src[0 * m_width + 2 * x] + src[0 * m_width + 2 * x + 1]); + f += v1 * (src[1 * m_width + 2 * x] + src[1 * m_width + 2 * x + 1]); + f += v2 * (src[2 * m_width + 2 * x] + src[2 * m_width + 2 * x + 1]); + + *dst = f * scale; + dst++; + } + + src += 2 * m_width; + } + } + } + + return dst_image.release(); } /// Downsample applying a 1D kernel separately in each dimension. FloatImage * FloatImage::downSample(const Filter & filter, WrapMode wm) const { - const uint w = max(1, m_width / 2); - const uint h = max(1, m_height / 2); + const uint w = max(1, m_width / 2); + const uint h = max(1, m_height / 2); + const uint d = max(1, m_depth / 2); - return resize(filter, w, h, wm); + return resize(filter, w, h, d, wm); } /// Downsample applying a 1D kernel separately in each dimension. FloatImage * FloatImage::downSample(const Filter & filter, WrapMode wm, uint alpha) const { - const uint w = max(1, m_width / 2); - const uint h = max(1, m_height / 2); + const uint w = max(1, m_width / 2); + const uint h = max(1, m_height / 2); + const uint d = max(1, m_depth / 2); - return resize(filter, w, h, wm, alpha); + return resize(filter, w, h, d, wm, alpha); } /// Downsample applying a 1D kernel separately in each dimension. FloatImage * FloatImage::resize(const Filter & filter, uint w, uint h, WrapMode wm) const { - // @@ Use monophase filters when frac(m_width / w) == 0 + // @@ Use monophase filters when frac(m_width / w) == 0 + + AutoPtr tmp_image( new FloatImage() ); + AutoPtr dst_image( new FloatImage() ); + + PolyphaseKernel xkernel(filter, m_width, w, 32); + PolyphaseKernel ykernel(filter, m_height, h, 32); - AutoPtr tmp_image( new FloatImage() ); - AutoPtr dst_image( new FloatImage() ); - - PolyphaseKernel xkernel(filter, m_width, w, 32); - PolyphaseKernel ykernel(filter, m_height, h, 32); - - // @@ Select fastest filtering order: - //if (w * m_height <= h * m_width) - { - tmp_image->allocate(m_componentNum, w, m_height); - dst_image->allocate(m_componentNum, w, h); - - Array tmp_column(h); - tmp_column.resize(h); - - for (uint c = 0; c < m_componentNum; c++) - { - float * tmp_channel = tmp_image->channel(c); - - for (uint y = 0; y < m_height; y++) { - this->applyKernelHorizontal(xkernel, y, c, wm, tmp_channel + y * w); - } - - float * dst_channel = dst_image->channel(c); - - for (uint x = 0; x < w; x++) { - tmp_image->applyKernelVertical(ykernel, x, c, wm, tmp_column.unsecureBuffer()); - - for (uint y = 0; y < h; y++) { - dst_channel[y * w + x] = tmp_column[y]; - } - } - } - } - /*else - { - tmp_image->allocate(m_componentNum, m_width, h); - dst_image->allocate(m_componentNum, w, h); - - Array tmp_column(h); - tmp_column.resize(h); - - for (uint c = 0; c < m_componentNum; c++) - { - float * tmp_channel = tmp_image->channel(c); - - for (uint x = 0; x < w; x++) { - tmp_image->applyKernelVertical(ykernel, x, c, wm, tmp_column.unsecureBuffer()); - - for (uint y = 0; y < h; y++) { - tmp_channel[y * w + x] = tmp_column[y]; - } - } - - float * dst_channel = dst_image->channel(c); - - for (uint y = 0; y < m_height; y++) { - this->applyKernelHorizontal(xkernel, y, c, wm, dst_channel + y * w); - } - } - }*/ - - return dst_image.release(); + // @@ Select fastest filtering order: + //if (w * m_height <= h * m_width) + { + tmp_image->allocate(m_componentCount, w, m_height); + dst_image->allocate(m_componentCount, w, h); + + // @@ We could avoid this 
allocation, write directly to dst_plane. + Array tmp_column(h); + tmp_column.resize(h); + + for (uint c = 0; c < m_componentCount; c++) + { + for (uint z = 0; z < m_depth; z++) + { + float * tmp_plane = tmp_image->plane(c, z); + + for (uint y = 0; y < m_height; y++) { + this->applyKernelX(xkernel, y, z, c, wm, tmp_plane + y * w); + } + + float * dst_plane = dst_image->plane(c, z); + + for (uint x = 0; x < w; x++) { + tmp_image->applyKernelY(ykernel, x, z, c, wm, tmp_column.buffer()); + + // @@ We could avoid this copy, write directly to dst_plane. + for (uint y = 0; y < h; y++) { + dst_plane[y * w + x] = tmp_column[y]; + } + } + } + } + } + + return dst_image.release(); +} + +/// Downsample applying a 1D kernel separately in each dimension. (for 3d textures) +FloatImage * FloatImage::resize(const Filter & filter, uint w, uint h, uint d, WrapMode wm) const +{ + // @@ Use monophase filters when frac(m_width / w) == 0 + + // Use the existing 2d version if we are not resizing in the Z axis: + if (m_depth == d) { + return resize(filter, w, h, wm); + } + + AutoPtr tmp_image( new FloatImage() ); + AutoPtr tmp_image2( new FloatImage() ); + AutoPtr dst_image( new FloatImage() ); + + PolyphaseKernel xkernel(filter, m_width, w, 32); + PolyphaseKernel ykernel(filter, m_height, h, 32); + PolyphaseKernel zkernel(filter, m_depth, d, 32); + + tmp_image->allocate(m_componentCount, w, m_height, m_depth); + tmp_image2->allocate(m_componentCount, w, m_height, d); + dst_image->allocate(m_componentCount, w, h, d); + + Array tmp_column(h); + tmp_column.resize(h); + + for (uint c = 0; c < m_componentCount; c++) + { + float * tmp_channel = tmp_image->channel(c); + + // split width in half + for (uint z = 0; z < m_depth; z++ ) { + for (uint y = 0; y < m_height; y++) { + this->applyKernelX(xkernel, y, z, c, wm, tmp_channel + z * m_height * w + y * w); + } + } + + // split depth in half + float * tmp2_channel = tmp_image2->channel(c); + for (uint y = 0; y < m_height; y++) { + for (uint x = 0; x < w; x++) { + tmp_image->applyKernelZ(zkernel, x, y, c, wm, tmp_column.buffer() ); + + for (uint z = 0; z < d; z++) { + tmp2_channel[z * m_height * w + y * w + x] = tmp_column[z]; + } + } + } + + // split height in half + float * dst_channel = dst_image->channel(c); + + for (uint z = 0; z < d; z++ ) { + for (uint x = 0; x < w; x++) { + tmp_image2->applyKernelY(ykernel, x, z, c, wm, tmp_column.buffer()); + + for (uint y = 0; y < h; y++) { + dst_channel[z * h * w + y * w + x] = tmp_column[y]; + } + } + } + } + + return dst_image.release(); } + /// Downsample applying a 1D kernel separately in each dimension. FloatImage * FloatImage::resize(const Filter & filter, uint w, uint h, WrapMode wm, uint alpha) const { - nvCheck(alpha < m_componentNum); + nvCheck(alpha < m_componentCount); - AutoPtr tmp_image( new FloatImage() ); - AutoPtr dst_image( new FloatImage() ); - - PolyphaseKernel xkernel(filter, m_width, w, 32); - PolyphaseKernel ykernel(filter, m_height, h, 32); - - { - tmp_image->allocate(m_componentNum, w, m_height); - dst_image->allocate(m_componentNum, w, h); - - Array tmp_column(h); - tmp_column.resize(h); - - for (uint c = 0; c < m_componentNum; c++) - { - float * tmp_channel = tmp_image->channel(c); - - for (uint y = 0; y < m_height; y++) { - this->applyKernelHorizontal(xkernel, y, c, alpha, wm, tmp_channel + y * w); - } - } - - // Process all channels before applying vertical kernel to make sure alpha has been computed. 
- - for (uint c = 0; c < m_componentNum; c++) - { - float * dst_channel = dst_image->channel(c); - - for (uint x = 0; x < w; x++) { - tmp_image->applyKernelVertical(ykernel, x, c, alpha, wm, tmp_column.unsecureBuffer()); - - for (uint y = 0; y < h; y++) { - dst_channel[y * w + x] = tmp_column[y]; - } - } - } - } - - return dst_image.release(); + AutoPtr tmp_image( new FloatImage() ); + AutoPtr dst_image( new FloatImage() ); + + PolyphaseKernel xkernel(filter, m_width, w, 32); + PolyphaseKernel ykernel(filter, m_height, h, 32); + + { + tmp_image->allocate(m_componentCount, w, m_height); + dst_image->allocate(m_componentCount, w, h); + + Array tmp_column(h); + tmp_column.resize(h); + + for (uint i = 0; i < m_componentCount; i++) + { + // Process alpha channel first. + uint c; + if (i == 0) c = alpha; + else if (i > alpha) c = i; + else c = i - 1; + + for (uint z = 0; z < m_depth; z++) + { + float * tmp_plane = tmp_image->plane(c, z); + + for (uint y = 0; y < m_height; y++) { + this->applyKernelX(xkernel, y, z, c, wm, tmp_plane + y * w); + } + + float * dst_plane = dst_image->plane(c, z); + + for (uint x = 0; x < w; x++) { + tmp_image->applyKernelY(ykernel, x, z, c, wm, tmp_column.buffer()); + + // @@ Avoid this copy, write directly to dst_plane. + for (uint y = 0; y < h; y++) { + dst_plane[y * w + x] = tmp_column[y]; + } + } + } + } + } + + return dst_image.release(); } +/// Downsample applying a 1D kernel separately in each dimension. (for 3d textures) +FloatImage * FloatImage::resize(const Filter & filter, uint w, uint h, uint d, WrapMode wm, uint alpha) const +{ + nvCheck(alpha < m_componentCount); + + // use the existing 2d version if we are a 2d image: + if (m_depth == d) { + return resize( filter, w, h, wm, alpha ); + } + + AutoPtr tmp_image( new FloatImage() ); + AutoPtr tmp_image2( new FloatImage() ); + AutoPtr dst_image( new FloatImage() ); + + PolyphaseKernel xkernel(filter, m_width, w, 32); + PolyphaseKernel ykernel(filter, m_height, h, 32); + PolyphaseKernel zkernel(filter, m_depth, d, 32); + + tmp_image->allocate(m_componentCount, w, m_height, m_depth); + tmp_image2->allocate(m_componentCount, w, m_height, d); + dst_image->allocate(m_componentCount, w, h, d); + + Array tmp_column(h); + tmp_column.resize(h); + + for (uint i = 0; i < m_componentCount; i++) + { + // Process alpha channel first. 
+ uint c; + if (i == 0) c = alpha; + else if (i > alpha) c = i; + else c = i - 1; + + float * tmp_channel = tmp_image->channel(c); + + for (uint z = 0; z < m_depth; z++ ) { + for (uint y = 0; y < m_height; y++) { + this->applyKernelX(xkernel, y, z, c, wm, tmp_channel + z * m_height * w + y * w); + } + } + + float * tmp2_channel = tmp_image2->channel(c); + for (uint y = 0; y < m_height; y++) { + for (uint x = 0; x < w; x++) { + tmp_image->applyKernelZ(zkernel, x, y, c, wm, tmp_column.buffer() ); + + for (uint z = 0; z < d; z++) { + tmp2_channel[z * m_height * w + y * w + x] = tmp_column[z]; + } + } + } + + float * dst_channel = dst_image->channel(c); + + for (uint z = 0; z < d; z++ ) { + for (uint x = 0; x < w; x++) { + tmp_image2->applyKernelY(ykernel, x, z, c, wm, tmp_column.buffer()); + + for (uint y = 0; y < h; y++) { + dst_channel[z * h * w + y * w + x] = tmp_column[y]; + } + } + } + } + + return dst_image.release(); +} + + +void FloatImage::convolve(const Kernel2 & k, uint c, WrapMode wm) +{ + AutoPtr tmpImage(clone()); + + uint w = m_width; + uint h = m_height; + uint d = m_depth; + + for (uint z = 0; z < d; z++) + { + for (uint y = 0; y < h; y++) + { + for (uint x = 0; x < w; x++) + { + pixel(c, x, y, 0) = tmpImage->applyKernelXY(&k, x, y, z, c, wm); + } + } + } +} + /// Apply 2D kernel at the given coordinates and return result. -float FloatImage::applyKernel(const Kernel2 * k, int x, int y, uint c, WrapMode wm) const +float FloatImage::applyKernelXY(const Kernel2 * k, int x, int y, int z, uint c, WrapMode wm) const { - nvDebugCheck(k != NULL); - - const uint kernelWindow = k->windowSize(); - const int kernelOffset = int(kernelWindow / 2) - 1; - - const float * channel = this->channel(c); - - float sum = 0.0f; - for (uint i = 0; i < kernelWindow; i++) - { - const int src_y = int(y + i) - kernelOffset; - - for (uint e = 0; e < kernelWindow; e++) - { - const int src_x = int(x + e) - kernelOffset; - - int idx = this->index(src_x, src_y, wm); - - sum += k->valueAt(e, i) * channel[idx]; - } - } - - return sum; + nvDebugCheck(k != NULL); + + const uint kernelWindow = k->windowSize(); + const int kernelOffset = int(kernelWindow / 2); + + const float * channel = this->plane(c, z); + + float sum = 0.0f; + for (uint i = 0; i < kernelWindow; i++) + { + int src_y = int(y + i) - kernelOffset; + + for (uint e = 0; e < kernelWindow; e++) + { + int src_x = int(x + e) - kernelOffset; + + int idx = this->index(src_x, src_y, z, wm); + + sum += k->valueAt(e, i) * channel[idx]; + } + } + + return sum; } +/// Apply 1D horizontal kernel at the given coordinates and return result. +float FloatImage::applyKernelX(const Kernel1 * k, int x, int y, int z, uint c, WrapMode wm) const +{ + nvDebugCheck(k != NULL); + + const uint kernelWindow = k->windowSize(); + const int kernelOffset = int(kernelWindow / 2); + + const float * channel = this->channel(c); + + float sum = 0.0f; + for (uint i = 0; i < kernelWindow; i++) + { + const int src_x = int(x + i) - kernelOffset; + const int idx = this->index(src_x, y, z, wm); + + sum += k->valueAt(i) * channel[idx]; + } + + return sum; +} + /// Apply 1D vertical kernel at the given coordinates and return result. 
-float FloatImage::applyKernelVertical(const Kernel1 * k, int x, int y, uint c, WrapMode wm) const +float FloatImage::applyKernelY(const Kernel1 * k, int x, int y, int z, uint c, WrapMode wm) const { - nvDebugCheck(k != NULL); - - const uint kernelWindow = k->windowSize(); - const int kernelOffset = int(kernelWindow / 2) - 1; - - const float * channel = this->channel(c); - - float sum = 0.0f; - for (uint i = 0; i < kernelWindow; i++) - { - const int src_y = int(y + i) - kernelOffset; - const int idx = this->index(x, src_y, wm); - - sum += k->valueAt(i) * channel[idx]; - } - - return sum; + nvDebugCheck(k != NULL); + + const uint kernelWindow = k->windowSize(); + const int kernelOffset = int(kernelWindow / 2); + + const float * channel = this->channel(c); + + float sum = 0.0f; + for (uint i = 0; i < kernelWindow; i++) + { + const int src_y = int(y + i) - kernelOffset; + const int idx = this->index(x, src_y, z, wm); + + sum += k->valueAt(i) * channel[idx]; + } + + return sum; } -/// Apply 1D horizontal kernel at the given coordinates and return result. -float FloatImage::applyKernelHorizontal(const Kernel1 * k, int x, int y, uint c, WrapMode wm) const +/// Apply 1D kernel in the z direction at the given coordinates and return result. +float FloatImage::applyKernelZ(const Kernel1 * k, int x, int y, int z, uint c, WrapMode wm) const { - nvDebugCheck(k != NULL); - - const uint kernelWindow = k->windowSize(); - const int kernelOffset = int(kernelWindow / 2) - 1; - - const float * channel = this->channel(c); - - float sum = 0.0f; - for (uint e = 0; e < kernelWindow; e++) - { - const int src_x = int(x + e) - kernelOffset; - const int idx = this->index(src_x, y, wm); - - sum += k->valueAt(e) * channel[idx]; - } - - return sum; + nvDebugCheck(k != NULL); + + const uint kernelWindow = k->windowSize(); + const int kernelOffset = int(kernelWindow / 2); + + const float * channel = this->channel(c); + + float sum = 0.0f; + for (uint i = 0; i < kernelWindow; i++) + { + const int src_z = int(z + i) - kernelOffset; + const int idx = this->index(x, y, src_z, wm); + + sum += k->valueAt(i) * channel[idx]; + } + + return sum; } +/// Apply 1D horizontal kernel at the given coordinates and return result. +void FloatImage::applyKernelX(const PolyphaseKernel & k, int y, int z, uint c, WrapMode wm, float * __restrict output) const +{ + const uint length = k.length(); + const float scale = float(length) / float(m_width); + const float iscale = 1.0f / scale; + + const float width = k.width(); + const int windowSize = k.windowSize(); + + const float * channel = this->channel(c); + + for (uint i = 0; i < length; i++) + { + const float center = (0.5f + i) * iscale; + + const int left = (int)floorf(center - width); + const int right = (int)ceilf(center + width); + nvDebugCheck(right - left <= windowSize); + + float sum = 0; + for (int j = 0; j < windowSize; ++j) + { + const int idx = this->index(left + j, y, z, wm); + + sum += k.valueAt(i, j) * channel[idx]; + } + + output[i] = sum; + } +} + /// Apply 1D vertical kernel at the given coordinates and return result. 
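+// (For the PolyphaseKernel overloads, output sample i is centered at (0.5 + i) * iscale
+// in the source axis and accumulates windowSize taps starting at floor(center - width),
+// each weighted by k.valueAt(i, j).)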
-void FloatImage::applyKernelVertical(const PolyphaseKernel & k, int x, uint c, WrapMode wm, float * __restrict output) const +void FloatImage::applyKernelY(const PolyphaseKernel & k, int x, int z, uint c, WrapMode wm, float * __restrict output) const { - const uint length = k.length(); - const float scale = float(length) / float(m_height); - const float iscale = 1.0f / scale; - - const float width = k.width(); - const int windowSize = k.windowSize(); - - const float * channel = this->channel(c); - - for (uint i = 0; i < length; i++) - { - const float center = (0.5f + i) * iscale; - - const int left = (int)floorf(center - width); - const int right = (int)ceilf(center + width); - nvCheck(right - left <= windowSize); - - float sum = 0; - for (int j = 0; j < windowSize; ++j) - { - const int idx = this->index(x, j+left, wm); - - sum += k.valueAt(i, j) * channel[idx]; - } - - output[i] = sum; - } + const uint length = k.length(); + const float scale = float(length) / float(m_height); + const float iscale = 1.0f / scale; + + const float width = k.width(); + const int windowSize = k.windowSize(); + + const float * channel = this->channel(c); + + for (uint i = 0; i < length; i++) + { + const float center = (0.5f + i) * iscale; + + const int left = (int)floorf(center - width); + const int right = (int)ceilf(center + width); + nvCheck(right - left <= windowSize); + + float sum = 0; + for (int j = 0; j < windowSize; ++j) + { + const int idx = this->index(x, j+left, z, wm); + + sum += k.valueAt(i, j) * channel[idx]; + } + + output[i] = sum; + } } -/// Apply 1D horizontal kernel at the given coordinates and return result. -void FloatImage::applyKernelHorizontal(const PolyphaseKernel & k, int y, uint c, WrapMode wm, float * __restrict output) const +/// Apply 1D kernel in the Z direction at the given coordinates and return result. +void FloatImage::applyKernelZ(const PolyphaseKernel & k, int x, int y, uint c, WrapMode wm, float * __restrict output) const { - const uint length = k.length(); - const float scale = float(length) / float(m_width); - const float iscale = 1.0f / scale; - - const float width = k.width(); - const int windowSize = k.windowSize(); - - const float * channel = this->channel(c); - - for (uint i = 0; i < length; i++) - { - const float center = (0.5f + i) * iscale; - - const int left = (int)floorf(center - width); - const int right = (int)ceilf(center + width); - nvDebugCheck(right - left <= windowSize); - - float sum = 0; - for (int j = 0; j < windowSize; ++j) - { - const int idx = this->index(left + j, y, wm); - - sum += k.valueAt(i, j) * channel[idx]; - } - - output[i] = sum; - } + const uint length = k.length(); + const float scale = float(length) / float(m_height); + const float iscale = 1.0f / scale; + + const float width = k.width(); + const int windowSize = k.windowSize(); + + const float * channel = this->channel(c); + + for (uint i = 0; i < length; i++) + { + const float center = (0.5f + i) * iscale; + + const int left = (int)floorf(center - width); + const int right = (int)ceilf(center + width); + nvCheck(right - left <= windowSize); + + float sum = 0; + for (int j = 0; j < windowSize; ++j) + { + const int idx = this->index(x, y, j+left, wm); + + sum += k.valueAt(i, j) * channel[idx]; + } + + output[i] = sum; + } } +/// Apply 1D horizontal kernel at the given coordinates and return result. 
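+// (This overload additionally weights every tap by the alpha channel: each kernel value
+// is multiplied by (alpha[idx] + 1/256) and the accumulated sum is renormalized by the
+// total weight, so texels with zero alpha contribute almost nothing to the filtered color.)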
+void FloatImage::applyKernelX(const PolyphaseKernel & k, int y, int z, uint c, uint a, WrapMode wm, float * __restrict output) const +{ + const uint length = k.length(); + const float scale = float(length) / float(m_width); + const float iscale = 1.0f / scale; + + const float width = k.width(); + const int windowSize = k.windowSize(); + + const float * channel = this->channel(c); + const float * alpha = this->channel(a); + + for (uint i = 0; i < length; i++) + { + const float center = (0.5f + i) * iscale; + + const int left = (int)floorf(center - width); + const int right = (int)ceilf(center + width); + nvDebugCheck(right - left <= windowSize); + + float norm = 0.0f; + float sum = 0; + for (int j = 0; j < windowSize; ++j) + { + const int idx = this->index(left + j, y, z, wm); + + float w = k.valueAt(i, j) * (alpha[idx] + (1.0f / 256.0f)); + norm += w; + sum += w * channel[idx]; + } + + output[i] = sum / norm; + } +} + /// Apply 1D vertical kernel at the given coordinates and return result. -void FloatImage::applyKernelVertical(const PolyphaseKernel & k, int x, uint c, uint a, WrapMode wm, float * __restrict output) const +void FloatImage::applyKernelY(const PolyphaseKernel & k, int x, int z, uint c, uint a, WrapMode wm, float * __restrict output) const { - const uint length = k.length(); - const float scale = float(length) / float(m_height); - const float iscale = 1.0f / scale; - - const float width = k.width(); - const int windowSize = k.windowSize(); - - const float * channel = this->channel(c); - const float * alpha = this->channel(a); - - for (uint i = 0; i < length; i++) - { - const float center = (0.5f + i) * iscale; - - const int left = (int)floorf(center - width); - const int right = (int)ceilf(center + width); - nvCheck(right - left <= windowSize); - - float norm = 0; - float sum = 0; - for (int j = 0; j < windowSize; ++j) - { - const int idx = this->index(x, j+left, wm); - - float w = k.valueAt(i, j) * (alpha[idx] + (1.0f / 256.0f)); - norm += w; - sum += w * channel[idx]; - } - - output[i] = sum / norm; - } + const uint length = k.length(); + const float scale = float(length) / float(m_height); + const float iscale = 1.0f / scale; + + const float width = k.width(); + const int windowSize = k.windowSize(); + + const float * channel = this->channel(c); + const float * alpha = this->channel(a); + + for (uint i = 0; i < length; i++) + { + const float center = (0.5f + i) * iscale; + + const int left = (int)floorf(center - width); + const int right = (int)ceilf(center + width); + nvCheck(right - left <= windowSize); + + float norm = 0; + float sum = 0; + for (int j = 0; j < windowSize; ++j) + { + const int idx = this->index(x, j+left, z, wm); + + float w = k.valueAt(i, j) * (alpha[idx] + (1.0f / 256.0f)); + norm += w; + sum += w * channel[idx]; + } + + output[i] = sum / norm; + } } /// Apply 1D horizontal kernel at the given coordinates and return result. 
-void FloatImage::applyKernelHorizontal(const PolyphaseKernel & k, int y, uint c, uint a, WrapMode wm, float * __restrict output) const +void FloatImage::applyKernelZ(const PolyphaseKernel & k, int x, int y, uint c, uint a, WrapMode wm, float * __restrict output) const { - const uint length = k.length(); - const float scale = float(length) / float(m_width); - const float iscale = 1.0f / scale; - - const float width = k.width(); - const int windowSize = k.windowSize(); - - const float * channel = this->channel(c); - const float * alpha = this->channel(a); - - for (uint i = 0; i < length; i++) - { - const float center = (0.5f + i) * iscale; - - const int left = (int)floorf(center - width); - const int right = (int)ceilf(center + width); - nvDebugCheck(right - left <= windowSize); - - float norm = 0.0f; - float sum = 0; - for (int j = 0; j < windowSize; ++j) - { - const int idx = this->index(left + j, y, wm); - - float w = k.valueAt(i, j) * (alpha[idx] + (1.0f / 256.0f)); - norm += w; - sum += w * channel[idx]; - } - - output[i] = sum / norm; - } + const uint length = k.length(); + const float scale = float(length) / float(m_width); + const float iscale = 1.0f / scale; + + const float width = k.width(); + const int windowSize = k.windowSize(); + + const float * channel = this->channel(c); + const float * alpha = this->channel(a); + + for (uint i = 0; i < length; i++) + { + const float center = (0.5f + i) * iscale; + + const int left = (int)floorf(center - width); + const int right = (int)ceilf(center + width); + nvDebugCheck(right - left <= windowSize); + + float norm = 0.0f; + float sum = 0; + for (int j = 0; j < windowSize; ++j) + { + const int idx = this->index(x, y, left + j, wm); + + float w = k.valueAt(i, j) * (alpha[idx] + (1.0f / 256.0f)); + norm += w; + sum += w * channel[idx]; + } + + output[i] = sum / norm; + } +} + + +void FloatImage::flipX() +{ + const uint w = m_width; + const uint h = m_height; + const uint d = m_depth; + const uint w2 = w / 2; + + for (uint c = 0; c < m_componentCount; c++) { + for (uint z = 0; z < d; z++) { + for (uint y = 0; y < h; y++) { + float * line = scanline(c, y, z); + for (uint x = 0; x < w2; x++) { + swap(line[x], line[w - 1 - x]); + } + } + } + } +} + +void FloatImage::flipY() +{ + const uint w = m_width; + const uint h = m_height; + const uint d = m_depth; + const uint h2 = h / 2; + + for (uint c = 0; c < m_componentCount; c++) { + for (uint z = 0; z < d; z++) { + for (uint y = 0; y < h2; y++) { + float * src = scanline(c, y, z); + float * dst = scanline(c, h - 1 - y, z); + for (uint x = 0; x < w; x++) { + swap(src[x], dst[x]); + } + } + } + } +} + +void FloatImage::flipZ() +{ + const uint w = m_width; + const uint h = m_height; + const uint d = m_depth; + const uint d2 = d / 2; + + for (uint c = 0; c < m_componentCount; c++) { + for (uint z = 0; z < d2; z++) { + float * src = plane(c, z); + float * dst = plane(c, d - 1 - z); + for (uint i = 0; i < w*h; i++) { + swap(src[i], dst[i]); + } + } + } +} + + + +float FloatImage::alphaTestCoverage(float alphaRef, int alphaChannel, float alphaScale/*=1*/) const +{ + const uint w = m_width; + const uint h = m_height; + + float coverage = 0.0f; + +#if 0 + const float * alpha = channel(alphaChannel); + + const uint count = m_pixelCount; + for (uint i = 0; i < count; i++) { + if (alpha[i] > alphaRef) coverage += 1.0f; // @@ gt or lt? 
+ } + + return coverage / float(w * h); +#else + const uint n = 8; + + // If we want subsampling: + for (uint y = 0; y < h-1; y++) { + for (uint x = 0; x < w-1; x++) { + + float alpha00 = nv::saturate(pixel(alphaChannel, x+0, y+0, 0) * alphaScale); + float alpha10 = nv::saturate(pixel(alphaChannel, x+1, y+0, 0) * alphaScale); + float alpha01 = nv::saturate(pixel(alphaChannel, x+0, y+1, 0) * alphaScale); + float alpha11 = nv::saturate(pixel(alphaChannel, x+1, y+1, 0) * alphaScale); + + for (float fy = 0.5f/n; fy < 1.0f; fy++) { + for (float fx = 0.5f/n; fx < 1.0f; fx++) { + float alpha = alpha00 * (1 - fx) * (1 - fy) + alpha10 * fx * (1 - fy) + alpha01 * (1 - fx) * fy + alpha11 * fx * fy; + if (alpha > alphaRef) coverage += 1.0f; + } + } + } + } + + return coverage / float(w * h * n * n); +#endif +} + +void FloatImage::scaleAlphaToCoverage(float desiredCoverage, float alphaRef, int alphaChannel) +{ +#if 0 + float minAlphaRef = 0.0f; + float maxAlphaRef = 1.0f; + float midAlphaRef = 0.5f; + + // Determine desired scale using a binary search. Hardcoded to 8 steps max. + for (int i = 0; i < 10; i++) { + float currentCoverage = alphaTestCoverage(midAlphaRef, alphaChannel); + + if (currentCoverage > desiredCoverage) { + minAlphaRef = midAlphaRef; + } + else if (currentCoverage < desiredCoverage) { + maxAlphaRef = midAlphaRef; + } + else { + break; + } + + midAlphaRef = (minAlphaRef + maxAlphaRef) * 0.5f; + } + + float alphaScale = alphaRef / midAlphaRef; + + // Scale alpha channel. + scaleBias(alphaChannel, 1, alphaScale, 0.0f); + clamp(alphaChannel, 1, 0.0f, 1.0f); +#else + float minAlphaScale = 0.0f; + float maxAlphaScale = 4.0f; + float alphaScale = 1.0f; + + // Determine desired scale using a binary search. Hardcoded to 8 steps max. + for (int i = 0; i < 10; i++) { + float currentCoverage = alphaTestCoverage(alphaRef, alphaChannel, alphaScale); + + if (currentCoverage < desiredCoverage) { + minAlphaScale = alphaScale; + } + else if (currentCoverage > desiredCoverage) { + maxAlphaScale = alphaScale; + } + else { + break; + } + + alphaScale = (minAlphaScale + maxAlphaScale) * 0.5f; + } + + // Scale alpha channel. + scaleBias(alphaChannel, 1, alphaScale, 0.0f); + clamp(alphaChannel, 1, 0.0f, 1.0f); +#endif +#if _DEBUG + alphaTestCoverage(alphaRef, alphaChannel); +#endif } FloatImage* FloatImage::clone() const { - FloatImage* copy = new FloatImage(); - copy->m_width = m_width; - copy->m_height = m_height; - copy->m_componentNum = m_componentNum; - copy->m_count = m_count; - - if(m_mem) - { - copy->allocate(m_componentNum, m_width, m_height); - memcpy(copy->m_mem, m_mem, m_count * sizeof(float)); - } - - return copy; + FloatImage* copy = new FloatImage(); + + copy->allocate(m_componentCount, m_width, m_height, m_depth); + memcpy(copy->m_mem, m_mem, m_floatCount * sizeof(float)); + + return copy; } Index: ps/trunk/libraries/source/nvtt/src/src/nvimage/HoleFilling.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvimage/HoleFilling.h +++ ps/trunk/libraries/source/nvtt/src/src/nvimage/HoleFilling.h @@ -1,96 +0,0 @@ -// This code is in the public domain -- castanyo@yahoo.es - -#ifndef NV_IMAGE_HOLEFILLING_H -#define NV_IMAGE_HOLEFILLING_H - -#include -#include - -namespace nv -{ - class FloatImage; - - /// Bit mask. 
- class BitMap - { - public: - BitMap(uint w, uint h) : - m_width(w), m_height(h), m_bitArray(w*h) - { - } - - const uint width() const { return m_width; } - const uint height() const { return m_height; } - - bool bitAt(uint x, uint y) const - { - nvDebugCheck(x < m_width && y < m_height); - return m_bitArray.bitAt(y * m_width + x); - } - bool bitAt(uint idx) const - { - return m_bitArray.bitAt(idx); - } - - void setBitAt(uint x, uint y) - { - nvDebugCheck(x < m_width && y < m_height); - m_bitArray.setBitAt(y * m_width + x); - } - void setBitAt(uint idx) - { - m_bitArray.setBitAt(idx); - } - - void clearBitAt(uint x, uint y) - { - nvDebugCheck(x < m_width && y < m_height); - m_bitArray.clearBitAt(y * m_width + x); - } - void clearBitAt(uint idx) - { - m_bitArray.clearBitAt(idx); - } - - void clearAll() - { - m_bitArray.clearAll(); - } - - void setAll() - { - m_bitArray.setAll(); - } - - void toggleAll() - { - m_bitArray.toggleAll(); - } - - friend void swap(BitMap & a, BitMap & b) - { - nvCheck(a.m_width == b.m_width); - nvCheck(a.m_height == b.m_height); - //swap(const_cast(a.m_width), const_cast(b.m_width)); - //swap(const_cast(a.m_height), const_cast(b.m_height)); - swap(a.m_bitArray, b.m_bitArray); - } - - private: - - const uint m_width; - const uint m_height; - BitArray m_bitArray; - - }; - - NVIMAGE_API void fillVoronoi(FloatImage * img, const BitMap * bmap); - NVIMAGE_API void fillBlur(FloatImage * img, const BitMap * bmap); - NVIMAGE_API void fillPullPush(FloatImage * img, const BitMap * bmap); - - NVIMAGE_API void fillExtrapolate(int passCount, FloatImage * img, BitMap * bmap); - NVIMAGE_API void fillQuadraticExtrapolate(int passCount, FloatImage * img, BitMap * bmap, int coverageIndex = -1); - -} // nv namespace - -#endif // NV_IMAGE_HOLEFILLING_H Index: ps/trunk/libraries/source/nvtt/src/src/nvimage/HoleFilling.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvimage/HoleFilling.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvimage/HoleFilling.cpp @@ -1,753 +0,0 @@ -// This code is in the public domain -- castanyo@yahoo.es - -#include -#include - -#include - -#include -#include - -using namespace nv; - - -// This is a variation of Sapiro's inpainting method. -void nv::fillExtrapolate(int passCount, FloatImage * img, BitMap * bmap) -{ - nvCheck(img != NULL); - nvCheck(bmap != NULL); - - const int w = img->width(); - const int h = img->height(); - const int count = img->componentNum(); - - nvCheck(bmap->width() == uint(w)); - nvCheck(bmap->height() == uint(h)); - - AutoPtr newbmap(new BitMap(w, h)); - - for(int p = 0; p < passCount; p++) - { - for(int c = 0; c < count; c++) - { - float * channel = img->channel(c); - - for(int y = 0; y < h; y++) { - for(int x = 0; x < w; x++) { - - if (bmap->bitAt(x, y)) { - // Not a hole. 
- newbmap->setBitAt(x, y); - continue; - } - - const bool west = bmap->bitAt(img->indexClamp(x-1, y)); - const bool east = bmap->bitAt(img->indexClamp(x+1, y)); - const bool north = bmap->bitAt(img->indexClamp(x, y-1)); - const bool south = bmap->bitAt(img->indexClamp(x, y+1)); - const bool northwest = bmap->bitAt(img->indexClamp(x-1, y-1)); - const bool northeast = bmap->bitAt(img->indexClamp(x+1, y-1)); - const bool southwest = bmap->bitAt(img->indexClamp(x-1, y+1)); - const bool southeast = bmap->bitAt(img->indexClamp(x+1, y+1)); - - int num = west + east + north + south + northwest + northeast + southwest + southeast; - - if (num != 0) { - - float average = 0.0f; - if (num == 3 && west && northwest && southwest) { - average = channel[img->indexClamp(x-1, y)]; - } - else if (num == 3 && east && northeast && southeast) { - average = channel[img->indexClamp(x+1, y)]; - } - else if (num == 3 && north && northwest && northeast) { - average = channel[img->indexClamp(x, y-1)]; - } - else if (num == 3 && south && southwest && southeast) { - average = channel[img->indexClamp(x, y+1)]; - } - else { - float total = 0.0f; - if (west) { average += 1 * channel[img->indexClamp(x-1, y)]; total += 1; } - if (east) { average += 1 * channel[img->indexClamp(x+1, y)]; total += 1; } - if (north) { average += 1 * channel[img->indexClamp(x, y-1)]; total += 1; } - if (south) { average += 1 * channel[img->indexClamp(x, y+1)]; total += 1; } - - if (northwest) { average += channel[img->indexClamp(x-1, y-1)]; ++total; } - if (northeast) { average += channel[img->indexClamp(x+1, y-1)]; ++total; } - if (southwest) { average += channel[img->indexClamp(x-1, y+1)]; ++total; } - if (southeast) { average += channel[img->indexClamp(x+1, y+1)]; ++total; } - - average /= total; - } - - channel[img->indexClamp(x, y)] = average; - newbmap->setBitAt(x, y); - } - } - } - } - - // Update the bit mask. - swap(*newbmap, *bmap); - } -} - - -namespace { - - struct Neighbor { - uint16 x; - uint16 y; - uint32 d; - }; - - // Compute euclidean squared distance. - static uint dist( uint16 ax, uint16 ay, uint16 bx, uint16 by ) { - int dx = bx - ax; - int dy = by - ay; - return uint(dx*dx + dy*dy); - } - - // Check neighbour, this is the core of the EDT algorithm. - static void checkNeighbour( int x, int y, Neighbor * e, const Neighbor & n ) { - nvDebugCheck(e != NULL); - - uint d = dist( x, y, n.x, n.y ); - if( d < e->d ) { - e->x = n.x; - e->y = n.y; - e->d = d; - } - } - -} // namespace - -// Voronoi filling using EDT-4 -void nv::fillVoronoi(FloatImage * img, const BitMap * bmap) -{ - nvCheck(img != NULL); - - const int w = img->width(); - const int h = img->height(); - const int count = img->componentNum(); - - nvCheck(bmap->width() == uint(w)); - nvCheck(bmap->height() == uint(h)); - - Array edm; - edm.resize(w * h); - - int x, y; - int x0, x1, y0, y1; - - // Init edm. - for( y = 0; y < h; y++ ) { - for( x = 0; x < w; x++ ) { - if( bmap->bitAt(x, y) ) { - edm[y * w + x].x = x; - edm[y * w + x].y = y; - edm[y * w + x].d = 0; - } - else { - edm[y * w + x].x = w; - edm[y * w + x].y = h; - edm[y * w + x].d = w*w + h*h; - } - } - } - - // First pass. - for( y = 0; y < h; y++ ) { - for( x = 0; x < w; x++ ) { - x0 = clamp(x-1, 0, w-1); // @@ Wrap? 
- x1 = clamp(x+1, 0, w-1); - y0 = clamp(y-1, 0, h-1); - - Neighbor & e = edm[y * w + x]; - checkNeighbour(x, y, &e, edm[y0 * w + x0]); - checkNeighbour(x, y, &e, edm[y0 * w + x]); - checkNeighbour(x, y, &e, edm[y0 * w + x1]); - checkNeighbour(x, y, &e, edm[y * w + x0]); - } - - for( x = w-1; x >= 0; x-- ) { - x1 = clamp(x+1, 0, w-1); - - Neighbor & e = edm[y * w + x]; - checkNeighbour(x, y, &e, edm[y * w + x1]); - } - } - - // Third pass. - for( y = h-1; y >= 0; y-- ) { - for( x = w-1; x >= 0; x-- ) { - x0 = clamp(x-1, 0, w-1); - x1 = clamp(x+1, 0, w-1); - y1 = clamp(y+1, 0, h-1); - - Neighbor & e = edm[y * w + x]; - checkNeighbour(x, y, &e, edm[y * w + x1]); - checkNeighbour(x, y, &e, edm[y1 * w + x0]); - checkNeighbour(x, y, &e, edm[y1 * w + x]); - checkNeighbour(x, y, &e, edm[y1 * w + x1]); - } - - for( x = 0; x < w; x++ ) { - x0 = clamp(x-1, 0, w-1); - - Neighbor & e = edm[y * w + x]; - checkNeighbour(x, y, &e, edm[y * w + x0]); - } - } - - // Fill empty holes. - for( y = 0; y < h; y++ ) { - for( x = 0; x < w; x++ ) { - const int sx = edm[y * w + x].x; - const int sy = edm[y * w + x].y; - nvDebugCheck(sx < w && sy < h); - - if( sx != x || sy != y ) { - for(int c = 0; c < count; c++ ) { - img->setPixel(img->pixel(sx, sy, c), x, y, c); - } - } - } - } - -} - - -void nv::fillBlur(FloatImage * img, const BitMap * bmap) -{ - nvCheck(img != NULL); - - // @@ Apply a 3x3 kernel. -} - - -static bool downsample(const FloatImage * src, const BitMap * srcMask, const FloatImage ** _dst, const BitMap ** _dstMask) -{ - const uint w = src->width(); - const uint h = src->height(); - const uint count = src->componentNum(); - - // count holes in srcMask, return false if fully filled. - uint holes = 0; - for(uint y = 0; y < h; y++) { - for(uint x = 0; x < w; x++) { - holes += srcMask->bitAt(x, y) == 0; - } - } - if (holes == 0 || (w == 2 || h == 2)) { - // Stop when no holes or when the texture is very small. - return false; - } - - // Apply box filter to image and mask and return true. - const uint nw = w / 2; - const uint nh = h / 2; - - FloatImage * dst = new FloatImage(); - dst->allocate(count, nw, nh); - BitMap * dstMask = new BitMap(nw, nh); - - for(uint c = 0; c < count; c++) { - for(uint y = 0; y < nh; y++) { - for(uint x = 0; x < nw; x++) { - - const uint x0 = 2 * x + 0; - const uint x1 = 2 * x + 1; - const uint y0 = 2 * y + 0; - const uint y1 = 2 * y + 1; - - const float f0 = src->pixel(x0, y0, c); - const float f1 = src->pixel(x1, y0, c); - const float f2 = src->pixel(x0, y1, c); - const float f3 = src->pixel(x1, y1, c); - - const bool b0 = srcMask->bitAt(x0, y0); - const bool b1 = srcMask->bitAt(x1, y0); - const bool b2 = srcMask->bitAt(x0, y1); - const bool b3 = srcMask->bitAt(x1, y1); - - if (b0 || b1 || b2 || b3) { - // Set bit mask. - dstMask->setBitAt(x, y); - - // Set pixel. - float value = 0.0f; - int total = 0; - if (b0) { value += f0; total++; } - if (b1) { value += f1; total++; } - if (b2) { value += f2; total++; } - if (b3) { value += f3; total++; } - dst->setPixel(value / total, x, y, c); - } - } - } - } - - *_dst = dst; - *_dstMask = dstMask; - - return true; -} - -// This is the filter used in the Lumigraph paper. -void nv::fillPullPush(FloatImage * img, const BitMap * bmap) -{ - nvCheck(img != NULL); - - const uint count = img->componentNum(); - const uint w = img->width(); - const uint h = img->height(); - const uint num = log2(max(w,h)); - - // Build mipmap chain. 
- Array mipmaps(num); - Array mipmapMasks(num); - - mipmaps.append(img); - mipmapMasks.append(bmap); - - const FloatImage * current; - const BitMap * currentMask; - - // Compute mipmap chain. - while(downsample(mipmaps.back(), mipmapMasks.back(), ¤t, ¤tMask)) - { - mipmaps.append(current); - mipmapMasks.append(currentMask); - } - - // Sample mipmaps until non-hole is found. - for(uint y = 0; y < h; y++) { - for(uint x = 0; x < w; x++) { - - int sx = x; - int sy = y; - //float sx = x; - //float sy = y; - - const uint levelCount = mipmaps.count(); - for (uint l = 0; l < levelCount; l++) - { - //const float fx = sx / mipmaps[l]->width(); - //const float fy = sy / mipmaps[l]->height(); - - if (mipmapMasks[l]->bitAt(sx, sy)) - { - // Sample mipmaps[l](sx, sy) and copy to img(x, y) - for(uint c = 0; c < count; c++) { - //img->setPixel(mipmaps[l]->linear_clamp(fx, fy, c), x, y, c); - img->setPixel(mipmaps[l]->pixel(sx, sy, c), x, y, c); - } - break; - } - - sx /= 2; - sy /= 2; - } - } - } - - // Don't delete the original image and mask. - mipmaps[0] = NULL; - mipmapMasks[0] = NULL; - - // Delete the mipmaps. - deleteAll(mipmaps); - deleteAll(mipmapMasks); -} - - - -/* - -This Code is from Charles Bloom: - -DoPixelSeamFix -10-20-02 - -Looks in the 5x5 local neighborhood (LocalPixels) of the desired pixel to fill. -It tries to build a quadratic model of the neighborhood surface to use in -extrapolating. You need 5 pixels to establish a 2d quadratic curve. - -This is really just a nice generic way to extrapolate pixels. It also happens -to work great for seam-fixing. - -Note that I'm working on normals, but I treat them just as 3 scalars and normalize -at the end. To be more correct, I would work on the surface of a sphere, but that -just seems like way too much work. 
- -*/ - -struct LocalPixels -{ - // 5x5 neighborhood - // the center is at result - // index [y][x] - bool fill[5][5]; - float data[5][5]; - - mutable float result; - mutable float weight; - - bool Quad3SubH(float * pQ, int row) const - { - const bool * pFill = fill[row]; - const float * pDat = data[row]; - - if ( pFill[1] && pFill[2] && pFill[3] ) - { - // good row - *pQ = pDat[1] - 2.f * pDat[2] + pDat[3]; - return true; - } - else if ( pFill[0] && pFill[1] && pFill[2] ) - { - // good row - *pQ = pDat[0] - 2.f * pDat[1] + pDat[2]; - return true; - } - else if ( pFill[2] && pFill[3] && pFill[4] ) - { - // good row - *pQ = pDat[2] - 2.f * pDat[3] + pDat[4]; - return true; - } - return false; - } - - // improve result with a horizontal quad in row 1 and/or - bool Quad3SubV(float * pQ, int col) const - { - if ( fill[1][col] && fill[2][col] && fill[3][col] ) - { - // good row - *pQ = data[1][col] - 2.f * data[2][col] + data[3][col]; - return true; - } - else if ( fill[0][col] && fill[1][col] && fill[2][col] ) - { - // good row - *pQ = data[0][col] - 2.f * data[1][col] + data[2][col]; - return true; - } - else if ( fill[2][col] && fill[3][col] && fill[4][col] ) - { - // good row - *pQ = data[2][col] - 2.f * data[3][col] + data[4][col]; - return true; - } - return false; - } - - bool Quad3H(float * pQ) const - { - if (!Quad3SubH(pQ,1)) - { - return Quad3SubH(pQ,3); - } - float q = 0.0f; // initializer not needed, just make it shut up - if (Quad3SubH(&q, 3)) - { - // got q and pQ - *pQ = (*pQ+q)*0.5f; - } - return true; - } - - bool Quad3V(float * pQ) const - { - if (!Quad3SubV(pQ, 1)) - { - return Quad3SubV(pQ, 3); - } - float q = 0.0f; // initializer not needed, just make it shut up - if (Quad3SubV(&q, 3)) - { - // got q and pQ - *pQ = (*pQ + q) * 0.5f; - } - return true; - } - // Quad returns ([0]+[2] - 2.f*[1]) - // a common want is [1] - ([0]+[2])*0.5f ; - // so use -0.5f*Quad - - bool tryQuads() const - { - bool res = false; - - // look for a pair that straddles the middle: - if ( fill[2][1] && fill[2][3] ) - { - // got horizontal straddle - float q; - if ( Quad3H(&q) ) - { - result += (data[2][1] + data[2][3] - q) * 0.5f; - weight += 1.f; - res = true; - } - } - if ( fill[1][2] && fill[3][2] ) - { - // got vertical straddle - float q; - if ( Quad3V(&q) ) - { - result += (data[1][2] + data[3][2] - q) * 0.5f; - weight += 1.f; - res = true; - } - } - - // look for pairs that lead into the middle : - if ( fill[2][0] && fill[2][1] ) - { - // got left-side pair - float q; - if ( Quad3H(&q) ) - { - result += data[2][1]*2.f - data[2][0] + q; - weight += 1.f; - res = true; - } - } - if ( fill[2][3] && fill[2][4] ) - { - // got right-side pair - float q; - if ( Quad3H(&q) ) - { - result += data[2][3]*2.f - data[2][4] + q; - weight += 1.f; - res = true; - } - } - if ( fill[0][2] && fill[1][2] ) - { - // got left-side pair - float q; - if ( Quad3V(&q) ) - { - result += data[1][2]*2.f - data[0][2] + q; - weight += 1.f; - res = true; - } - } - if ( fill[3][2] && fill[4][2] ) - { - // got right-side pair - float q; - if ( Quad3V(&q) ) - { - result += data[3][2]*2.f - data[4][2] + q; - weight += 1.f; - res = true; - } - } - return res; - } - - bool tryPlanar() const - { - // four cases : - const int indices[] = - { - 2,1, 1,2, 1,1, - 2,1, 3,2, 3,1, - 2,3, 1,2, 1,3, - 2,3, 3,2, 3,3 - }; - bool res = false; - for (int i = 0; i < 4; i++) - { - const int * I = indices + i*6; - if (!fill[ I[0] ][ I[1] ]) - continue; - if (!fill[ I[2] ][ I[3] ]) - continue; - if (!fill[ I[4] ][ I[5] ]) - continue; - - result 
+= data[ I[0] ][ I[1] ] + data[ I[2] ][ I[3] ] - data[ I[4] ][ I[5] ]; - weight += 1.0f; - res = true; - } - return res; - } - - bool tryTwos() const - { - bool res = false; - - if (fill[2][1] && fill[2][3]) - { - result += (data[2][1] + data[2][3]) * 0.5f; - weight += 1.0f; - res = true; - } - if (fill[1][2] && fill[3][2]) - { - result += (data[1][2] + data[3][2]) * 0.5f; - weight += 1.0f; - res = true; - } - - // four side-rotates : - const int indices[] = - { - 2,1, 2,0, - 2,3, 2,4, - 1,2, 0,2, - 3,2, 4,2, - }; - for (int i = 0; i < 4; i++) - { - const int * I = indices + i*4; - if (!fill[ I[0] ][ I[1] ]) - continue; - if (!fill[ I[2] ][ I[3] ]) - continue; - - result += data[ I[0] ][ I[1] ]*2.0f - data[ I[2] ][ I[3] ]; - weight += 1.0f; - res = true; - } - - return res; - } - - bool doLocalPixelFill() const - { - result = 0.0f; - weight = 0.0f; - - if (tryQuads()) { - return true; - } - - if (tryPlanar()) { - return true; - } - - return tryTwos(); - } - -}; // struct LocalPixels - - - -// This is a quadratic extrapolation filter from Charles Bloom (DoPixelSeamFix). Used with his permission. -void nv::fillQuadraticExtrapolate(int passCount, FloatImage * img, BitMap * bmap, int coverageIndex /*= -1*/) -{ - nvCheck(passCount > 0); - nvCheck(img != NULL); - nvCheck(bmap != NULL); - - const int w = img->width(); - const int h = img->height(); - const int count = img->componentNum(); - - nvCheck(bmap->width() == uint(w)); - nvCheck(bmap->height() == uint(h)); - - AutoPtr newbmap( new BitMap(w, h) ); - - float * coverageChannel = NULL; - if (coverageIndex != -1) - { - coverageChannel = img->channel(coverageIndex); - } - - int firstChannel = -1; - - for (int p = 0; p < passCount; p++) - { - for (int c = 0; c < count; c++) - { - if (c == coverageIndex) continue; - if (firstChannel == -1) firstChannel = c; - - float * channel = img->channel(c); - - for (int yb = 0; yb < h; yb++) { - for (int xb = 0; xb < w; xb++) { - - if (bmap->bitAt(xb, yb)) { - // Not a hole. - newbmap->setBitAt(xb, yb); - continue; - } - - int numFill = 0; - - LocalPixels lp; - for (int ny = 0; ny < 5; ny++) - { - int y = (yb + ny - 2); - if ( y < 0 || y >= h ) - { - // out of range - for(int i = 0; i < 5; i++) - { - lp.fill[ny][i] = false; - } - continue; - } - - for (int nx = 0; nx < 5; nx++) - { - int x = (xb + nx - 2); - if (x < 0 || x >= w) - { - lp.fill[ny][nx] = false; - } - else - { - int idx = img->index(x, y); - if (!bmap->bitAt(idx)) - { - lp.fill[ny][nx] = false; - } - else - { - lp.fill[ny][nx] = true; - lp.data[ny][nx] = channel[idx]; - numFill++; - } - } - } - } - - // need at least 3 to do anything decent - if (numFill < 2) - continue; - - nvDebugCheck(lp.fill[2][2] == false); - - if (lp.doLocalPixelFill()) - { - const int idx = img->index(xb, yb); - channel[idx] = lp.result / lp.weight; - - if (c == firstChannel) - { - //coverageChannel[idx] /= lp.weight; // @@ Not sure what this was for, coverageChannel[idx] is always zero. - newbmap->setBitAt(xb, yb); - } - } - } - } - } - - // Update the bit mask. 
- swap(*newbmap, *bmap); - } -} Index: ps/trunk/libraries/source/nvtt/src/src/nvimage/Image.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvimage/Image.h +++ ps/trunk/libraries/source/nvtt/src/src/nvimage/Image.h @@ -1,81 +1,90 @@ // This code is in the public domain -- castanyo@yahoo.es +#pragma once #ifndef NV_IMAGE_IMAGE_H #define NV_IMAGE_IMAGE_H -#include -#include +#include "nvimage.h" +#include "nvcore/Debug.h" + +#if NV_USE_ALTIVEC +#undef pixel +#endif namespace nv { - class Color32; - - /// 32 bit RGBA image. - class NVIMAGE_CLASS Image - { - public: - - enum Format - { - Format_RGB, - Format_ARGB, - }; - - Image(); - Image(const Image & img); - ~Image(); - - const Image & operator=(const Image & img); - - - void allocate(uint w, uint h); - bool load(const char * name); - - void wrap(void * data, uint w, uint h); - void unwrap(); - - uint width() const; - uint height() const; - - const Color32 * scanline(uint h) const; - Color32 * scanline(uint h); - - const Color32 * pixels() const; - Color32 * pixels(); - - const Color32 & pixel(uint idx) const; - Color32 & pixel(uint idx); - - const Color32 & pixel(uint x, uint y) const; - Color32 & pixel(uint x, uint y); - - Format format() const; - void setFormat(Format f); - - void fill(Color32 c); - - private: - void free(); - - private: - uint m_width; - uint m_height; - Format m_format; - Color32 * m_data; - }; - - - inline const Color32 & Image::pixel(uint x, uint y) const - { - nvDebugCheck(x < width() && y < height()); - return pixel(y * width() + x); - } - - inline Color32 & Image::pixel(uint x, uint y) - { - nvDebugCheck(x < width() && y < height()); - return pixel(y * width() + x); - } + class Color32; + + /// 32 bit RGBA image. 
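+	/// The image optionally carries a depth for volume textures: allocate(), resize()
+	/// and wrap() take a depth argument that defaults to 1, pixel(x, y, z) defaults z
+	/// to 0, and texels are addressed in the flat array at (z * height + y) * width + x.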
+ class NVIMAGE_CLASS Image + { + public: + + enum Format + { + Format_RGB, + Format_ARGB, + }; + + Image(); + Image(const Image & img); + ~Image(); + + const Image & operator=(const Image & img); + + + void allocate(uint w, uint h, uint d = 1); + bool load(const char * name); + + void resize(uint w, uint h, uint d = 1); + + void wrap(void * data, uint w, uint h, uint d = 1); + void unwrap(); + + uint width() const; + uint height() const; + uint depth() const; + + const Color32 * scanline(uint h) const; + Color32 * scanline(uint h); + + const Color32 * pixels() const; + Color32 * pixels(); + + const Color32 & pixel(uint idx) const; + Color32 & pixel(uint idx); + + const Color32 & pixel(uint x, uint y, uint z = 0) const; + Color32 & pixel(uint x, uint y, uint z = 0); + + Format format() const; + void setFormat(Format f); + + void fill(Color32 c); + + private: + void free(); + + private: + uint m_width; + uint m_height; + uint m_depth; + Format m_format; + Color32 * m_data; + }; + + + inline const Color32 & Image::pixel(uint x, uint y, uint z) const + { + nvDebugCheck(x < m_width && y < m_height && z < m_depth); + return pixel((z * m_height + y) * m_width + x); + } + + inline Color32 & Image::pixel(uint x, uint y, uint z) + { + nvDebugCheck(x < m_width && y < m_height && z < m_depth); + return pixel((z * m_height + y) * m_width + x); + } } // nv namespace Index: ps/trunk/libraries/source/nvtt/src/src/nvimage/Image.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvimage/Image.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvimage/Image.cpp @@ -1,12 +1,16 @@ // This code is in the public domain -- castanyo@yahoo.es -#include -#include +#include "Image.h" +#include "ImageIO.h" -#include +#include "nvmath/Color.h" -#include -#include +#include "nvcore/Debug.h" +#include "nvcore/Ptr.h" +#include "nvcore/Utils.h" // swap +#include "nvcore/Memory.h" // realloc, free + +#include // memcpy using namespace nv; @@ -17,133 +21,182 @@ Image::Image(const Image & img) : m_data(NULL) { - allocate(img.m_width, img.m_height); - m_format = img.m_format; - memcpy(m_data, img.m_data, sizeof(Color32) * m_width * m_height); + allocate(img.m_width, img.m_height, img.m_depth); + m_format = img.m_format; + memcpy(m_data, img.m_data, sizeof(Color32) * m_width * m_height * m_depth); } Image::~Image() { - free(); + free(); } const Image & Image::operator=(const Image & img) { - allocate(img.m_width, img.m_height); - m_format = img.m_format; - memcpy(m_data, img.m_data, sizeof(Color32) * m_width * m_height); - return *this; + allocate(img.m_width, img.m_height, m_depth); + m_format = img.m_format; + memcpy(m_data, img.m_data, sizeof(Color32) * m_width * m_height * m_depth); + return *this; } -void Image::allocate(uint w, uint h) -{ - m_width = w; - m_height = h; - m_data = (Color32 *)realloc(m_data, w * h * sizeof(Color32)); +void Image::allocate(uint w, uint h, uint d/*= 1*/) +{ + free(); + m_width = w; + m_height = h; + m_depth = d; + m_data = realloc(m_data, w * h * d); +} + +void Image::resize(uint w, uint h, uint d/*= 1*/) { + + Image img; + img.allocate(w, h, d); + + Color32 background(0,0,0,0); + + // Copy image. 
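+	// (Texels inside the overlap of the old and new extents are copied; everything
+	// outside the old image is filled with the transparent background color, and the
+	// member fields are swapped with the temporary image at the end.)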
+ uint x, y, z; + for(z = 0; z < min(d, m_depth); z++) { + for(y = 0; y < min(h, m_height); y++) { + for(x = 0; x < min(w, m_width); x++) { + img.pixel(x, y, z) = pixel(x, y, z); + } + for(; x < w; x++) { + img.pixel(x, y, z) = background; + } + } + for(; y < h; y++) { + for(x = 0; x < w; x++) { + img.pixel(x, y, z) = background; + } + } + } + for(; z < d; z++) { + for(y = 0; y < h; y++) { + for(x = 0; x < w; x++) { + img.pixel(x, y, z) = background; + } + } + } + + swap(m_width, img.m_width); + swap(m_height, img.m_height); + swap(m_depth, img.m_depth); + swap(m_format, img.m_format); + swap(m_data, img.m_data); } bool Image::load(const char * name) { - free(); - - AutoPtr img(ImageIO::load(name)); - if (img == NULL) { - return false; - } - - swap(m_width, img->m_width); - swap(m_height, img->m_height); - swap(m_format, img->m_format); - swap(m_data, img->m_data); - - return true; -} - -void Image::wrap(void * data, uint w, uint h) -{ - free(); - m_data = (Color32 *)data; - m_width = w; - m_height = h; + free(); + + AutoPtr img(ImageIO::load(name)); + if (img == NULL) { + return false; + } + + swap(m_width, img->m_width); + swap(m_height, img->m_height); + swap(m_depth, img->m_depth); + swap(m_format, img->m_format); + swap(m_data, img->m_data); + + return true; +} + +void Image::wrap(void * data, uint w, uint h, uint d) +{ + free(); + m_data = (Color32 *)data; + m_width = w; + m_height = h; + m_depth = d; } void Image::unwrap() { - m_data = NULL; - m_width = 0; - m_height = 0; + m_data = NULL; + m_width = 0; + m_height = 0; + m_depth = 0; } void Image::free() { - ::free(m_data); - m_data = NULL; + ::free(m_data); + m_data = NULL; } uint Image::width() const { - return m_width; + return m_width; } uint Image::height() const { - return m_height; + return m_height; +} + +uint Image::depth() const +{ + return m_depth; } const Color32 * Image::scanline(uint h) const { - nvDebugCheck(h < m_height); - return m_data + h * m_width; + nvDebugCheck(h < m_height); + return m_data + h * m_width; } Color32 * Image::scanline(uint h) { - nvDebugCheck(h < m_height); - return m_data + h * m_width; + nvDebugCheck(h < m_height); + return m_data + h * m_width; } const Color32 * Image::pixels() const { - return m_data; + return m_data; } Color32 * Image::pixels() { - return m_data; + return m_data; } const Color32 & Image::pixel(uint idx) const { - nvDebugCheck(idx < m_width * m_height); - return m_data[idx]; + nvDebugCheck(idx < m_width * m_height * m_depth); + return m_data[idx]; } Color32 & Image::pixel(uint idx) { - nvDebugCheck(idx < m_width * m_height); - return m_data[idx]; + nvDebugCheck(idx < m_width * m_height * m_depth); + return m_data[idx]; } Image::Format Image::format() const { - return m_format; + return m_format; } void Image::setFormat(Image::Format f) { - m_format = f; + m_format = f; } void Image::fill(Color32 c) { - const uint size = m_width * m_height; - for (uint i = 0; i < size; ++i) - { - m_data[i] = c; - } + const uint size = m_width * m_height * m_depth; + for (uint i = 0; i < size; ++i) + { + m_data[i] = c; + } } Index: ps/trunk/libraries/source/nvtt/src/src/nvimage/ImageIO.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvimage/ImageIO.h +++ ps/trunk/libraries/source/nvtt/src/src/nvimage/ImageIO.h @@ -1,58 +1,36 @@ // This code is in the public domain -- castanyo@yahoo.es +#pragma once #ifndef NV_IMAGE_IMAGEIO_H #define NV_IMAGE_IMAGEIO_H -#include +#include "nvimage.h" + +#include "nvcore/StrLib.h" + namespace nv { 
- class Image; - class FloatImage; - class Stream; - - namespace ImageIO - { - NVIMAGE_API Image * load(const char * fileName); - NVIMAGE_API Image * load(const char * fileName, Stream & s); - - NVIMAGE_API FloatImage * loadFloat(const char * fileName); - NVIMAGE_API FloatImage * loadFloat(const char * fileName, Stream & s); - - NVIMAGE_API bool save(const char * fileName, Stream & s, Image * img); - NVIMAGE_API bool save(const char * fileName, Image * img); - NVIMAGE_API bool saveFloat(const char * fileName, const FloatImage * fimage, uint base_component, uint num_components); - - NVIMAGE_API Image * loadTGA(Stream & s); - NVIMAGE_API bool saveTGA(Stream & s, const Image * img); - - NVIMAGE_API Image * loadPSD(Stream & s); - -#if defined(HAVE_PNG) - NVIMAGE_API Image * loadPNG(Stream & s); -#endif - -#if defined(HAVE_JPEG) - NVIMAGE_API Image * loadJPG(Stream & s); -#endif - -#if defined(HAVE_TIFF) - NVIMAGE_API FloatImage * loadFloatTIFF(const char * fileName, Stream & s); - - NVIMAGE_API bool saveFloatTIFF(const char * fileName, const FloatImage * fimage, uint base_component, uint num_components); -#endif - -#if defined(HAVE_OPENEXR) - NVIMAGE_API FloatImage * loadFloatEXR(const char * fileName, Stream & s); - - NVIMAGE_API bool saveFloatEXR(const char * fileName, const FloatImage * fimage, uint base_component, uint num_components); -#endif + class Image; + class FloatImage; + class Stream; + + namespace ImageIO + { + NVIMAGE_API Image * load(const char * fileName); + NVIMAGE_API Image * load(const char * fileName, Stream & s); + + NVIMAGE_API FloatImage * loadFloat(const char * fileName); + NVIMAGE_API FloatImage * loadFloat(const char * fileName, Stream & s); + + NVIMAGE_API bool save(const char * fileName, const Image * img, const char ** tags=NULL); // NULL terminated list. + NVIMAGE_API bool save(const char * fileName, Stream & s, const Image * img, const char ** tags=NULL); + + NVIMAGE_API bool saveFloat(const char * fileName, const FloatImage * fimage, uint baseComponent, uint componentCount); + NVIMAGE_API bool saveFloat(const char * fileName, Stream & s, const FloatImage * fimage, uint baseComponent, uint componentCount); - // NVIMAGE_API FloatImage * loadFloatPFM(const char * fileName, Stream & s); - // NVIMAGE_API bool saveFloatPFM(const char * fileName, const FloatImage * fimage, uint base_component, uint num_components); + } // ImageIO namespace - } // ImageIO namespace - } // nv namespace Index: ps/trunk/libraries/source/nvtt/src/src/nvimage/ImageIO.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvimage/ImageIO.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvimage/ImageIO.cpp @@ -1,752 +1,905 @@ // This code is in the public domain -- castanyo@yahoo.es -#include -#include -#include -#include -//#include // @@ Disable temporarily -#include - -#include - #include "ImageIO.h" #include "Image.h" #include "FloatImage.h" #include "TgaFile.h" #include "PsdFile.h" +#include "DirectDrawSurface.h" +#include "PixelFormat.h" + +#include "nvmath/Color.h" +#include "nvmath/Half.h" + +#include "nvcore/Ptr.h" +#include "nvcore/Utils.h" +#include "nvcore/Array.inl" +#include "nvcore/StrLib.h" +#include "nvcore/StdStream.h" +#include "nvcore/TextWriter.h" // Extern +#if defined(HAVE_FREEIMAGE) +# include +// If FreeImage available, do not use individual libraries, since that produces link conflicts in some platforms. 
+# undef HAVE_JPEG +# undef HAVE_PNG +# undef HAVE_TIFF +# undef HAVE_OPENEXR +#endif + #if defined(HAVE_JPEG) extern "C" { -# include +# include } #endif #if defined(HAVE_PNG) -# include +# include #endif #if defined(HAVE_TIFF) -# define _TIFF_DATA_TYPEDEFS_ -# include +# define _TIFF_DATA_TYPEDEFS_ +# include #endif #if defined(HAVE_OPENEXR) -# include -# include -# include -# include -# include -# include +# include +# include +# include +# include +# include +# include #endif +#if defined(HAVE_STBIMAGE) +# define STBI_NO_STDIO +# include +#endif + + using namespace nv; -namespace { - // Array of image load plugins. -// static HashMap s_plugin_load_map; - // Array of image save plugins. -// static HashMap s_plugin_save_map; - - struct Color555 { - uint16 b : 5; - uint16 g : 5; - uint16 r : 5; - }; - -} // namespace +struct Color555 { + uint16 b : 5; + uint16 g : 5; + uint16 r : 5; +}; + +// Load TGA image. +static Image * loadTGA(Stream & s) +{ + nvCheck(!s.isError()); + nvCheck(s.isLoading()); + + TgaHeader tga; + s << tga; + s.seek(TgaHeader::Size + tga.id_length); + + // Get header info. + bool rle = false; + bool pal = false; + bool rgb = false; + bool grey = false; + + switch( tga.image_type ) { + case TGA_TYPE_RLE_INDEXED: + rle = true; + // no break is intended! + case TGA_TYPE_INDEXED: + if( tga.colormap_type!=1 || tga.colormap_size!=24 || tga.colormap_length>256 ) { + nvDebug( "*** loadTGA: Error, only 24bit paletted images are supported.\n" ); + return NULL; + } + pal = true; + break; + case TGA_TYPE_RLE_RGB: + rle = true; + // no break is intended! + case TGA_TYPE_RGB: + rgb = true; + break; + + case TGA_TYPE_RLE_GREY: + rle = true; + // no break is intended! + case TGA_TYPE_GREY: + grey = true; + break; + + default: + nvDebug( "*** loadTGA: Error, unsupported image type.\n" ); + return NULL; + } -Image * nv::ImageIO::load(const char * fileName) -{ - nvDebugCheck(fileName != NULL); + const uint pixel_size = (tga.pixel_size/8); + nvDebugCheck(pixel_size <= 4); - StdInputStream stream(fileName); - - if (stream.isError()) { - return NULL; - } - - return ImageIO::load(fileName, stream); -} + const uint size = tga.width * tga.height * pixel_size; -Image * nv::ImageIO::load(const char * fileName, Stream & s) -{ - nvDebugCheck(fileName != NULL); - nvDebugCheck(s.isLoading()); - const char * extension = Path::extension(fileName); - - if (strCaseCmp(extension, ".tga") == 0) { - return ImageIO::loadTGA(s); - } -#if defined(HAVE_JPEG) - if (strCaseCmp(extension, ".jpg") == 0 || strCaseCmp(extension, ".jpeg") == 0) { - return loadJPG(s); - } -#endif -#if defined(HAVE_PNG) - if (strCaseCmp(extension, ".png") == 0) { - return loadPNG(s); - } -#endif - if (strCaseCmp(extension, ".psd") == 0) { - return loadPSD(s); + // Read palette + uint8 palette[768]; + if( pal ) { + nvDebugCheck(tga.colormap_length <= 256); + s.serialize(palette, 3 * tga.colormap_length); + } + + // Decode image. + uint8 * mem = new uint8[size]; + if( rle ) { + // Decompress image in src. + uint8 * dst = mem; + int num = size; + + while (num > 0) { + // Get packet header + uint8 c; + s << c; + + uint count = (c & 0x7f) + 1; + num -= count * pixel_size; + + if (c & 0x80) { + // RLE pixels. + uint8 pixel[4]; // uint8 pixel[pixel_size]; + s.serialize( pixel, pixel_size ); + do { + memcpy(dst, pixel, pixel_size); + dst += pixel_size; + } while (--count); + } + else { + // Raw pixels. + count *= pixel_size; + //file->Read8(dst, count); + s.serialize(dst, count); + dst += count; + } } - // @@ use image plugins? 
- return NULL; + } + else { + s.serialize(mem, size); + } + + // Allocate image. + AutoPtr img(new Image()); + img->allocate(tga.width, tga.height); + + int lstep; + Color32 * dst; + if( tga.flags & TGA_ORIGIN_UPPER ) { + lstep = tga.width; + dst = img->pixels(); + } + else { + lstep = - tga.width; + dst = img->pixels() + (tga.height-1) * tga.width; + } + + // Write image. + uint8 * src = mem; + if( pal ) { + for( int y = 0; y < tga.height; y++ ) { + for( int x = 0; x < tga.width; x++ ) { + uint8 idx = *src++; + dst[x].setBGRA(palette[3*idx+0], palette[3*idx+1], palette[3*idx+2], 0xFF); + } + dst += lstep; + } + } + else if( grey ) { + img->setFormat(Image::Format_ARGB); + + for( int y = 0; y < tga.height; y++ ) { + for( int x = 0; x < tga.width; x++ ) { + dst[x].setBGRA(*src, *src, *src, *src); + src++; + } + dst += lstep; + } + } + else { + + if( tga.pixel_size == 16 ) { + for( int y = 0; y < tga.height; y++ ) { + for( int x = 0; x < tga.width; x++ ) { + Color555 c = *reinterpret_cast(src); + uint8 b = (c.b << 3) | (c.b >> 2); + uint8 g = (c.g << 3) | (c.g >> 2); + uint8 r = (c.r << 3) | (c.r >> 2); + dst[x].setBGRA(b, g, r, 0xFF); + src += 2; + } + dst += lstep; + } + } + else if( tga.pixel_size == 24 ) { + for( int y = 0; y < tga.height; y++ ) { + for( int x = 0; x < tga.width; x++ ) { + dst[x].setBGRA(src[0], src[1], src[2], 0xFF); + src += 3; + } + dst += lstep; + } + } + else if( tga.pixel_size == 32 ) { + img->setFormat(Image::Format_ARGB); + + for( int y = 0; y < tga.height; y++ ) { + for( int x = 0; x < tga.width; x++ ) { + dst[x].setBGRA(src[0], src[1], src[2], src[3]); + src += 4; + } + dst += lstep; + } + } + } + + // free uncompressed data. + delete [] mem; + + return img.release(); } -bool nv::ImageIO::save(const char * fileName, Stream & s, Image * img) +// Save TGA image. +static bool saveTGA(Stream & s, const Image * img) { - nvDebugCheck(fileName != NULL); - nvDebugCheck(s.isSaving()); - nvDebugCheck(img != NULL); + nvCheck(!s.isError()); + nvCheck(img != NULL); + nvCheck(img->pixels() != NULL); + + TgaFile tga; + tga.head.id_length = 0; + tga.head.colormap_type = 0; + tga.head.image_type = TGA_TYPE_RGB; + + tga.head.colormap_index = 0; + tga.head.colormap_length = 0; + tga.head.colormap_size = 0; + + tga.head.x_origin = 0; + tga.head.y_origin = 0; + tga.head.width = img->width(); + tga.head.height = img->height(); + if(img->format() == Image::Format_ARGB) { + tga.head.pixel_size = 32; + tga.head.flags = TGA_ORIGIN_UPPER | TGA_HAS_ALPHA; + } + else { + tga.head.pixel_size = 24; + tga.head.flags = TGA_ORIGIN_UPPER; + } - const char * extension = Path::extension(fileName); + // @@ Serialize directly. 
+ tga.allocate(); - if (strCaseCmp(extension, ".tga") == 0) { - return ImageIO::saveTGA(s, img); - } + const uint n = img->width() * img->height(); + if(img->format() == Image::Format_ARGB) { + for(uint i = 0; i < n; i++) { + Color32 color = img->pixel(i); + tga.mem[4 * i + 0] = color.b; + tga.mem[4 * i + 1] = color.g; + tga.mem[4 * i + 2] = color.r; + tga.mem[4 * i + 3] = color.a; + } + } + else { + for(uint i = 0; i < n; i++) { + Color32 color = img->pixel(i); + tga.mem[3 * i + 0] = color.b; + tga.mem[3 * i + 1] = color.g; + tga.mem[3 * i + 2] = color.r; + } + } - return false; + s << tga; + + tga.free(); + + return true; } -bool nv::ImageIO::save(const char * fileName, Image * img) +/*static Image * loadPPM(Stream & s) { - nvDebugCheck(fileName != NULL); - nvDebugCheck(img != NULL); + // @@ + return NULL; +}*/ - StdOutputStream stream(fileName); - if (stream.isError()) - { - return false; - } +// Save PPM image. +static bool savePPM(Stream & s, const Image * img) +{ + //if (img->depth() != 1) return false; + //if (img->format() == Image::Format_ARGB) return false; - return ImageIO::save(fileName, stream, img); + uint w = img->width(); + uint h = img->height(); + + TextWriter writer(&s); + writer.format("P6\n"); + writer.format("%d %d\n", w, h); + writer.writeString("255\n"); + for (uint i = 0; i < w * h; i++) { + Color32 c = img->pixel(i); + s << (uint8_t&)c.r << (uint8_t&)c.g << (uint8_t&)c.b; + } + + return true; } -FloatImage * nv::ImageIO::loadFloat(const char * fileName) + +/*static FloatImage * loadFloatPFM(Stream & s) { - nvDebugCheck(fileName != NULL); + return NULL; +}*/ - StdInputStream stream(fileName); - - if (stream.isError()) { - return NULL; - } - - return loadFloat(fileName, stream); -} +/*static bool saveFloatPFM(Stream & s, const FloatImage * img, uint base_channel, uint channel_count) +{ + return false; +}*/ -FloatImage * nv::ImageIO::loadFloat(const char * fileName, Stream & s) +// Load PSD image. +static Image * loadPSD(Stream & s) { - nvDebugCheck(fileName != NULL); + nvCheck(!s.isError()); + nvCheck(s.isLoading()); - const char * extension = Path::extension(fileName); - -#if defined(HAVE_TIFF) - if (strCaseCmp(extension, ".tif") == 0 || strCaseCmp(extension, ".tiff") == 0) { - return loadFloatTIFF(fileName, s); - } -#endif -#if defined(HAVE_OPENEXR) - if (strCaseCmp(extension, ".exr") == 0) { - return loadFloatEXR(fileName, s); - } -#endif + s.setByteOrder(Stream::BigEndian); -/* // @@ Disable temporarily - if (strCaseCmp(extension, ".pfm") == 0) { - return loadFloatPFM(fileName, s); - } -*/ + PsdHeader header; + s << header; - return NULL; -} + if (!header.isValid()) + { + printf("invalid header!\n"); + return NULL; + } + if (!header.isSupported()) + { + printf("unsupported file!\n"); + return NULL; + } -bool nv::ImageIO::saveFloat(const char * fileName, const FloatImage * fimage, uint base_component, uint num_components) -{ - const char * extension = Path::extension(fileName); + int tmp; -#if defined(HAVE_OPENEXR) - if (strCaseCmp(extension, ".exr") == 0) - { - return ImageIO::saveFloatEXR(fileName, fimage, base_component, num_components); - } -#endif + // Skip mode data. + s << tmp; + s.seek(s.tell() + tmp); + + // Skip image resources. + s << tmp; + s.seek(s.tell() + tmp); + + // Skip the reserved data. + s << tmp; + s.seek(s.tell() + tmp); + + // Find out if the data is compressed. + // Known values: + // 0: no compression + // 1: RLE compressed + uint16 compression; + s << compression; + + if (compression > 1) { + // Unknown compression type. 
+ return NULL; + } -#if defined(HAVE_TIFF) - if (strCaseCmp(extension, ".tif") == 0 || strCaseCmp(extension, ".tiff") == 0) - { - return ImageIO::saveFloatTIFF(fileName, fimage, base_component, num_components); - } -#endif + uint channel_num = header.channel_count; -/* // @@ Disable Temporarily - if (strCaseCmp(extension, ".pfm") == 0) - { -// return ImageIO::saveFloatPFM(fileName, fimage, base_component, num_components); - } -*/ + AutoPtr img(new Image()); + img->allocate(header.width, header.height); - if (num_components == 3 || num_components == 4) - { - AutoPtr image(fimage->createImage(base_component, num_components)); - nvCheck(image != NULL); + if (channel_num < 4) + { + // Clear the image. + img->fill(Color32(0, 0, 0, 0xFF)); + } + else + { + // Enable alpha. + img->setFormat(Image::Format_ARGB); - if (num_components == 4) - { - image->setFormat(Image::Format_ARGB); - } + // Ignore remaining channels. + channel_num = 4; + } - return ImageIO::save(fileName, image.ptr()); - } - return false; -} + const uint pixel_count = header.height * header.width; + + static const uint components[4] = {2, 1, 0, 3}; + + if (compression) + { + s.seek(s.tell() + header.height * header.channel_count * sizeof(uint16)); + + // Read RLE data. + for (uint channel = 0; channel < channel_num; channel++) + { + uint8 * ptr = (uint8 *)img->pixels() + components[channel]; + + uint count = 0; + while( count < pixel_count ) + { + if (s.isAtEnd()) return NULL; + + uint8 c; + s << c; + + uint len = c; + if (len < 128) + { + // Copy next len+1 bytes literally. + len++; + count += len; + if (count > pixel_count) return NULL; + + while (len != 0) + { + s << *ptr; + ptr += 4; + len--; + } + } + else if (len > 128) + { + // Next -len+1 bytes in the dest are replicated from next source byte. + // (Interpret len as a negative 8-bit int.) + len ^= 0xFF; + len += 2; + count += len; + if (s.isAtEnd() || count > pixel_count) return NULL; + + uint8 val; + s << val; + while( len != 0 ) { + *ptr = val; + ptr += 4; + len--; + } + } + else if( len == 128 ) { + // No-op. + } + } + } + } + else + { + // We're at the raw image data. It's each channel in order (Red, Green, Blue, Alpha, ...) + // where each channel consists of an 8-bit value for each pixel in the image. + + // Read the data by channel. + for (uint channel = 0; channel < channel_num; channel++) + { + uint8 * ptr = (uint8 *)img->pixels() + components[channel]; + + // Read the data. + uint count = pixel_count; + while (count != 0) + { + s << *ptr; + ptr += 4; + count--; + } + } + } + return img.release(); +} -/// Load TGA image. -Image * nv::ImageIO::loadTGA(Stream & s) +static FloatImage * loadFloatDDS(Stream & s) { - nvCheck(!s.isError()); - nvCheck(s.isLoading()); - - TgaHeader tga; - s << tga; - s.seek(TgaHeader::Size + tga.id_length); - - // Get header info. - bool rle = false; - bool pal = false; - bool rgb = false; - bool grey = false; - - switch( tga.image_type ) { - case TGA_TYPE_RLE_INDEXED: - rle = true; - // no break is intended! - case TGA_TYPE_INDEXED: - if( tga.colormap_type!=1 || tga.colormap_size!=24 || tga.colormap_length>256 ) { - nvDebug( "*** ImageIO::loadTGA: Error, only 24bit paletted images are supported.\n" ); - return NULL; - } - pal = true; - break; - - case TGA_TYPE_RLE_RGB: - rle = true; - // no break is intended! - case TGA_TYPE_RGB: - rgb = true; - break; - - case TGA_TYPE_RLE_GREY: - rle = true; - // no break is intended! 
- case TGA_TYPE_GREY: - grey = true; - break; - - default: - nvDebug( "*** ImageIO::loadTGA: Error, unsupported image type.\n" ); - return NULL; - } - - const uint pixel_size = (tga.pixel_size/8); - nvDebugCheck(pixel_size <= 4); - - const uint size = tga.width * tga.height * pixel_size; - - - // Read palette - uint8 palette[768]; - if( pal ) { - nvDebugCheck(tga.colormap_length < 256); - s.serialize(palette, 3 * tga.colormap_length); - } + nvCheck(s.isLoading()); + nvCheck(!s.isError()); - // Decode image. - uint8 * mem = new uint8[size]; - if( rle ) { - // Decompress image in src. - uint8 * dst = mem; - int num = size; - - while (num > 0) { - // Get packet header - uint8 c; - s << c; - - uint count = (c & 0x7f) + 1; - num -= count * pixel_size; - - if (c & 0x80) { - // RLE pixels. - uint8 pixel[4]; // uint8 pixel[pixel_size]; - s.serialize( pixel, pixel_size ); - do { - memcpy(dst, pixel, pixel_size); - dst += pixel_size; - } while (--count); - } - else { - // Raw pixels. - count *= pixel_size; - //file->Read8(dst, count); - s.serialize(dst, count); - dst += count; - } - } - } - else { - s.serialize(mem, size); - } + DDSHeader header; + s << header; - // Allocate image. - AutoPtr img(new Image()); - img->allocate(tga.width, tga.height); - - int lstep; - Color32 * dst; - if( tga.flags & TGA_ORIGIN_UPPER ) { - lstep = tga.width; - dst = img->pixels(); - } - else { - lstep = - tga.width; - dst = img->pixels() + (tga.height-1) * tga.width; - } + // @@ We only support a few formats for now. - // Write image. - uint8 * src = mem; - if( pal ) { - for( int y = 0; y < tga.height; y++ ) { - for( int x = 0; x < tga.width; x++ ) { - uint8 idx = *src++; - dst[x].setBGRA(palette[3*idx+0], palette[3*idx+1], palette[3*idx+2], 0xFF); - } - dst += lstep; - } - } - else if( grey ) { - img->setFormat(Image::Format_ARGB); - - for( int y = 0; y < tga.height; y++ ) { - for( int x = 0; x < tga.width; x++ ) { - dst[x].setBGRA(*src, *src, *src, *src); - src++; - } - dst += lstep; - } - } - else { - - if( tga.pixel_size == 16 ) { - for( int y = 0; y < tga.height; y++ ) { - for( int x = 0; x < tga.width; x++ ) { - Color555 c = *reinterpret_cast(src); - uint8 b = (c.b << 3) | (c.b >> 2); - uint8 g = (c.g << 3) | (c.g >> 2); - uint8 r = (c.r << 3) | (c.r >> 2); - dst[x].setBGRA(b, g, r, 0xFF); - src += 2; - } - dst += lstep; - } - } - else if( tga.pixel_size == 24 ) { - for( int y = 0; y < tga.height; y++ ) { - for( int x = 0; x < tga.width; x++ ) { - dst[x].setBGRA(src[0], src[1], src[2], 0xFF); - src += 3; - } - dst += lstep; - } - } - else if( tga.pixel_size == 32 ) { - img->setFormat(Image::Format_ARGB); - - for( int y = 0; y < tga.height; y++ ) { - for( int x = 0; x < tga.width; x++ ) { - dst[x].setBGRA(src[0], src[1], src[2], src[3]); - src += 4; - } - dst += lstep; - } - } - } + if (header.pf.fourcc == D3DFMT_A16B16G16R16F) { + const int size = header.width * header.height; + uint16 * const data = new uint16[size * 4]; + + //s.serialize(data, size * 4 * sizeof(uint16)); + for (int i = 0; i < 4* size; i++) { + s << data[i]; + } + + FloatImage * img = new FloatImage; + img->allocate(4, header.width, header.height); + + uint32 * r = (uint32 *)img->channel(0); + uint32 * g = (uint32 *)img->channel(1); + uint32 * b = (uint32 *)img->channel(2); + uint32 * a = (uint32 *)img->channel(3); + + uint16 * ptr = data; + for (int i = 0; i < size; i++) { + *r++ = half_to_float( *ptr++ ); + *g++ = half_to_float( *ptr++ ); + *b++ = half_to_float( *ptr++ ); + *a++ = half_to_float( *ptr++ ); + } + + delete [] data; + + return 
img; + } + else if (header.pf.fourcc == D3DFMT_R32F) { + const int size = header.width * header.height; + float * const data = new float[size]; + + for (int i = 0; i < size; i++) { + s << data[i]; + } + + FloatImage * img = new FloatImage; + img->allocate(4, header.width, header.height); + + float * r = img->channel(0); + + float * ptr = data; + for (int i = 0; i < size; i++) { + *r++ = *ptr++; + } + + delete [] data; + + img->clear(1, 0.0f); + img->clear(2, 0.0f); + img->clear(3, 1.0f); + + return img; + } + else if (header.pf.fourcc == D3DFMT_L16 || (header.pf.bitcount == 16 && header.pf.rmask == 0xFFFF && header.pf.gmask == 0 && header.pf.bmask == 0 && header.pf.amask == 0)) + { + const int size = header.width * header.height; + uint16 * const data = new uint16[size]; + + for (int i = 0; i < size; i++) { + s << data[i]; + } + + FloatImage * img = new FloatImage; + img->allocate(4, header.width, header.height); + + float * r = img->channel(0); + + uint16 * ptr = data; + for (int i = 0; i < size; i++) { + *r++ = float(*ptr++) / 65535.0f; + } + + delete [] data; + + img->clear(1, 0.0f); + img->clear(2, 0.0f); + img->clear(3, 1.0f); + + return img; + } + else if (header.pf.fourcc == D3DFMT_L8 || (header.pf.bitcount == 8 && header.pf.rmask == 0xFF && header.pf.gmask == 0 && header.pf.bmask == 0 && header.pf.amask == 0)) + { + const int size = header.width * header.height; + uint8 * const data = new uint8[size]; - // free uncompressed data. - delete [] mem; + s.serialize(data, size); - return img.release(); + FloatImage * img = new FloatImage; + img->allocate(4, header.width, header.height); + + float * r = img->channel(0); + + uint8 * ptr = data; + for (int i = 0; i < size; i++) { + *r++ = float(*ptr++) / 255.0f; + } + + delete [] data; + + img->clear(1, 0.0f); + img->clear(2, 0.0f); + img->clear(3, 1.0f); + + return img; + } + return NULL; } -/// Save TGA image. -bool nv::ImageIO::saveTGA(Stream & s, const Image * img) +static bool saveFloatDDS(Stream & s, const FloatImage * img, uint base_component, uint num_components) { - nvCheck(!s.isError()); - nvCheck(img != NULL); - nvCheck(img->pixels() != NULL); - - TgaFile tga; - tga.head.id_length = 0; - tga.head.colormap_type = 0; - tga.head.image_type = TGA_TYPE_RGB; - - tga.head.colormap_index = 0; - tga.head.colormap_length = 0; - tga.head.colormap_size = 0; - - tga.head.x_origin = 0; - tga.head.y_origin = 0; - tga.head.width = img->width(); - tga.head.height = img->height(); - if(img->format() == Image::Format_ARGB) { - tga.head.pixel_size = 32; - tga.head.flags = TGA_ORIGIN_UPPER | TGA_HAS_ALPHA; - } - else { - tga.head.pixel_size = 24; - tga.head.flags = TGA_ORIGIN_UPPER; - } + nvCheck(s.isSaving()); + nvCheck(!s.isError()); - // @@ Serialize directly. - tga.allocate(); + if (num_components != 4) return false; - const uint n = img->width() * img->height(); - if(img->format() == Image::Format_ARGB) { - for(uint i = 0; i < n; i++) { - Color32 color = img->pixel(i); - tga.mem[4 * i + 0] = color.b; - tga.mem[4 * i + 1] = color.g; - tga.mem[4 * i + 2] = color.r; - tga.mem[4 * i + 3] = color.a; - } - } - else { - for(uint i = 0; i < n; i++) { - Color32 color = img->pixel(i); - tga.mem[3 * i + 0] = color.b; - tga.mem[3 * i + 1] = color.g; - tga.mem[3 * i + 2] = color.r; - } - } + static const uint D3DFMT_A16B16G16R16F = 113; - s << tga; - - tga.free(); - - return true; -} - -/// Load PSD image. 
-Image * nv::ImageIO::loadPSD(Stream & s) -{ - nvCheck(!s.isError()); - nvCheck(s.isLoading()); - - s.setByteOrder(Stream::BigEndian); - - PsdHeader header; - s << header; - - if (!header.isValid()) - { - printf("invalid header!\n"); - return NULL; - } - - if (!header.isSupported()) - { - printf("unsupported file!\n"); - return NULL; - } - - int tmp; - - // Skip mode data. - s << tmp; - s.seek(s.tell() + tmp); - - // Skip image resources. - s << tmp; - s.seek(s.tell() + tmp); - - // Skip the reserved data. - s << tmp; - s.seek(s.tell() + tmp); - - // Find out if the data is compressed. - // Known values: - // 0: no compression - // 1: RLE compressed - uint16 compression; - s << compression; - - if (compression > 1) { - // Unknown compression type. - return NULL; - } - - uint channel_num = header.channel_count; - - AutoPtr img(new Image()); - img->allocate(header.width, header.height); - - if (channel_num < 4) - { - // Clear the image. - img->fill(Color32(0, 0, 0, 0xFF)); - } - else - { - // Enable alpha. - img->setFormat(Image::Format_ARGB); - - // Ignore remaining channels. - channel_num = 4; - } - - - const uint pixel_count = header.height * header.width; - - static const uint components[4] = {2, 1, 0, 3}; - - if (compression) - { - s.seek(s.tell() + header.height * header.channel_count * sizeof(uint16)); - - // Read RLE data. - for (uint channel = 0; channel < channel_num; channel++) - { - uint8 * ptr = (uint8 *)img->pixels() + components[channel]; - - uint count = 0; - while( count < pixel_count ) - { - if (s.isAtEnd()) return NULL; - - uint8 c; - s << c; - - uint len = c; - if (len < 128) - { - // Copy next len+1 bytes literally. - len++; - count += len; - if (count > pixel_count) return NULL; - - while (len != 0) - { - s << *ptr; - ptr += 4; - len--; - } - } - else if (len > 128) - { - // Next -len+1 bytes in the dest are replicated from next source byte. - // (Interpret len as a negative 8-bit int.) - len ^= 0xFF; - len += 2; - count += len; - if (s.isAtEnd() || count > pixel_count) return NULL; - - uint8 val; - s << val; - while( len != 0 ) { - *ptr = val; - ptr += 4; - len--; - } - } - else if( len == 128 ) { - // No-op. - } - } - } - } - else - { - // We're at the raw image data. It's each channel in order (Red, Green, Blue, Alpha, ...) - // where each channel consists of an 8-bit value for each pixel in the image. - - // Read the data by channel. - for (uint channel = 0; channel < channel_num; channel++) - { - uint8 * ptr = (uint8 *)img->pixels() + components[channel]; - - // Read the data. - uint count = pixel_count; - while (count != 0) - { - s << *ptr; - ptr += 4; - count--; - } - } - } + DDSHeader header; + header.setTexture2D(); + header.setWidth(img->width()); + header.setHeight(img->height()); + header.setFormatCode(D3DFMT_A16B16G16R16F); + // ... 
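+ // The pixel data written after the header is interleaved RGBA half floats, matching the format code set above.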
+ + s << header; - return img.release(); + uint32 * r = (uint32 *)img->channel(base_component + 0); + uint32 * g = (uint32 *)img->channel(base_component + 1); + uint32 * b = (uint32 *)img->channel(base_component + 2); + uint32 * a = (uint32 *)img->channel(base_component + 3); + + const uint size = img->width() * img->height(); + for (uint i = 0; i < size; i++) { + uint16 R = half_from_float( *r++ ); + uint16 G = half_from_float( *g++ ); + uint16 B = half_from_float( *b++ ); + uint16 A = half_from_float( *a++ ); + + s.serialize(&R, sizeof(uint16)); + s.serialize(&G, sizeof(uint16)); + s.serialize(&B, sizeof(uint16)); + s.serialize(&A, sizeof(uint16)); + } + + return true; } + #if defined(HAVE_PNG) static void user_read_data(png_structp png_ptr, png_bytep data, png_size_t length) { - nvDebugCheck(png_ptr != NULL); - - Stream * s = (Stream *)png_get_io_ptr(png_ptr); - s->serialize(data, (int)length); - - if (s->isError()) { - png_error(png_ptr, "Read Error"); - } + nvDebugCheck(png_ptr != NULL); + + Stream * s = (Stream *)png_get_io_ptr(png_ptr); + s->serialize(data, (int)length); + + if (s->isError()) { + png_error(png_ptr, "Read Error"); + } } -Image * nv::ImageIO::loadPNG(Stream & s) +static Image * loadPNG(Stream & s) { - nvCheck(!s.isError()); - - // Set up a read buffer and check the library version - png_structp png_ptr; - png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL); - if (png_ptr == NULL) { - // nvDebug( "*** LoadPNG: Error allocating read buffer in file '%s'.\n", name ); - return NULL; - } + nvCheck(!s.isError()); - // Allocate/initialize a memory block for the image information - png_infop info_ptr = png_create_info_struct(png_ptr); - if (info_ptr == NULL) { - png_destroy_read_struct(&png_ptr, NULL, NULL); - // nvDebug( "*** LoadPNG: Error allocating image information for '%s'.\n", name ); - return NULL; - } + // Set up a read buffer and check the library version + png_structp png_ptr; + png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL); + if (png_ptr == NULL) { + // nvDebug( "*** LoadPNG: Error allocating read buffer in file '%s'.\n", name ); + return NULL; + } - // Set up the error handling - if (setjmp(png_jmpbuf(png_ptr))) { - png_destroy_read_struct(&png_ptr, &info_ptr, NULL); - // nvDebug( "*** LoadPNG: Error reading png file '%s'.\n", name ); - return NULL; - } + // Allocate/initialize a memory block for the image information + png_infop info_ptr = png_create_info_struct(png_ptr); + if (info_ptr == NULL) { + png_destroy_read_struct(&png_ptr, NULL, NULL); + // nvDebug( "*** LoadPNG: Error allocating image information for '%s'.\n", name ); + return NULL; + } - // Set up the I/O functions. - png_set_read_fn(png_ptr, (void*)&s, user_read_data); + // Set up the error handling + if (setjmp(png_jmpbuf(png_ptr))) { + png_destroy_read_struct(&png_ptr, &info_ptr, NULL); + // nvDebug( "*** LoadPNG: Error reading png file '%s'.\n", name ); + return NULL; + } + // Set up the I/O functions. 
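+ // png_set_read_fn() makes libpng pull its input through the user_read_data callback above, so the PNG can be decoded from any Stream rather than a FILE*.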
+ png_set_read_fn(png_ptr, (void*)&s, user_read_data); - // Retrieve the image header information - png_uint_32 width, height; - int bit_depth, color_type, interlace_type; - png_read_info(png_ptr, info_ptr); - png_get_IHDR(png_ptr, info_ptr, &width, &height, &bit_depth, &color_type, &interlace_type, NULL, NULL); + // Retrieve the image header information + png_uint_32 width, height; + int bit_depth, color_type, interlace_type; + png_read_info(png_ptr, info_ptr); + png_get_IHDR(png_ptr, info_ptr, &width, &height, &bit_depth, &color_type, &interlace_type, NULL, NULL); - if (color_type == PNG_COLOR_TYPE_PALETTE && bit_depth <= 8) { - // Convert indexed images to RGB. - png_set_expand(png_ptr); - } - else if (color_type == PNG_COLOR_TYPE_GRAY && bit_depth < 8) { - // Convert grayscale to RGB. - png_set_expand(png_ptr); - } - else if (png_get_valid(png_ptr, info_ptr, PNG_INFO_tRNS)) { - // Expand images with transparency to full alpha channels - // so the data will be available as RGBA quartets. - png_set_expand(png_ptr); - } - else if (bit_depth < 8) { - // If we have < 8 scale it up to 8. - //png_set_expand(png_ptr); - png_set_packing(png_ptr); - } - // Reduce bit depth. - if (bit_depth == 16) { - png_set_strip_16(png_ptr); - } + if (color_type == PNG_COLOR_TYPE_PALETTE && bit_depth <= 8) { + // Convert indexed images to RGB. + png_set_expand(png_ptr); + } + else if (color_type == PNG_COLOR_TYPE_GRAY && bit_depth < 8) { + // Convert grayscale to RGB. + png_set_expand(png_ptr); + } + else if (png_get_valid(png_ptr, info_ptr, PNG_INFO_tRNS)) { + // Expand images with transparency to full alpha channels + // so the data will be available as RGBA quartets. + png_set_expand(png_ptr); + } + else if (bit_depth < 8) { + // If we have < 8 scale it up to 8. + //png_set_expand(png_ptr); + png_set_packing(png_ptr); + } - // Represent gray as RGB - if (color_type == PNG_COLOR_TYPE_GRAY || color_type == PNG_COLOR_TYPE_GRAY_ALPHA) { - png_set_gray_to_rgb(png_ptr); - } + // Reduce bit depth. + if (bit_depth == 16) { + png_set_strip_16(png_ptr); + } - // Convert to RGBA filling alpha with 0xFF. - if (!(color_type & PNG_COLOR_MASK_ALPHA)) { - png_set_filler(png_ptr, 0xFF, PNG_FILLER_AFTER); - } + // Represent gray as RGB + if (color_type == PNG_COLOR_TYPE_GRAY || color_type == PNG_COLOR_TYPE_GRAY_ALPHA) { + png_set_gray_to_rgb(png_ptr); + } - // @todo Choose gamma according to the platform? - double screen_gamma = 2.2; - int intent; - if (png_get_sRGB(png_ptr, info_ptr, &intent)) { - png_set_gamma(png_ptr, screen_gamma, 0.45455); - } - else { - double image_gamma; - if (png_get_gAMA(png_ptr, info_ptr, &image_gamma)) { - png_set_gamma(png_ptr, screen_gamma, image_gamma); - } - else { - png_set_gamma(png_ptr, screen_gamma, 0.45455); - } - } + // Convert to RGBA filling alpha with 0xFF. + if (!(color_type & PNG_COLOR_MASK_ALPHA)) { + png_set_filler(png_ptr, 0xFF, PNG_FILLER_AFTER); + } - // Perform the selected transforms. - png_read_update_info(png_ptr, info_ptr); + // @todo Choose gamma according to the platform? + double screen_gamma = 2.2; + int intent; + if (png_get_sRGB(png_ptr, info_ptr, &intent)) { + png_set_gamma(png_ptr, screen_gamma, 0.45455); + } + else { + double image_gamma; + if (png_get_gAMA(png_ptr, info_ptr, &image_gamma)) { + png_set_gamma(png_ptr, screen_gamma, image_gamma); + } + else { + png_set_gamma(png_ptr, screen_gamma, 0.45455); + } + } - png_get_IHDR(png_ptr, info_ptr, &width, &height, &bit_depth, &color_type, &interlace_type, NULL, NULL); + // Perform the selected transforms. 
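+ // png_read_update_info() commits the transforms requested above so the info structure reflects the final row format.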
+ png_read_update_info(png_ptr, info_ptr); - AutoPtr img(new Image()); - img->allocate(width, height); + png_get_IHDR(png_ptr, info_ptr, &width, &height, &bit_depth, &color_type, &interlace_type, NULL, NULL); - // Set internal format flags. - if(color_type & PNG_COLOR_MASK_COLOR) { - //img->flags |= PI_IF_HAS_COLOR; - } - if(color_type & PNG_COLOR_MASK_ALPHA) { - //img->flags |= PI_IF_HAS_ALPHA; - img->setFormat(Image::Format_ARGB); - } + AutoPtr img(new Image()); + img->allocate(width, height); - // Read the image - uint8 * pixels = (uint8 *)img->pixels(); - png_bytep * row_data = new png_bytep[sizeof(png_byte) * height]; - for (uint i = 0; i < height; i++) { - row_data[i] = &(pixels[width * 4 * i]); - } + // Set internal format flags. + if(color_type & PNG_COLOR_MASK_COLOR) { + //img->flags |= PI_IF_HAS_COLOR; + } + if(color_type & PNG_COLOR_MASK_ALPHA) { + //img->flags |= PI_IF_HAS_ALPHA; + img->setFormat(Image::Format_ARGB); + } - png_read_image(png_ptr, row_data); - delete [] row_data; + // Read the image + uint8 * pixels = (uint8 *)img->pixels(); + png_bytep * row_data = new png_bytep[sizeof(png_byte) * height]; + for (uint i = 0; i < height; i++) { + row_data[i] = &(pixels[width * 4 * i]); + } - // Finish things up - png_read_end(png_ptr, info_ptr); - png_destroy_read_struct(&png_ptr, &info_ptr, NULL); - - // RGBA to BGRA. - uint num = width * height; - for(uint i = 0; i < num; i++) - { - Color32 c = img->pixel(i); - img->pixel(i) = Color32(c.b, c.g, c.r, c.a); - } - - // Compute alpha channel if needed. - /*if( img->flags & PI_IU_BUMPMAP || img->flags & PI_IU_ALPHAMAP ) { - if( img->flags & PI_IF_HAS_COLOR && !(img->flags & PI_IF_HAS_ALPHA)) { - img->ComputeAlphaFromColor(); - } - }*/ + png_read_image(png_ptr, row_data); + delete [] row_data; + + // Finish things up + png_read_end(png_ptr, info_ptr); + png_destroy_read_struct(&png_ptr, &info_ptr, NULL); + + // RGBA to BGRA. + uint num = width * height; + for(uint i = 0; i < num; i++) + { + Color32 c = img->pixel(i); + img->pixel(i) = Color32(c.b, c.g, c.r, c.a); + } + + // Compute alpha channel if needed. + /*if( img->flags & PI_IU_BUMPMAP || img->flags & PI_IU_ALPHAMAP ) { + if( img->flags & PI_IF_HAS_COLOR && !(img->flags & PI_IF_HAS_ALPHA)) { + img->ComputeAlphaFromColor(); + } + }*/ - return img.release(); + return img.release(); +} + +static void user_write_data(png_structp png_ptr, png_bytep data, png_size_t length) +{ + nvDebugCheck(png_ptr != NULL); + + Stream * s = (Stream *)png_get_io_ptr(png_ptr); + s->serialize(data, (int)length); + + if (s->isError()) { + png_error(png_ptr, "Write Error"); + } +} + +static void user_write_flush(png_structp png_ptr) { } + +static bool savePNG(Stream & s, const Image * img, const char ** tags/*=NULL*/) +{ + nvCheck(!s.isError()); + nvCheck(img != NULL); + nvCheck(img->pixels() != NULL); + + // Set up a write buffer and check the library version + png_structp png_ptr; + png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL); + if (png_ptr == NULL) { + return false; + } + + // Allocate/initialize a memory block for the image information + png_infop info_ptr = png_create_info_struct(png_ptr); + if (info_ptr == NULL) { + png_destroy_write_struct(&png_ptr, NULL); + return false; + } + + // Set up the error handling + if (setjmp(png_jmpbuf(png_ptr))) { + png_destroy_write_struct(&png_ptr, &info_ptr); + return false; + } + + // Set up the I/O functions. 
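+ // png_set_write_fn() sends the encoded output through user_write_data; the user_write_flush callback above does nothing.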
+ png_set_write_fn(png_ptr, (void*)&s, user_write_data, user_write_flush); + + // Set image header information + int color_type = PNG_COLOR_TYPE_RGBA; + switch(img->format()) + { + case Image::Format_RGB: color_type = PNG_COLOR_TYPE_RGB; break; + case Image::Format_ARGB: color_type = PNG_COLOR_TYPE_RGBA; break; + } + png_set_IHDR(png_ptr, info_ptr, img->width(), img->height(), + 8, color_type, PNG_INTERLACE_NONE, + PNG_COMPRESSION_TYPE_DEFAULT, + PNG_FILTER_TYPE_DEFAULT); + + // Set image data + png_bytep * row_data = new png_bytep[sizeof(png_byte) * img->height()]; + for (uint i = 0; i < img->height(); i++) { + row_data[i] = (png_byte*)img->scanline (i); + if (img->format() == Image::Format_RGB) row_data[i]--; // This is a bit of a hack, libpng expects images in ARGB format not BGRA, it supports BGR swapping, but not alpha swapping. + } + png_set_rows(png_ptr, info_ptr, row_data); + + png_text * text = NULL; + if (tags != NULL) + { + int count = 0; + while(tags[2 * count] != NULL) count++; + + text = new png_text[count]; + memset(text, 0, count * sizeof(png_text); + + for (int i = 0; i < count; i++) { + text[i].compression = PNG_TEXT_COMPRESSION_NONE; + text[i].key = tags[2 * i + 0]; + text[i].text = tags[2 * i + 1]; + } + + png_set_text(png_ptr, info_ptr, text, count); + } + + png_write_png(png_ptr, info_ptr, + // component order is BGR(A) + PNG_TRANSFORM_BGR | + // Strip alpha byte for RGB images + (img->format() == Image::Format_RGB ? PNG_TRANSFORM_STRIP_FILLER : 0) + , NULL); + + // Finish things up + png_destroy_write_struct(&png_ptr, &info_ptr); + + delete [] row_data; + delete [] text; + + return true; } #endif // defined(HAVE_PNG) @@ -756,106 +909,106 @@ static void init_source (j_decompress_ptr /*cinfo*/){ } -static boolean fill_input_buffer (j_decompress_ptr cinfo){ - struct jpeg_source_mgr * src = cinfo->src; - static JOCTET FakeEOI[] = { 0xFF, JPEG_EOI }; - - // Generate warning - nvDebug("jpeglib: Premature end of file\n"); - - // Insert a fake EOI marker - src->next_input_byte = FakeEOI; - src->bytes_in_buffer = 2; +static boolean fill_input_buffer (j_decompress_ptr cinfo) { + struct jpeg_source_mgr * src = cinfo->src; + static JOCTET FakeEOI[] = { 0xFF, JPEG_EOI }; + + // Generate warning + nvDebug("jpeglib: Premature end of file\n"); + + // Insert a fake EOI marker + src->next_input_byte = FakeEOI; + src->bytes_in_buffer = 2; - return TRUE; + return TRUE; } static void skip_input_data (j_decompress_ptr cinfo, long num_bytes) { - struct jpeg_source_mgr * src = cinfo->src; + struct jpeg_source_mgr * src = cinfo->src; - if(num_bytes >= (long)src->bytes_in_buffer) { - fill_input_buffer(cinfo); - return; - } + if(num_bytes >= (long)src->bytes_in_buffer) { + fill_input_buffer(cinfo); + return; + } - src->bytes_in_buffer -= num_bytes; - src->next_input_byte += num_bytes; + src->bytes_in_buffer -= num_bytes; + src->next_input_byte += num_bytes; } static void term_source (j_decompress_ptr /*cinfo*/){ - // no work necessary here + // no work necessary here } -Image * nv::ImageIO::loadJPG(Stream & s) +static Image * loadJPG(Stream & s) { - nvCheck(!s.isError()); - - // Read the entire file. 
- Array byte_array; - byte_array.resize(s.size()); - s.serialize(byte_array.unsecureBuffer(), s.size()); - - jpeg_decompress_struct cinfo; - jpeg_error_mgr jerr; - - cinfo.err = jpeg_std_error(&jerr); - jpeg_create_decompress(&cinfo); - - cinfo.src = (struct jpeg_source_mgr *) (*cinfo.mem->alloc_small) - ((j_common_ptr) &cinfo, JPOOL_PERMANENT, sizeof(struct jpeg_source_mgr)); - cinfo.src->init_source = init_source; - cinfo.src->fill_input_buffer = fill_input_buffer; - cinfo.src->skip_input_data = skip_input_data; - cinfo.src->resync_to_restart = jpeg_resync_to_restart; // use default method - cinfo.src->term_source = term_source; - cinfo.src->bytes_in_buffer = byte_array.size(); - cinfo.src->next_input_byte = byte_array.buffer(); - - jpeg_read_header(&cinfo, TRUE); - jpeg_start_decompress(&cinfo); - - /* - cinfo.do_fancy_upsampling = FALSE; // fast decompression - cinfo.dct_method = JDCT_FLOAT; // Choose floating point DCT method. - */ - - uint8 * tmp_buffer = new uint8 [cinfo.output_width * cinfo.output_height * cinfo.num_components]; - uint8 * scanline = tmp_buffer; - - while( cinfo.output_scanline < cinfo.output_height ){ - int num_scanlines = jpeg_read_scanlines (&cinfo, &scanline, 1); - scanline += num_scanlines * cinfo.output_width * cinfo.num_components; - } + nvCheck(!s.isError()); - jpeg_finish_decompress(&cinfo); + // Read the entire file. + Array byte_array; + byte_array.resize(s.size()); + s.serialize(byte_array.buffer(), s.size()); - AutoPtr img(new Image()); - img->allocate(cinfo.output_width, cinfo.output_height); + jpeg_decompress_struct cinfo; + jpeg_error_mgr jerr; - Color32 * dst = img->pixels(); - const int size = img->height() * img->width(); - const uint8 * src = tmp_buffer; - - if( cinfo.num_components == 3 ) { - img->setFormat(Image::Format_RGB); - for( int i = 0; i < size; i++ ) { - *dst++ = Color32(src[0], src[1], src[2]); - src += 3; - } - } - else { - img->setFormat(Image::Format_ARGB); - for( int i = 0; i < size; i++ ) { - *dst++ = Color32(*src, *src, *src, *src); - src++; - } - } + cinfo.err = jpeg_std_error(&jerr); + jpeg_create_decompress(&cinfo); + + cinfo.src = (struct jpeg_source_mgr *) (*cinfo.mem->alloc_small) + ((j_common_ptr) &cinfo, JPOOL_PERMANENT, sizeof(struct jpeg_source_mgr)); + cinfo.src->init_source = init_source; + cinfo.src->fill_input_buffer = fill_input_buffer; + cinfo.src->skip_input_data = skip_input_data; + cinfo.src->resync_to_restart = jpeg_resync_to_restart; // use default method + cinfo.src->term_source = term_source; + cinfo.src->bytes_in_buffer = byte_array.size(); + cinfo.src->next_input_byte = byte_array.buffer(); + + jpeg_read_header(&cinfo, TRUE); + jpeg_start_decompress(&cinfo); + + /* + cinfo.do_fancy_upsampling = FALSE; // fast decompression + cinfo.dct_method = JDCT_FLOAT; // Choose floating point DCT method. 
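+ (Both lines stay commented out, so the decoder runs with libjpeg's default upsampling and DCT method.)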
+ */ + + uint8 * tmp_buffer = new uint8 [cinfo.output_width * cinfo.output_height * cinfo.num_components]; + uint8 * scanline = tmp_buffer; + + while( cinfo.output_scanline < cinfo.output_height ){ + int num_scanlines = jpeg_read_scanlines (&cinfo, &scanline, 1); + scanline += num_scanlines * cinfo.output_width * cinfo.num_components; + } - delete [] tmp_buffer; - jpeg_destroy_decompress (&cinfo); + jpeg_finish_decompress(&cinfo); - return img.release(); + AutoPtr img(new Image()); + img->allocate(cinfo.output_width, cinfo.output_height); + + Color32 * dst = img->pixels(); + const int size = img->height() * img->width(); + const uint8 * src = tmp_buffer; + + if( cinfo.num_components == 3 ) { + img->setFormat(Image::Format_RGB); + for( int i = 0; i < size; i++ ) { + *dst++ = Color32(src[0], src[1], src[2]); + src += 3; + } + } + else { + img->setFormat(Image::Format_ARGB); + for( int i = 0; i < size; i++ ) { + *dst++ = Color32(*src, *src, *src, *src); + src++; + } + } + + delete [] tmp_buffer; + jpeg_destroy_decompress (&cinfo); + + return img.release(); } #endif // defined(HAVE_JPEG) @@ -865,645 +1018,1005 @@ /* static tsize_t tiffReadWriteProc(thandle_t h, tdata_t ptr, tsize_t size) { - Stream * s = (Stream *)h; - nvDebugCheck(s != NULL); + Stream * s = (Stream *)h; + nvDebugCheck(s != NULL); - s->serialize(ptr, size); + s->serialize(ptr, size); - return size; + return size; } static toff_t tiffSeekProc(thandle_t h, toff_t offset, int whence) { - Stream * s = (Stream *)h; - nvDebugCheck(s != NULL); - - if (!s->isSeekable()) - { - return (toff_t)-1; - } + Stream * s = (Stream *)h; + nvDebugCheck(s != NULL); - if (whence == SEEK_SET) - { - s->seek(offset); - } - else if (whence == SEEK_CUR) - { - s->seek(s->tell() + offset); - } - else if (whence == SEEK_END) - { - s->seek(s->size() + offset); - } + if (!s->isSeekable()) + { + return (toff_t)-1; + } + + if (whence == SEEK_SET) + { + s->seek(offset); + } + else if (whence == SEEK_CUR) + { + s->seek(s->tell() + offset); + } + else if (whence == SEEK_END) + { + s->seek(s->size() + offset); + } - return s->tell(); + return s->tell(); } static int tiffCloseProc(thandle_t) { - return 0; + return 0; } static toff_t tiffSizeProc(thandle_t h) { - Stream * s = (Stream *)h; - nvDebugCheck(s != NULL); - return s->size(); + Stream * s = (Stream *)h; + nvDebugCheck(s != NULL); + return s->size(); } static int tiffMapFileProc(thandle_t, tdata_t*, toff_t*) { - // @@ TODO, Implement these functions. - return -1; + // @@ TODO, Implement these functions. + return -1; } static void tiffUnmapFileProc(thandle_t, tdata_t, toff_t) { - // @@ TODO, Implement these functions. + // @@ TODO, Implement these functions. 
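+ (These stream-based client callbacks are left disabled; loadFloatTIFF below opens the file by name with TIFFOpen instead.)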
} */ -FloatImage * nv::ImageIO::loadFloatTIFF(const char * fileName, Stream & s) +static FloatImage * loadFloatTIFF(const char * fileName, Stream & s) { - nvCheck(!s.isError()); - - TIFF * tif = TIFFOpen(fileName, "r"); - //TIFF * tif = TIFFClientOpen(fileName, "r", &s, tiffReadWriteProc, tiffReadWriteProc, tiffSeekProc, tiffCloseProc, tiffSizeProc, tiffMapFileProc, tiffUnmapFileProc); - - if (!tif) - { - nvDebug("Can't open '%s' for reading\n", fileName); - return NULL; - } - - ::uint16 spp, bpp, format; - ::uint32 width, height; - TIFFGetField(tif, TIFFTAG_IMAGELENGTH, &height); - TIFFGetField(tif, TIFFTAG_IMAGEWIDTH, &width); - TIFFGetField(tif, TIFFTAG_BITSPERSAMPLE, &bpp); - TIFFGetField(tif, TIFFTAG_SAMPLESPERPIXEL, &spp); - TIFFGetField(tif, TIFFTAG_SAMPLEFORMAT, &format); - - if (bpp != 8 && bpp != 16 && bpp != 32) { - nvDebug("Can't load '%s', only 1 sample per pixel supported\n", fileName); - TIFFClose(tif); - return NULL; - } - - AutoPtr fimage(new FloatImage()); - fimage->allocate(spp, width, height); - - int linesize = TIFFScanlineSize(tif); - tdata_t buf = (::uint8 *)::malloc(linesize); - - for (uint y = 0; y < height; y++) - { - TIFFReadScanline(tif, buf, y, 0); - - for (uint c=0; cscanline(y, c); + nvCheck(!s.isError()); - for(uint x = 0; x < width; x++) - { - if (bpp == 8) - { - dst[x] = float(((::uint8 *)buf)[x*spp+c]) / float(0xFF); - } - else if (bpp == 16) - { - dst[x] = float(((::uint16 *)buf)[x*spp+c]) / float(0xFFFF); - } - else if (bpp == 32) - { - if (format==SAMPLEFORMAT_IEEEFP) - { - dst[x] = float(((float *)buf)[x*spp+c]); - } - else - { - dst[x] = float(((::uint32 *)buf)[x*spp+c] >> 8) / float(0xFFFFFF); - } + TIFF * tif = TIFFOpen(fileName, "r"); + //TIFF * tif = TIFFClientOpen(fileName, "r", &s, tiffReadWriteProc, tiffReadWriteProc, tiffSeekProc, tiffCloseProc, tiffSizeProc, tiffMapFileProc, tiffUnmapFileProc); - } + if (!tif) + { + nvDebug("Can't open '%s' for reading\n", fileName); + return NULL; + } - } - } - } + ::uint16 spp, bpp, format; + ::uint32 width, height; + TIFFGetField(tif, TIFFTAG_IMAGELENGTH, &height); + TIFFGetField(tif, TIFFTAG_IMAGEWIDTH, &width); + TIFFGetField(tif, TIFFTAG_BITSPERSAMPLE, &bpp); + TIFFGetField(tif, TIFFTAG_SAMPLESPERPIXEL, &spp); + TIFFGetField(tif, TIFFTAG_SAMPLEFORMAT, &format); + + if (bpp != 8 && bpp != 16 && bpp != 32) { + nvDebug("Can't load '%s', only 1 sample per pixel supported\n", fileName); + TIFFClose(tif); + return NULL; + } - ::free(buf); - - TIFFClose(tif); - - return fimage.release(); -} - -bool nv::ImageIO::saveFloatTIFF(const char * fileName, const FloatImage * fimage, uint base_component, uint num_components) -{ - nvCheck(fileName != NULL); - nvCheck(fimage != NULL); - nvCheck(base_component + num_components <= fimage->componentNum()); - - const int iW = fimage->width(); - const int iH = fimage->height(); - const int iC = num_components; + AutoPtr fimage(new FloatImage()); + fimage->allocate(spp, width, height); - TIFF * image = TIFFOpen(fileName, "w"); + int linesize = TIFFScanlineSize(tif); + tdata_t buf = malloc(linesize); - // Open the TIFF file - if (image == NULL) - { - nvDebug("Could not open '%s' for writing\n", fileName); - return false; - } + for (uint y = 0; y < height; y++) + { + TIFFReadScanline(tif, buf, y, 0); - TIFFSetField(image, TIFFTAG_IMAGEWIDTH, iW); - TIFFSetField(image, TIFFTAG_IMAGELENGTH, iH); - TIFFSetField(image, TIFFTAG_SAMPLESPERPIXEL, iC); - TIFFSetField(image, TIFFTAG_SAMPLEFORMAT, SAMPLEFORMAT_IEEEFP); - TIFFSetField(image, TIFFTAG_BITSPERSAMPLE, 32); - - uint32 
rowsperstrip = TIFFDefaultStripSize(image, (uint32)-1); - - TIFFSetField(image, TIFFTAG_ROWSPERSTRIP, rowsperstrip); - TIFFSetField(image, TIFFTAG_COMPRESSION, COMPRESSION_PACKBITS); - if (num_components == 3) + for (uint c=0; cscanline(y, c); - float * scanline = new float[iW * iC]; - for (int y = 0; y < iH; y++) - { - for (int c = 0; c < iC; c++) + for(uint x = 0; x < width; x++) + { + if (bpp == 8) + { + dst[x] = float(((::uint8 *)buf)[x*spp+c]) / float(0xFF); + } + else if (bpp == 16) { - const float * src = fimage->scanline(y, base_component + c); - for (int x = 0; x < iW; x++) scanline[x * iC + c] = src[x]; + dst[x] = float(((::uint16 *)buf)[x*spp+c]) / float(0xFFFF); } - if (TIFFWriteScanline(image, scanline, y, 0)==-1) + else if (bpp == 32) { - nvDebug("Error writing scanline %d\n", y); - return false; + if (format==SAMPLEFORMAT_IEEEFP) + { + dst[x] = float(((float *)buf)[x*spp+c]); + } + else + { + dst[x] = float(((::uint32 *)buf)[x*spp+c] >> 8) / float(0xFFFFFF); + } } + } } - delete [] scanline; + } - // Close the file - TIFFClose(image); - return true; -} + free(buf); -#endif + TIFFClose(tif); -#if defined(HAVE_OPENEXR) + return fimage.release(); +} -namespace +static bool saveFloatTIFF(const char * fileName, const FloatImage * fimage, uint base_component, uint num_components) { - class ExrStream : public Imf::IStream - { - public: - ExrStream(const char * name, Stream & s) : Imf::IStream(name), m_stream(s) - { - nvDebugCheck(s.isLoading()); - } - - virtual bool read(char c[], int n) - { - m_stream.serialize(c, n); - - if (m_stream.isError()) - { - throw Iex::InputExc("I/O error."); - } - - return m_stream.isAtEnd(); - } - - virtual Imf::Int64 tellg() - { - return m_stream.tell(); - } - - virtual void seekg(Imf::Int64 pos) - { - m_stream.seek(pos); - } - - virtual void clear() - { - m_stream.clearError(); - } - - private: - Stream & m_stream; - }; + nvCheck(fileName != NULL); + nvCheck(fimage != NULL); + nvCheck(base_component + num_components <= fimage->componentCount()); -} // namespace + const int iW = fimage->width(); + const int iH = fimage->height(); + const int iC = num_components; + + TIFF * image = TIFFOpen(fileName, "w"); + + // Open the TIFF file + if (image == NULL) + { + nvDebug("Could not open '%s' for writing\n", fileName); + return false; + } + + TIFFSetField(image, TIFFTAG_IMAGEWIDTH, iW); + TIFFSetField(image, TIFFTAG_IMAGELENGTH, iH); + TIFFSetField(image, TIFFTAG_SAMPLESPERPIXEL, iC); + TIFFSetField(image, TIFFTAG_SAMPLEFORMAT, SAMPLEFORMAT_IEEEFP); + TIFFSetField(image, TIFFTAG_BITSPERSAMPLE, 32); + + uint32 rowsperstrip = TIFFDefaultStripSize(image, (uint32)-1); + + TIFFSetField(image, TIFFTAG_ROWSPERSTRIP, rowsperstrip); + TIFFSetField(image, TIFFTAG_COMPRESSION, COMPRESSION_PACKBITS); + if (num_components == 3) + { + // Set this so that it can be visualized with pfstools. 
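+ // (Only the three-component case sets a photometric tag here; other channel counts are written without one.)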
+ TIFFSetField(image, TIFFTAG_PHOTOMETRIC, PHOTOMETRIC_RGB); + } + TIFFSetField(image, TIFFTAG_ORIENTATION, ORIENTATION_TOPLEFT); + TIFFSetField(image, TIFFTAG_PLANARCONFIG, PLANARCONFIG_CONTIG); -FloatImage * nv::ImageIO::loadFloatEXR(const char * fileName, Stream & s) + float * scanline = new float[iW * iC]; + for (int y = 0; y < iH; y++) + { + for (int c = 0; c < iC; c++) + { + const float * src = fimage->scanline(y, base_component + c); + for (int x = 0; x < iW; x++) scanline[x * iC + c] = src[x]; + } + if (TIFFWriteScanline(image, scanline, y, 0)==-1) + { + nvDebug("Error writing scanline %d\n", y); + return false; + } + } + delete [] scanline; + + // Close the file + TIFFClose(image); + return true; +} + +#endif // defined(HAVE_TIFF) + +#if defined(HAVE_OPENEXR) + +namespace { - nvCheck(s.isLoading()); - nvCheck(!s.isError()); + class ExrStream : public Imf::IStream + { + public: + ExrStream(const char * name, Stream & s) : Imf::IStream(name), m_stream(s) + { + nvDebugCheck(s.isLoading()); + } - ExrStream stream(fileName, s); - Imf::InputFile inputFile(stream); + virtual bool read(char c[], int n) + { + m_stream.serialize(c, n); - Imath::Box2i box = inputFile.header().dataWindow(); + if (m_stream.isError()) + { + throw Iex::InputExc("I/O error."); + } - int width = box.max.x - box.min.y + 1; - int height = box.max.x - box.min.y + 1; + return m_stream.isAtEnd(); + } - const Imf::ChannelList & channels = inputFile.header().channels(); - - // Count channels. - uint channelCount= 0; - for (Imf::ChannelList::ConstIterator it = channels.begin(); it != channels.end(); ++it) + virtual Imf::Int64 tellg() { - channelCount++; + return m_stream.tell(); } - - // Allocate FloatImage. - AutoPtr fimage(new FloatImage()); - fimage->allocate(channelCount, width, height); - - // Describe image's layout with a framebuffer. - Imf::FrameBuffer frameBuffer; - uint i = 0; - for (Imf::ChannelList::ConstIterator it = channels.begin(); it != channels.end(); ++it, ++i) + + virtual void seekg(Imf::Int64 pos) { - frameBuffer.insert(it.name(), Imf::Slice(Imf::FLOAT, (char *)fimage->channel(i), sizeof(float), sizeof(float) * width)); + nvDebugCheck(pos >= 0 && pos < UINT_MAX); + m_stream.seek((uint)pos); } - - // Read it. 
- inputFile.setFrameBuffer (frameBuffer); - inputFile.readPixels (box.min.y, box.max.y); - - return fimage.release(); -} - -bool nv::ImageIO::saveFloatEXR(const char * fileName, const FloatImage * fimage, uint base_component, uint num_components) -{ - nvCheck(fileName != NULL); - nvCheck(fimage != NULL); - nvCheck(base_component + num_components <= fimage->componentNum()); - nvCheck(num_components > 0 && num_components <= 4); - - const int w = fimage->width(); - const int h = fimage->height(); - - const char * channelNames[] = {"R", "G", "B", "A"}; - - Imf::Header header (w, h); - - for (uint c = 0; c < num_components; c++) + + virtual void clear() { - header.channels().insert(channelNames[c], Imf::Channel(Imf::FLOAT)); + m_stream.clearError(); } - + + private: + Stream & m_stream; + }; + + static int channelIndexFromName(const char* name) + { + char c = tolower(name[0]); + switch (c) + { + default: + case 'r': + return 0; + case 'g': + return 1; + case 'b': + return 2; + case 'a': + return 3; + } + } + +} // namespace + +static FloatImage * loadFloatEXR(const char * fileName, Stream & s) +{ + nvCheck(s.isLoading()); + nvCheck(!s.isError()); + + ExrStream stream(fileName, s); + Imf::InputFile inputFile(stream); + + Imath::Box2i box = inputFile.header().dataWindow(); + + int width = box.max.x - box.min.y + 1; + int height = box.max.x - box.min.y + 1; + + const Imf::ChannelList & channels = inputFile.header().channels(); + + // Count channels. + uint channelCount= 0; + for (Imf::ChannelList::ConstIterator it = channels.begin(); it != channels.end(); ++it) + { + channelCount++; + } + + // Allocate FloatImage. + AutoPtr fimage(new FloatImage()); + fimage->allocate(channelCount, width, height); + + // Describe image's layout with a framebuffer. + Imf::FrameBuffer frameBuffer; + uint i = 0; + for (Imf::ChannelList::ConstIterator it = channels.begin(); it != channels.end(); ++it, ++i) + { + int channelIndex = channelIndexFromName(it.name()); + frameBuffer.insert(it.name(), Imf::Slice(Imf::FLOAT, (char *)fimage->channel(channelIndex), sizeof(float), sizeof(float) * width)); + } + + // Read it. 
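+ // readPixels() fills every slice registered in the frame buffer for the rows of the data window.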
+ inputFile.setFrameBuffer (frameBuffer); + inputFile.readPixels (box.min.y, box.max.y); + + return fimage.release(); +} + +static bool saveFloatEXR(const char * fileName, const FloatImage * fimage, uint base_component, uint num_components) +{ + nvCheck(fileName != NULL); + nvCheck(fimage != NULL); + nvCheck(base_component + num_components <= fimage->componentCount()); + nvCheck(num_components > 0 && num_components <= 4); + + const int w = fimage->width(); + const int h = fimage->height(); + + const char * channelNames[] = {"R", "G", "B", "A"}; + + Imf::Header header (w, h); + + for (uint c = 0; c < num_components; c++) + { + header.channels().insert(channelNames[c], Imf::Channel(Imf::FLOAT)); + } + Imf::OutputFile file(fileName, header); Imf::FrameBuffer frameBuffer; - - for (uint c = 0; c < num_components; c++) - { - char * channel = (char *) fimage->channel(base_component + c); - frameBuffer.insert(channelNames[c], Imf::Slice(Imf::FLOAT, channel, sizeof(float), sizeof(float) * w)); - } - - file.setFrameBuffer(frameBuffer); - file.writePixels(h); - - return true; + + for (uint c = 0; c < num_components; c++) + { + char * channel = (char *) fimage->channel(base_component + c); + frameBuffer.insert(channelNames[c], Imf::Slice(Imf::FLOAT, channel, sizeof(float), sizeof(float) * w)); + } + + file.setFrameBuffer(frameBuffer); + file.writePixels(h); + + return true; } #endif // defined(HAVE_OPENEXR) -#if 0 // @@ Disable temporarily. -FloatImage * nv::ImageIO::loadFloatPFM(const char * fileName, Stream & s) +#if defined(HAVE_FREEIMAGE) + +static unsigned DLL_CALLCONV ReadProc(void *buffer, unsigned size, unsigned count, fi_handle handle) { - nvCheck(s.isLoading()); - nvCheck(!s.isError()); + Stream * s = (Stream *) handle; + s->serialize(buffer, size * count); + return count; +} - Tokenizer parser(&s); +static unsigned DLL_CALLCONV WriteProc(void *buffer, unsigned size, unsigned count, fi_handle handle) +{ + Stream * s = (Stream *) handle; + s->serialize(buffer, size * count); + return count; +} - parser.nextToken(); +static int DLL_CALLCONV SeekProc(fi_handle handle, long offset, int origin) +{ + Stream * s = (Stream *) handle; - bool grayscale; - if (parser.token() == "PF") - { - grayscale = false; - } - else if (parser.token() == "Pf") - { - grayscale = true; - } - else - { - // Invalid file. - return NULL; - } + switch(origin) { + case SEEK_SET : + s->seek(offset); + break; + case SEEK_END : + s->seek(s->size() + offset); + break; + case SEEK_CUR : + s->seek(s->tell() + offset); + break; + default : + return 1; + } - parser.nextLine(); - - int width = parser.token().toInt(); parser.nextToken(); - int height = parser.token().toInt(); + return 0; +} - parser.nextLine(); +static long DLL_CALLCONV TellProc(fi_handle handle) +{ + Stream * s = (Stream *) handle; + return s->tell(); +} - float scaleFactor = parser.token().toFloat(); - if (scaleFactor >= 0) - { - s.setByteOrder(Stream::BigEndian); - } - else - { - s.setByteOrder(Stream::LittleEndian); - } - scaleFactor = fabsf(scaleFactor); +Image * nv::ImageIO::loadFreeImage(FREE_IMAGE_FORMAT fif, Stream & s) +{ + nvCheck(!s.isError()); - // Allocate image. 
- AutoPtr fimage(new FloatImage()); + FreeImageIO io; + io.read_proc = ReadProc; + io.write_proc = NULL; + io.seek_proc = SeekProc; + io.tell_proc = TellProc; - if (grayscale) - { - fimage->allocate(1, width, height); + FIBITMAP * bitmap = FreeImage_LoadFromHandle(fif, &io, (fi_handle)&s, 0); - float * channel = fimage->channel(0); + if (bitmap == NULL) + { + return NULL; + } - for (int i = 0; i < width * height; i++) - { - s << channel[i]; - } - } - else - { - fimage->allocate(3, width, height); + const int w = FreeImage_GetWidth(bitmap); + const int h = FreeImage_GetHeight(bitmap); - float * rchannel = fimage->channel(0); - float * gchannel = fimage->channel(1); - float * bchannel = fimage->channel(2); + if (FreeImage_GetImageType(bitmap) != FIT_BITMAP) + { + // @@ Use tone mapping? + FIBITMAP * tmp = FreeImage_ConvertToType(bitmap, FIT_BITMAP, true); + FreeImage_Unload(bitmap); + bitmap = tmp; + } - for (int i = 0; i < width * height; i++) - { - s << rchannel[i] << gchannel[i] << bchannel[i]; - } - } + nvDebugCheck(FreeImage_GetImageType(bitmap) == FIT_BITMAP); + if (FreeImage_GetBPP(bitmap) != 32) + { + FIBITMAP * tmp = FreeImage_ConvertTo32Bits(bitmap); + FreeImage_Unload(bitmap); + bitmap = tmp; + } + + + Image * image = new Image(); + image->allocate(w, h, 1); // freeimage can only load 2d images: + + // Copy the image over to our internal format, FreeImage has the scanlines bottom to top though. + for (int y=0; y < h; y++) + { + const void * src = FreeImage_GetScanLine(bitmap, h - y - 1); + void * dst = image->scanline(y); + + memcpy(dst, src, 4 * w); + } + + FreeImage_Unload(bitmap); - return fimage.release(); + return image; } -bool nv::ImageIO::saveFloatPFM(const char * fileName, const FloatImage * fimage, uint base_component, uint num_components) +FloatImage * nv::ImageIO::loadFloatFreeImage(FREE_IMAGE_FORMAT fif, Stream & s) { - nvCheck(fileName != NULL); - nvCheck(fimage != NULL); - nvCheck(fimage->componentNum() <= base_component + num_components); - nvCheck(num_components == 1 || num_components == 3); + nvCheck(!s.isError()); - StdOutputStream stream(fileName); - TextWriter writer(&stream); + FreeImageIO io; + io.read_proc = ReadProc; + io.write_proc = NULL; + io.seek_proc = SeekProc; + io.tell_proc = TellProc; - if (num_components == 1) writer.write("Pf\n"); - else /*if (num_components == 3)*/ writer.write("PF\n"); + FIBITMAP * bitmap = FreeImage_LoadFromHandle(fif, &io, (fi_handle)&s, 0); - int w = fimage->width(); - int h = fimage->height(); - writer.write("%d %d\n", w, h); - writer.write("%f\n", -1.0f); // little endian with 1.0 scale. 
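+ // (A null bitmap means FreeImage could not identify or decode the stream.)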
+ if (bitmap == NULL) + { + return NULL; + } - if (num_components == 1) - { - float * channel = const_cast(fimage->channel(0)); + const int w = FreeImage_GetWidth(bitmap); + const int h = FreeImage_GetHeight(bitmap); - for (int i = 0; i < w * h; i++) - { - stream << channel[i]; - } - } - else - { - float * rchannel = const_cast(fimage->channel(0)); - float * gchannel = const_cast(fimage->channel(1)); - float * bchannel = const_cast(fimage->channel(2)); + FREE_IMAGE_TYPE fit = FreeImage_GetImageType(bitmap); - for (int i = 0; i < w * h; i++) - { - stream << rchannel[i] << gchannel[i] << bchannel[i]; - } - } + FloatImage * floatImage = new FloatImage(); + + switch (fit) + { + case FIT_BITMAP: + floatImage->allocate(4, w, h); + { + FIBITMAP * tmp = FreeImage_ConvertTo32Bits(bitmap); + + uint bitcount = FreeImage_GetBPP(bitmap); + uint byteCount = bitcount / 8; + + for (int y=0; y < h; y++) + { + const Color32 * src = (const Color32 *)FreeImage_GetScanLine(bitmap, h - y - 1 ); + + float * r = floatImage->scanline(y, 0); + float * g = floatImage->scanline(y, 1); + float * b = floatImage->scanline(y, 2); + float * a = floatImage->scanline(y, 3); + + for (int x=0; x < w; x++) + { + r[x] = float(src[x].r) / 255.0f; + g[x] = float(src[x].g) / 255.0f; + b[x] = float(src[x].b) / 255.0f; + a[x] = float(src[x].a) / 255.0f; + } + + src += byteCount; + } - return true; + FreeImage_Unload(tmp); + } + break; + case FIT_FLOAT: + floatImage->allocate(1, w, h); + + for (int y=0; y < h; y++) + { + const float * src = (const float *)FreeImage_GetScanLine(bitmap, h - y - 1 ); + float * dst = floatImage->scanline(y, 0); + + for (int x=0; x < w; x++) + { + dst[x] = src[x]; + } + } + break; + case FIT_UINT16: + floatImage->allocate(1, w, h); + + for (int y=0; y < h; y++) + { + const uint16 * src = (const uint16 *)FreeImage_GetScanLine(bitmap, h - y - 1 ); + float * dst = floatImage->scanline(y, 0); + + for (int x=0; x < w; x++) + { + dst[x] = float(src[x]) / 65535; + } + } + break; + case FIT_COMPLEX: + floatImage->allocate(2, w, h); + + for (int y=0; y < h; y++) + { + const FICOMPLEX * src = (const FICOMPLEX *)FreeImage_GetScanLine(bitmap, h - y - 1 ); + + float * dst_real = floatImage->scanline(y, 0); + float * dst_imag = floatImage->scanline(y, 1); + + for (int x=0; x < w; x++) + { + dst_real[x] = (float)src[x].r; + dst_imag[x] = (float)src[x].i; + } + } + break; + case FIT_RGBF: + floatImage->allocate(3, w, h); + + for (int y=0; y < h; y++) + { + const FIRGBF * src = (const FIRGBF *)FreeImage_GetScanLine(bitmap, h - y - 1 ); + + float * dst_red = floatImage->scanline(y, 0); + float * dst_green = floatImage->scanline(y, 1); + float * dst_blue = floatImage->scanline(y, 2); + + for (int x=0; x < w; x++) + { + dst_red[x] = src[x].red; + dst_green[x] = src[x].green; + dst_blue[x] = src[x].blue; + } + } + break; + case FIT_RGBAF: + floatImage->allocate(4, w, h); + + for (int y=0; y < h; y++) + { + const FIRGBAF * src = (const FIRGBAF *)FreeImage_GetScanLine(bitmap, h - y - 1 ); + + float * dst_red = floatImage->scanline(y, 0); + float * dst_green = floatImage->scanline(y, 1); + float * dst_blue = floatImage->scanline(y, 2); + float * dst_alpha = floatImage->scanline(y, 3); + + for (int x=0; x < w; x++) + { + dst_red[x] = src[x].red; + dst_green[x] = src[x].green; + dst_blue[x] = src[x].blue; + dst_alpha[x] = src[x].alpha; + } + } + break; + default: + delete floatImage; + floatImage = NULL; + } + + FreeImage_Unload(bitmap); + + return floatImage; } -#endif +bool nv::ImageIO::saveFreeImage(FREE_IMAGE_FORMAT fif, 
Stream & s, const Image * img, const char ** tags) +{ + nvCheck(!s.isError()); -#if 0 + FreeImageIO io; + io.read_proc = NULL; + io.write_proc = WriteProc; + io.seek_proc = SeekProc; + io.tell_proc = TellProc; -/** Save PNG*/ -static bool SavePNG(const PiImage * img, const char * name) { - nvCheck( img != NULL ); - nvCheck( img->mem != NULL ); + const uint w = img->width(); + const uint h = img->height(); - if( piStrCmp(piExtension(name), ".png" ) != 0 ) { - return false; - } - - if( img->flags & PI_IT_CUBEMAP ) { - nvDebug("*** Cannot save cubemaps as PNG."); - return false; - } - if( img->flags & PI_IT_DDS ) { - nvDebug("*** Cannot save DDS surface as PNG."); - return false; - } + FIBITMAP * bitmap = FreeImage_Allocate(w, h, 32); - nvDebug( "--- Saving '%s'.\n", name ); - - PiAutoPtr ar( PiFileSystem::CreateFileWriter( name ) ); - if( ar == NULL ) { - nvDebug( "*** SavePNG: Error, cannot save file '%s'.\n", name ); - return false; - } + for (uint i = 0; i < h; i++) + { + uint8 * scanline = FreeImage_GetScanLine(bitmap, i); + memcpy(scanline, img->scanline(h - i - 1), w * sizeof(Color32)); + } -/* -public class PNGEnc { + if (tags != NULL) + { + #pragma NV_MESSAGE("TODO: Save image metadata") + //FreeImage_SetMetadata( + } - public static function encode(img:BitmapData):ByteArray { - // Create output byte array - var png:ByteArray = new ByteArray(); - // Write PNG signature - png.writeUnsignedInt(0x89504e47); - png.writeUnsignedInt(0x0D0A1A0A); - // Build IHDR chunk - var IHDR:ByteArray = new ByteArray(); - IHDR.writeInt(img.width); - IHDR.writeInt(img.height); - IHDR.writeUnsignedInt(0x08060000); // 32bit RGBA - IHDR.writeByte(0); - writeChunk(png,0x49484452,IHDR); - // Build IDAT chunk - var IDAT:ByteArray= new ByteArray(); - for(var i:int=0;i < img.height;i++) { - // no filter - IDAT.writeByte(0); - var p:uint; - if ( !img.transparent ) { - for(var j:int=0;j < img.width;j++) { - p = img.getPixel(j,i); - IDAT.writeUnsignedInt( - uint(((p&0xFFFFFF) << 8)|0xFF)); - } - } else { - for(var j:int=0;j < img.width;j++) { - p = img.getPixel32(j,i); - IDAT.writeUnsignedInt( - uint(((p&0xFFFFFF) << 8)| - (shr(p,24)))); - } + bool result = FreeImage_SaveToHandle(fif, bitmap, &io, (fi_handle)&s, 0) != 0; + + FreeImage_Unload(bitmap); + + return result; +} + +bool nv::ImageIO::saveFloatFreeImage(FREE_IMAGE_FORMAT fif, Stream & s, const FloatImage * img, uint baseComponent, uint componentCount) +{ + nvCheck(!s.isError()); + + FreeImageIO io; + io.read_proc = NULL; + io.write_proc = WriteProc; + io.seek_proc = SeekProc; + io.tell_proc = TellProc; + + const uint w = img->width(); + const uint h = img->height(); + + FREE_IMAGE_TYPE type; + if (componentCount == 1) + { + type = FIT_FLOAT; + } + else if (componentCount == 3) + { + type = FIT_RGBF; + } + else if (componentCount == 4) + { + type = FIT_RGBAF; + } + else { + return false; + } + + + FIBITMAP * bitmap = FreeImage_AllocateT(type, w, h); + + for (uint y = 0; y < h; y++) + { + float * scanline = (float *)FreeImage_GetScanLine(bitmap, y); + + for (uint x = 0; x < w; x++) + { + for (uint c = 0; c < componentCount; c++) + { + scanline[x * componentCount + c] = img->pixel(x, y, baseComponent + c); } } - IDAT.compress(); - writeChunk(png,0x49444154,IDAT); - // Build IEND chunk - writeChunk(png,0x49454E44,null); - // return PNG - return png; - } - - private static var crcTable:Array; - private static var crcTableComputed:Boolean = false; - - private static function writeChunk(png:ByteArray, - type:uint, data:ByteArray) { - if (!crcTableComputed) { - 
crcTableComputed = true; - crcTable = []; - for (var n:uint = 0; n < 256; n++) { - var c:uint = n; - for (var k:uint = 0; k < 8; k++) { - if (c & 1) { - c = uint(uint(0xedb88320) ^ - uint(c >>> 1)); - } else { - c = uint(c >>> 1); - } - } - crcTable[n] = c; + } + + bool result = FreeImage_SaveToHandle(fif, bitmap, &io, (fi_handle)&s, 0) != 0; + + FreeImage_Unload(bitmap); + + return result; +} + +#endif // defined(HAVE_FREEIMAGE) + + +#if defined(HAVE_STBIMAGE) + +static Image * loadSTB(Stream & s) +{ + // @@ Assumes stream cursor is at the beginning and that image occupies the whole stream. + const int size = s.size(); + uint8 * buffer = new uint8[size]; + + s.serialize(buffer, size); + + int w, h, n; + uint8 * data = stbi_load_from_memory(buffer, size, &w, &h, &n, 4); + + delete [] buffer; + + if (data != NULL) { + Image * img = new Image; + img->allocate(w, h); + img->setFormat(n == 4 ? Image::Format_ARGB : Image::Format_RGB); + + for (int y = 0; y < h; ++y) + { + nv::Color32* dest = img->scanline(y); + uint8* src = data + y * w * 4; + + for (int x = 0; x < w; ++x) + { + dest[x].r = src[x * 4 + 0]; + dest[x].g = src[x * 4 + 1]; + dest[x].b = src[x * 4 + 2]; + dest[x].a = src[x * 4 + 3]; + } + } + + free(data); + + return img; + } + + return NULL; +} + +static FloatImage * loadFloatSTB(Stream & s) +{ + // @@ Assumes stream cursor is at the beginning and that image occupies the whole stream. + const int size = s.size(); + uint8 * buffer = new uint8[size]; + + s.serialize(buffer, size); + + int w, h, n; + float * data = stbi_loadf_from_memory(buffer, size, &w, &h, &n, 0); + + delete [] buffer; + + // Copy to image. + if (data != NULL) { + FloatImage * img = new FloatImage; + img->allocate(n, w, h); + + const int count = w * h; + + for (int c = 0; c < n; c++) { + float * dst = img->channel(c); + + for (int i = 0; i < count; i++) { + dst[i] = data[i*n + c]; } } - var len:uint = 0; - if (data != null) { - len = data.length; - } - png.writeUnsignedInt(len); - var p:uint = png.position; - png.writeUnsignedInt(type); - if ( data != null ) { - png.writeBytes(data); - } - var e:uint = png.position; - png.position = p; - var c:uint = 0xffffffff; - for (var i:int = 0; i < (e-p); i++) { - c = uint(crcTable[ - (c ^ png.readUnsignedByte()) & - uint(0xff)] ^ uint(c >>> 8)); - } - c = uint(c^uint(0xffffffff)); - png.position = e; - png.writeUnsignedInt(c); + return img; } + + return NULL; } -*/ + +#endif // defined(HAVE_STBIMAGE) + + + + + +Image * nv::ImageIO::load(const char * fileName) +{ + nvDebugCheck(fileName != NULL); + + StdInputStream stream(fileName); + + if (stream.isError()) { + return NULL; + } + + return ImageIO::load(fileName, stream); } -#endif // 0 +Image * nv::ImageIO::load(const char * fileName, Stream & s) +{ + nvDebugCheck(fileName != NULL); + nvDebugCheck(s.isLoading()); + + const char * extension = Path::extension(fileName); -#if 0 + if (strCaseDiff(extension, ".tga") == 0) { + return loadTGA(s); + } + if (strCaseDiff(extension, ".psd") == 0) { + return loadPSD(s); + } -namespace ImageIO { + /*if (strCaseDiff(extension, ".ppm") == 0) { + return loadPPM(s); + }*/ - /** Init ImageIO plugins. 
*/ - void InitPlugins() { - // AddInputPlugin( "", LoadANY ); - AddInputPlugin( "tga", LoadTGA ); -#if HAVE_PNG - AddInputPlugin( "png", LoadPNG ); +#if defined(HAVE_JPEG) + if (strCaseDiff(extension, ".jpg") == 0 || strCaseDiff(extension, ".jpeg") == 0) { + return loadJPG(s); + } #endif -#if HAVE_JPEG - AddInputPlugin( "jpg", LoadJPG ); + +#if defined(HAVE_PNG) + if (strCaseDiff(extension, ".png") == 0) { + return loadPNG(s); + } #endif - AddInputPlugin( "dds", LoadDDS ); - - AddOutputPlugin( "tga", SaveTGA ); - } - - /** Reset ImageIO plugins. */ - void ResetPlugins() { - s_plugin_load_map.Clear(); - s_plugin_save_map.Clear(); - } - - /** Add an input plugin. */ - void AddInputPlugin( const char * ext, ImageInput_Plugin plugin ) { - s_plugin_load_map.Add(ext, plugin); - } - - /** Add an output plugin. */ - void AddOutputPlugin( const char * ext, ImageOutput_Plugin plugin ) { - s_plugin_save_map.Add(ext, plugin); - } - - bool Load(PiImage * img, const char * name, PiStream & stream) { - - // Get name extension. - const char * extension = piExtension(name); - - // Skip the dot. - if( *extension == '.' ) { - extension++; - } - - // Lookup plugin in the map. - ImageInput_Plugin plugin = NULL; - if( s_plugin_load_map.Get(extension, &plugin) ) { - return plugin(img, stream); - } - - /*foreach(i, s_plugin_load_map) { - nvDebug("%s %s %d\n", s_plugin_load_map[i].key.GetStr(), extension, 0 == strcmp(extension, s_plugin_load_map[i].key)); - } - - nvDebug("No plugin found for '%s' %d.\n", extension, s_plugin_load_map.Size());*/ - - return false; - } +#if defined(HAVE_FREEIMAGE) + FREE_IMAGE_FORMAT fif = FreeImage_GetFIFFromFilename(fileName); + if (fif != FIF_UNKNOWN && FreeImage_FIFSupportsReading(fif)) { + return loadFreeImage(fif, s); + } +#endif - bool Save(const PiImage * img, const char * name, PiStream & stream) { - - // Get name extension. - const char * extension = piExtension(name); - - // Skip the dot. - if( *extension == '.' ) { - extension++; - } - - // Lookup plugin in the map. 
- ImageOutput_Plugin plugin = NULL; - if( s_plugin_save_map.Get(extension, &plugin) ) { - return plugin(img, stream); - } - - return false; - } - -} // ImageIO +#if defined(HAVE_STBIMAGE) + return loadSTB(s); +#endif + + return NULL; +} + +bool nv::ImageIO::save(const char * fileName, Stream & s, const Image * img, const char ** tags/*=NULL*/) +{ + nvDebugCheck(fileName != NULL); + nvDebugCheck(s.isSaving()); + nvDebugCheck(img != NULL); + + const char * extension = Path::extension(fileName); + + if (strCaseDiff(extension, ".tga") == 0) { + return saveTGA(s, img); + } + + if (strCaseDiff(extension, ".ppm") == 0) { + return savePPM(s, img); + } -#endif // 0 +#if defined(HAVE_PNG) + if (strCaseDiff(extension, ".png") == 0) { + return savePNG(s, img, tags); + } +#endif + +#if defined(HAVE_FREEIMAGE) + FREE_IMAGE_FORMAT fif = FreeImage_GetFIFFromFilename(fileName); + if (fif != FIF_UNKNOWN && FreeImage_FIFSupportsWriting(fif)) { + return saveFreeImage(fif, s, img, tags); + } +#endif + + return false; +} + +bool nv::ImageIO::save(const char * fileName, const Image * img, const char ** tags/*=NULL*/) +{ + nvDebugCheck(fileName != NULL); + nvDebugCheck(img != NULL); + + StdOutputStream stream(fileName); + if (stream.isError()) + { + return false; + } + + return ImageIO::save(fileName, stream, img, tags); +} + +FloatImage * nv::ImageIO::loadFloat(const char * fileName) +{ + nvDebugCheck(fileName != NULL); + + StdInputStream stream(fileName); + + if (stream.isError()) { + return NULL; + } + + return loadFloat(fileName, stream); +} + +FloatImage * nv::ImageIO::loadFloat(const char * fileName, Stream & s) +{ + nvDebugCheck(fileName != NULL); + + const char * extension = Path::extension(fileName); + + /*if (strCaseDiff(extension, ".pfm") == 0) { + return loadFloatPFM(s); + }*/ + +#if defined(HAVE_TIFF) + #pragma NV_MESSAGE("TODO: Load TIFF from stream.") + if (strCaseDiff(extension, ".tif") == 0 || strCaseDiff(extension, ".tiff") == 0) { + return loadFloatTIFF(fileName, s); + } +#endif + +#if defined(HAVE_OPENEXR) + #pragma NV_MESSAGE("TODO: Load EXR from stream.") + if (strCaseDiff(extension, ".exr") == 0) { + return loadFloatEXR(fileName, s); + } +#endif + +#if defined(HAVE_FREEIMAGE) + FREE_IMAGE_FORMAT fif = FreeImage_GetFIFFromFilename(fileName); + if (fif != FIF_UNKNOWN && FreeImage_FIFSupportsReading(fif)) { + return loadFloatFreeImage(fif, s); + } +#endif + + if (strCaseDiff(extension, ".dds") == 0) { + const uint spos = s.tell(); // Save stream position. + FloatImage * floatImage = loadFloatDDS(s); + if (floatImage != NULL) return floatImage; + else s.seek(spos); + } + // Try to load as an RGBA8 image and convert to float. 
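The new ImageIO entry points above dispatch purely on the file-name extension: load() tries TGA and PSD natively, then JPEG, PNG, FreeImage and stb_image when those are compiled in, and returns a heap-allocated Image or NULL; save() always supports TGA and PPM. A minimal caller sketch under those declarations (convertToTga and the ImageIO.h include path are assumptions, not part of the patch):

#include "nvimage/ImageIO.h"
#include "nvimage/Image.h"
#include "nvcore/Ptr.h"

// Load any supported format and rewrite it as TGA.
static bool convertToTga(const char * inputName, const char * outputName)
{
    nv::AutoPtr<nv::Image> img(nv::ImageIO::load(inputName)); // NULL if unreadable or unsupported
    if (img == NULL) {
        return false;
    }
    // save() dispatches on the output extension; ".tga" is always available.
    return nv::ImageIO::save(outputName, img.ptr());
}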
+ AutoPtr img(load(fileName, s)); + if (img != NULL) { + return new FloatImage(img.ptr()); + } + + return NULL; +} + +bool nv::ImageIO::saveFloat(const char * fileName, Stream & s, const FloatImage * fimage, uint baseComponent, uint componentCount) +{ + if (componentCount == 0) { + componentCount = fimage->componentCount() - baseComponent; + } + if (baseComponent + componentCount < fimage->componentCount()) { + return false; + } + + const char * extension = Path::extension(fileName); + + if (strCaseDiff(extension, ".dds") == 0) { + return saveFloatDDS(s, fimage, baseComponent, componentCount); + } + + /*if (strCaseDiff(extension, ".pfm") == 0) { + return saveFloatPFM(s, fimage, baseComponent, componentCount); + }*/ + +#if defined(HAVE_FREEIMAGE) + FREE_IMAGE_FORMAT fif = FreeImage_GetFIFFromFilename(fileName); + if (fif != FIF_UNKNOWN && FreeImage_FIFSupportsWriting(fif)) { + return saveFloatFreeImage(fif, s, fimage, baseComponent, componentCount); + } +#endif + + // If everything else fails, save as LDR. + if (componentCount <= 4) + { + AutoPtr image(fimage->createImage(baseComponent, componentCount)); + nvCheck(image != NULL); + + if (componentCount == 1) + { + Color32 * c = image->pixels(); + const uint count = image->width() * image->height(); + for (uint i = 0; i < count; i++) + { + c[i].b = c[i].g = c[i].r; + } + } + + if (componentCount == 4) + { + image->setFormat(Image::Format_ARGB); + } + + return ImageIO::save(fileName, s, image.ptr()); + } + + return false; +} + +bool nv::ImageIO::saveFloat(const char * fileName, const FloatImage * fimage, uint baseComponent, uint componentCount) +{ + if (componentCount == 0) { + componentCount = fimage->componentCount() - baseComponent; + } + if (baseComponent + componentCount < fimage->componentCount()) { + return false; + } + + const char * extension = Path::extension(fileName); + +#if defined(HAVE_OPENEXR) + if (strCaseDiff(extension, ".exr") == 0) { + return saveFloatEXR(fileName, fimage, baseComponent, componentCount); + } +#endif + +#if defined(HAVE_TIFF) + if (strCaseDiff(extension, ".tif") == 0 || strCaseDiff(extension, ".tiff") == 0) { + return saveFloatTIFF(fileName, fimage, baseComponent, componentCount); + } +#endif + + StdOutputStream stream(fileName); + + if (stream.isError()) { + return false; + } + + return saveFloat(fileName, stream, fimage, baseComponent, componentCount); +} Index: ps/trunk/libraries/source/nvtt/src/src/nvimage/KtxFile.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvimage/KtxFile.h +++ ps/trunk/libraries/source/nvtt/src/src/nvimage/KtxFile.h @@ -0,0 +1,102 @@ +// This code is in the public domain -- Ignacio Castaño + +#pragma once +#ifndef NV_IMAGE_KTXFILE_H +#define NV_IMAGE_KTXFILE_H + +#include "nvimage.h" +#include "nvcore/StrLib.h" + +// KTX File format specification: +// http://www.khronos.org/opengles/sdk/tools/KTX/file_format_spec/#key + +namespace nv +{ + class Stream; + + // GL types (Table 3.2) + const uint KTX_UNSIGNED_BYTE; + const uint KTX_UNSIGNED_SHORT_5_6_5; + // ... + + // GL formats (Table 3.3) + // ... + + // GL internal formats (Table 3.12, 3.13) + // ... + + // GL base internal format. (Table 3.11) + const uint KTX_RGB; + const uint KTX_RGBA; + const uint KTX_ALPHA; + // ... 
+ + + struct KtxHeader { + uint8 identifier[12]; + uint32 endianness; + uint32 glType; + uint32 glTypeSize; + uint32 glFormat; + uint32 glInternalFormat; + uint32 glBaseInternalFormat; + uint32 pixelWidth; + uint32 pixelHeight; + uint32 pixelDepth; + uint32 numberOfArrayElements; + uint32 numberOfFaces; + uint32 numberOfMipmapLevels; + uint32 bytesOfKeyValueData; + + KtxHeader(); + + }; + + NVIMAGE_API Stream & operator<< (Stream & s, DDSHeader & header); + + + struct KtxFile { + KtxFile(); + ~KtxFile(); + + void addKeyValue(const char * key, const char * value); + + private: + KtxHeader header; + + Array keyArray; + Array valueArray; + + }; + + NVIMAGE_API Stream & operator<< (Stream & s, KtxFile & file); + + + /* + for each keyValuePair that fits in bytesOfKeyValueData + UInt32 keyAndValueByteSize + Byte keyAndValue[keyAndValueByteSize] + Byte valuePadding[3 - ((keyAndValueByteSize + 3) % 4)] + end + + for each mipmap_level in numberOfMipmapLevels* + UInt32 imageSize; + for each array_element in numberOfArrayElements* + for each face in numberOfFaces + for each z_slice in pixelDepth* + for each row or row_of_blocks in pixelHeight* + for each pixel or block_of_pixels in pixelWidth + Byte data[format-specific-number-of-bytes]** + end + end + end + Byte cubePadding[0-3] + end + end + Byte mipPadding[3 - ((imageSize + 3) % 4)] + end + */ + +} // nv namespace + +#endif // NV_IMAGE_KTXFILE_H Index: ps/trunk/libraries/source/nvtt/src/src/nvimage/KtxFile.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvimage/KtxFile.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvimage/KtxFile.cpp @@ -0,0 +1,83 @@ +// This code is in the public domain -- Ignacio Castaño + +#include "KtxFile.h" + +using namespace nv; + +static const uint8 fileIdentifier[12] = { + 0xAB, 0x4B, 0x54, 0x58, + 0x20, 0x31, 0x31, 0xBB, + 0x0D, 0x0A, 0x1A, 0x0A +}; + + +KtxHeader::KtxHeader() { + memcpy(identifier, fileIdentifier, 12); + + endianness = 0x04030201; + + glType = 0; + glTypeSize = 1; + glFormat = 0; + glInternalFormat = KTX_RGBA; + glBaseInternalFormat = KTX_RGBA; + pixelWidth = 0; + pixelHeight = 0; + pixelDepth = 0; + numberOfArrayElements = 0; + numberOfFaces = 1; + numberOfMipmapLevels = 0; + bytesOfKeyValueData = 0; +} + + +Stream & operator<< (Stream & s, DDSHeader & header) { + s.serialize(header.identifier, 12); + s << header.endiannes << header.glType << header.glTypeSize << header.glFormat << header.glInternalFormat << header.glBaseInternalFormat; + s << header.pixelWidth << header.pixelHeight << header.pixelDepth; + s << header.numberOfArrayElements << header.numberOfFaces << header.numberOfMipmapLevels; + s << header.bytesOfKeyValueData; + return s; +} + + +KtxFile::KtxFile() { +} +KtxFile::~KtxFile() { +} + +void KtxFile::addKeyValue(const char * key, const char * value) { + keyArray.append(key); + valueArray.append(value); + bytesOfKeyValueData += strlen(key) + 1 + strlen(value) + 1; +} + + +Stream & operator<< (Stream & s, KtxFile & file) { + s << header; + + if (s.isSaving()) { + + int keyValueCount = keyArray.count(); + for (int i = 0; i < keyValueCount; i++) { + const String & key = keyArray[i]; + const String & value = valueArray[i]; + uint keySize = key.length() + 1; + uint valueSize = value.length() + 1; + uint keyValueSize = keySize + valueSize; + + s << keyValueSize; + + s.serialize(key.str(), keySize); + s.serialize(value.str(), valueSize); + } + } + else { + // @@ Read key value pairs. 
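The layout comment above pads every KTX key/value entry to a 4-byte boundary: valuePadding[3 - ((keyAndValueByteSize + 3) % 4)]. A small sketch of how a writer would size one entry, assuming NUL-terminated strings as in KtxFile::addKeyValue (ktxKeyValueEntrySize is illustrative only):

#include <cstring>

// Bytes occupied by one KTX key/value pair: the uint32 size field, the
// NUL-terminated key and value, then padding to the next 4-byte boundary.
static unsigned ktxKeyValueEntrySize(const char * key, const char * value)
{
    const unsigned keyAndValueByteSize = unsigned(strlen(key) + 1 + strlen(value) + 1);
    const unsigned valuePadding = 3 - ((keyAndValueByteSize + 3) % 4);
    return 4 + keyAndValueByteSize + valuePadding;
}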
+ } + + return s; +} + + + Index: ps/trunk/libraries/source/nvtt/src/src/nvimage/NormalMap.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvimage/NormalMap.h +++ ps/trunk/libraries/source/nvtt/src/src/nvimage/NormalMap.h @@ -21,12 +21,14 @@ // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR // OTHER DEALINGS IN THE SOFTWARE. +#pragma once #ifndef NV_IMAGE_NORMALMAP_H #define NV_IMAGE_NORMALMAP_H -#include -#include -#include +#include "nvimage.h" +#include "FloatImage.h" + +#include "nvmath/Vector.h" namespace nv @@ -41,11 +43,13 @@ NormalMapFilter_Sobel9x9, // very large }; - FloatImage * createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, NormalMapFilter filter = NormalMapFilter_Sobel3x3); + // @@ These two functions should be deprecated: + NVIMAGE_API FloatImage * createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, NormalMapFilter filter = NormalMapFilter_Sobel3x3); + NVIMAGE_API FloatImage * createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, Vector4::Arg filterWeights); - FloatImage * createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, Vector4::Arg filterWeights); + NVIMAGE_API FloatImage * createNormalMap(const FloatImage * img, FloatImage::WrapMode wm, Vector4::Arg filterWeights); - void normalizeNormalMap(FloatImage * img); + NVIMAGE_API void normalizeNormalMap(FloatImage * img); // @@ Add generation of DU/DV maps. Index: ps/trunk/libraries/source/nvtt/src/src/nvimage/NormalMap.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvimage/NormalMap.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvimage/NormalMap.cpp @@ -21,14 +21,17 @@ // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR // OTHER DEALINGS IN THE SOFTWARE. -#include -#include -#include -#include +#include "NormalMap.h" +#include "Filter.h" +#include "FloatImage.h" +#include "Image.h" -#include +#include "nvmath/Color.inl" +#include "nvmath/Vector.h" -#include +#include "nvcore/Ptr.h" + +#include // memcpy using namespace nv; @@ -36,106 +39,170 @@ // Create normal map using the given kernels. static FloatImage * createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, const Kernel2 * kdu, const Kernel2 * kdv) { - nvCheck(kdu != NULL); - nvCheck(kdv != NULL); - nvCheck(img != NULL); - - const uint w = img->width(); - const uint h = img->height(); - - AutoPtr fimage(new FloatImage()); - fimage->allocate(4, w, h); - - // Compute height and store in alpha channel: - float * alphaChannel = fimage->channel(3); - for(uint i = 0; i < w*h; i++) - { - Vector4 color = toVector4(img->pixel(i)); - alphaChannel[i] = dot(color, heightWeights); - } - - float heightScale = 1.0f / 16.0f; // @@ Use a user defined factor. 
- - for(uint y = 0; y < h; y++) - { - for(uint x = 0; x < w; x++) - { - const float du = fimage->applyKernel(kdu, x, y, 3, wm); - const float dv = fimage->applyKernel(kdv, x, y, 3, wm); - - Vector3 n = normalize(Vector3(du, dv, heightScale)); - - fimage->setPixel(0.5f * n.x() + 0.5f, x, y, 0); - fimage->setPixel(0.5f * n.y() + 0.5f, x, y, 1); - fimage->setPixel(0.5f * n.z() + 0.5f, x, y, 2); - } - } - - return fimage.release(); + nvDebugCheck(kdu != NULL); + nvDebugCheck(kdv != NULL); + nvDebugCheck(img != NULL); + + const uint w = img->width(); + const uint h = img->height(); + + AutoPtr fimage(new FloatImage()); + fimage->allocate(4, w, h); + + // Compute height and store in alpha channel: + float * alphaChannel = fimage->channel(3); + for(uint i = 0; i < w * h; i++) + { + Vector4 color = toVector4(img->pixel(i)); + alphaChannel[i] = dot(color, heightWeights); + } + + float heightScale = 1.0f / 16.0f; // @@ Use a user defined factor. + + for(uint y = 0; y < h; y++) + { + for(uint x = 0; x < w; x++) + { + const float du = fimage->applyKernelXY(kdu, x, y, 0, 3, wm); + const float dv = fimage->applyKernelXY(kdv, x, y, 0, 3, wm); + + Vector3 n = normalize(Vector3(du, dv, heightScale)); + + fimage->pixel(0, x, y, 0) = 0.5f * n.x + 0.5f; + fimage->pixel(1, x, y, 0) = 0.5f * n.y + 0.5f; + fimage->pixel(2, x, y, 0) = 0.5f * n.z + 0.5f; + } + } + + return fimage.release(); +} + + +// Create normal map using the given kernels. +static FloatImage * createNormalMap(const FloatImage * img, FloatImage::WrapMode wm, const Kernel2 * kdu, const Kernel2 * kdv) +{ + nvDebugCheck(kdu != NULL); + nvDebugCheck(kdv != NULL); + nvDebugCheck(img != NULL); + +#pragma NV_MESSAGE("FIXME: Height scale parameter should go away. It should be a sensible value that produces good results when the heightmap is in the [0, 1] range.") + const float heightScale = 1.0f / 16.0f; + + const uint w = img->width(); + const uint h = img->height(); + + AutoPtr img_out(new FloatImage()); + img_out->allocate(4, w, h); + + for (uint y = 0; y < h; y++) + { + for (uint x = 0; x < w; x++) + { + const float du = img->applyKernelXY(kdu, x, y, 0, 3, wm); + const float dv = img->applyKernelXY(kdv, x, y, 0, 3, wm); + + Vector3 n = normalize(Vector3(du, dv, heightScale)); + + img_out->pixel(0, x, y, 0) = n.x; + img_out->pixel(1, x, y, 0) = n.y; + img_out->pixel(2, x, y, 0) = n.z; + } + } + + // Copy alpha channel. + /*for (uint y = 0; y < h; y++) + { + for (uint x = 0; x < w; x++) + { + + img_out->pixel(3, x, y, 0) = img->pixel(3, x, y, 0); + } + }*/ + memcpy(img_out->channel(3), img->channel(3), w * h * sizeof(float)); + + return img_out.release(); } /// Create normal map using the given filter. FloatImage * nv::createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, NormalMapFilter filter /*= Sobel3x3*/) { - nvCheck(img != NULL); - - // Init the kernels. - Kernel2 * kdu = NULL; - Kernel2 * kdv = NULL; - - switch(filter) - { - case NormalMapFilter_Sobel3x3: - kdu = new Kernel2(3); - break; - case NormalMapFilter_Sobel5x5: - kdu = new Kernel2(5); - break; - case NormalMapFilter_Sobel7x7: - kdu = new Kernel2(7); - break; - case NormalMapFilter_Sobel9x9: - kdu = new Kernel2(9); - break; - default: - nvDebugCheck(false); - }; + nvDebugCheck(img != NULL); + + // Init the kernels. 
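Both createNormalMap overloads above build the tangent-space normal from the filtered height derivatives, n = normalize(du, dv, heightScale), and the Image-based path then remaps each component from [-1, 1] to [0, 1] for 8-bit storage. A standalone sketch of that step (packNormal is illustrative, not a library function):

#include <cmath>

// Turn height derivatives (du, dv) into a packed [0,1] normal, as the
// Image-based createNormalMap above does with heightScale = 1/16.
static void packNormal(float du, float dv, float heightScale,
                       float & r, float & g, float & b)
{
    const float len = std::sqrt(du * du + dv * dv + heightScale * heightScale);
    r = 0.5f * (du / len) + 0.5f;
    g = 0.5f * (dv / len) + 0.5f;
    b = 0.5f * (heightScale / len) + 0.5f;
}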
+ Kernel2 * kdu = NULL; + Kernel2 * kdv = NULL; + + switch(filter) + { + case NormalMapFilter_Sobel3x3: + kdu = new Kernel2(3); + break; + case NormalMapFilter_Sobel5x5: + kdu = new Kernel2(5); + break; + case NormalMapFilter_Sobel7x7: + kdu = new Kernel2(7); + break; + case NormalMapFilter_Sobel9x9: + kdu = new Kernel2(9); + break; + default: + nvDebugCheck(false); + }; - kdu->initSobel(); - kdu->normalize(); + kdu->initSobel(); + kdu->normalize(); - kdv = new Kernel2(*kdu); - kdv->transpose(); + kdv = new Kernel2(*kdu); + kdv->transpose(); - return ::createNormalMap(img, wm, heightWeights, kdu, kdv); + return ::createNormalMap(img, wm, heightWeights, kdu, kdv); } /// Create normal map combining multiple sobel filters. FloatImage * nv::createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, Vector4::Arg filterWeights) { - nvCheck(img != NULL); + nvDebugCheck(img != NULL); + + Kernel2 * kdu = NULL; + Kernel2 * kdv = NULL; + + kdu = new Kernel2(9); + kdu->initBlendedSobel(filterWeights); + kdu->normalize(); - Kernel2 * kdu = NULL; - Kernel2 * kdv = NULL; + kdv = new Kernel2(*kdu); + kdv->transpose(); - kdu = new Kernel2(9); - kdu->initBlendedSobel(filterWeights); - kdu->normalize(); - - kdv = new Kernel2(*kdu); - kdv->transpose(); - - return ::createNormalMap(img, wm, heightWeights, kdu, kdv); + return ::createNormalMap(img, wm, heightWeights, kdu, kdv); } + +FloatImage * nv::createNormalMap(const FloatImage * img, FloatImage::WrapMode wm, Vector4::Arg filterWeights) +{ + nvDebugCheck(img != NULL); + + Kernel2 * kdu = NULL; + Kernel2 * kdv = NULL; + + kdu = new Kernel2(9); + kdu->initBlendedSobel(filterWeights); + kdu->normalize(); + + kdv = new Kernel2(*kdu); + kdv->transpose(); + + return ::createNormalMap(img, wm, kdu, kdv); +} + + /// Normalize the given image in place. 
void nv::normalizeNormalMap(FloatImage * img) { - nvCheck(img != NULL); - img->expandNormals(0); - img->normalize(0); - img->packNormals(0); + nvDebugCheck(img != NULL); + + img->normalize(0); } Index: ps/trunk/libraries/source/nvtt/src/src/nvimage/NormalMipmap.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvimage/NormalMipmap.h +++ ps/trunk/libraries/source/nvtt/src/src/nvimage/NormalMipmap.h @@ -1,17 +0,0 @@ -// This code is in the public domain -- castanyo@yahoo.es - -#ifndef NV_IMAGE_NORMALMIPMAP_H -#define NV_IMAGE_NORMALMIPMAP_H - -#include - - -namespace nv -{ - class FloatImage; - - FloatImage * createNormalMipmapMap(const FloatImage * img); - -} // nv namespace - -#endif // NV_IMAGE_NORMALMIPMAP_H Index: ps/trunk/libraries/source/nvtt/src/src/nvimage/NormalMipmap.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvimage/NormalMipmap.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvimage/NormalMipmap.cpp @@ -1,98 +0,0 @@ -// This code is in the public domain -- castanyo@yahoo.es - -#include - -#include -#include - -#include -#include - -using namespace nv; - -FloatImage * nv::createNormalMipmapMap(const FloatImage * img) -{ - nvDebugCheck(img != NULL); - - uint w = img->width(); - uint h = img->height(); - - uint hw = w / 2; - uint hh = h / 2; - - FloatImage dotImg; - dotImg.allocate(1, w, h); - - FloatImage shImg; - shImg.allocate(9, hw, hh); - - SampleDistribution distribution(256); - const uint sampleCount = distribution.sampleCount(); - - for (uint d = 0; d < sampleCount; d++) - { - const float * xChannel = img->channel(0); - const float * yChannel = img->channel(1); - const float * zChannel = img->channel(2); - - Vector3 dir = distribution.sampleDir(d); - - Sh2 basis; - basis.eval(dir); - - for(uint i = 0; i < w*h; i++) - { - Vector3 normal(xChannel[i], yChannel[i], zChannel[i]); - normal = normalizeSafe(normal, Vector3(zero), 0.0f); - - dotImg.setPixel(dot(dir, normal), d); - } - - // @@ It would be nice to have a fastDownSample that took an existing image as an argument, to avoid allocations. - AutoPtr dotMip(dotImg.fastDownSample()); - - for(uint p = 0; p < hw*hh; p++) - { - float f = dotMip->pixel(p); - - // Project irradiance to sh basis and accumulate. - for (uint i = 0; i < 9; i++) - { - float & sum = shImg.channel(i)[p]; - sum += f * basis.elemAt(i); - } - } - } - - - - FloatImage * normalMipmap = new FloatImage; - normalMipmap->allocate(4, hw, hh); - - // Precompute the clamped cosine radiance transfer. - Sh2 prt; - prt.cosineTransfer(); - - // Allocate outside the loop. - Sh2 sh; - - for(uint p = 0; p < hw*hh; p++) - { - for (uint i = 0; i < 9; i++) - { - sh.elemAt(i) = shImg.channel(i)[p]; - } - - // Convolve sh irradiance by radiance transfer. - sh *= prt; - - // Now sh(0) is the ambient occlusion. - // and sh(1) is the normal direction. - - // Should we use SVD to fit only the normals to the SH? - - } - - return normalMipmap; -} - Index: ps/trunk/libraries/source/nvtt/src/src/nvimage/PixelFormat.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvimage/PixelFormat.h +++ ps/trunk/libraries/source/nvtt/src/src/nvimage/PixelFormat.h @@ -21,60 +21,96 @@ // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR // OTHER DEALINGS IN THE SOFTWARE. 
+#pragma once #ifndef NV_IMAGE_PIXELFORMAT_H #define NV_IMAGE_PIXELFORMAT_H -#include +#include "nvimage.h" namespace nv { - namespace PixelFormat - { + namespace PixelFormat + { - // Convert component @a c having @a inbits to the returned value having @a outbits. - inline uint convert(uint c, uint inbits, uint outbits) - { - if (inbits == 0) - { - return 0; - } - else if (inbits >= outbits) - { - // truncate - return c >> (inbits - outbits); - } - else - { - // bitexpand - return (c << (outbits - inbits)) | convert(c, inbits, outbits - inbits); - } - } - - // Get pixel component shift and size given its mask. - inline void maskShiftAndSize(uint mask, uint * shift, uint * size) - { - if (!mask) - { - *shift = 0; - *size = 0; - return; - } - - *shift = 0; - while((mask & 1) == 0) { - ++(*shift); - mask >>= 1; - } - - *size = 0; - while((mask & 1) == 1) { - ++(*size); - mask >>= 1; - } - } + // Convert component @a c having @a inbits to the returned value having @a outbits. + inline uint convert(uint c, uint inbits, uint outbits) + { + if (inbits == 0) + { + return 0; + } + else if (inbits >= outbits) + { + // truncate + return c >> (inbits - outbits); + } + else + { + // bitexpand + return (c << (outbits - inbits)) | convert(c, inbits, outbits - inbits); + } + } + + // Get pixel component shift and size given its mask. + inline void maskShiftAndSize(uint mask, uint * shift, uint * size) + { + if (!mask) + { + *shift = 0; + *size = 0; + return; + } + + *shift = 0; + while((mask & 1) == 0) { + ++(*shift); + mask >>= 1; + } + + *size = 0; + while((mask & 1) == 1) { + ++(*size); + mask >>= 1; + } + } + + inline float quantizeCeil(float f, int inbits, int outbits) + { + nvDebugCheck(f >= 0.0f && f <= 1.0f); + //uint i = f * (float(1 << inbits) - 1); + //i = convert(i, inbits, outbits); + //float result = float(i) / (float(1 << outbits) - 1); + //nvCheck(result >= f); + float result; + int offset = 0; + do { + uint i = offset + uint(f * (float(1 << inbits) - 1)); + i = convert(i, inbits, outbits); + result = float(i) / (float(1 << outbits) - 1); + offset++; + } while (result < f); + + return result; + } + + /* + inline float quantizeRound(float f, int bits) + { + nvDebugCheck(f >= 0.0f && f <= 1.0f); + float scale = float(1 << bits); + return fround(f * scale) / scale; + } + + inline float quantizeFloor(float f, int bits) + { + nvDebugCheck(f >= 0.0f && f <= 1.0f); + float scale = float(1 << bits); + return floor(f * scale) / scale; + } + */ - } // PixelFormat namespace + } // PixelFormat namespace } // nv namespace Index: ps/trunk/libraries/source/nvtt/src/src/nvimage/PsdFile.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvimage/PsdFile.h +++ ps/trunk/libraries/source/nvtt/src/src/nvimage/PsdFile.h @@ -1,69 +1,70 @@ // This code is in the public domain -- castanyo@yahoo.es +#pragma once #ifndef NV_IMAGE_PSDFILE_H #define NV_IMAGE_PSDFILE_H -#include +#include "nvcore/Stream.h" namespace nv { - enum PsdColorMode - { - PsdColorMode_Bitmap = 0, - PsdColorMode_GrayScale = 1, - PsdColorMode_Indexed = 2, - PsdColorMode_RGB = 3, - PsdColorMode_CMYK = 4, - PsdColorMode_MultiChannel = 7, - PsdColorMode_DuoTone = 8, - PsdColorMode_LabColor = 9 - }; - - /// PSD header. 
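PixelFormat::convert above either truncates (inbits >= outbits) or bit-expands by recursively replicating the high bits, so 0 stays 0 and the largest input maps to the largest output; quantizeCeil then searches for the smallest representable value not below f. A quick worked check of convert() (convertExamples is illustrative only):

#include "nvimage/PixelFormat.h"
#include <cassert>

static void convertExamples()
{
    using nv::PixelFormat::convert;
    assert(convert(0x1F, 5, 8) == 0xFF); // 5-bit max bit-expands to 8-bit max
    assert(convert(0x10, 5, 8) == 0x84); // 10000 -> 10000100, high bits replicated
    assert(convert(0xFF, 8, 5) == 0x1F); // truncation keeps the top 5 bits
}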
- struct PsdHeader - { - uint32 signature; - uint16 version; - uint8 reserved[6]; - uint16 channel_count; - uint32 height; - uint32 width; - uint16 depth; - uint16 color_mode; - - bool isValid() const - { - return signature == 0x38425053; // '8BPS' - } - - bool isSupported() const - { - if (version != 1) { - nvDebug("*** bad version number %u\n", version); - return false; - } - if (channel_count > 4) { - return false; - } - if (depth != 8) { - return false; - } - if (color_mode != PsdColorMode_RGB) { - return false; - } - return true; - } - }; - - - inline Stream & operator<< (Stream & s, PsdHeader & head) - { - s << head.signature << head.version; - for (int i = 0; i < 6; i++) { - s << head.reserved[i]; - } - return s << head.channel_count << head.height << head.width << head.depth << head.color_mode; - } + enum PsdColorMode + { + PsdColorMode_Bitmap = 0, + PsdColorMode_GrayScale = 1, + PsdColorMode_Indexed = 2, + PsdColorMode_RGB = 3, + PsdColorMode_CMYK = 4, + PsdColorMode_MultiChannel = 7, + PsdColorMode_DuoTone = 8, + PsdColorMode_LabColor = 9 + }; + + /// PSD header. + struct PsdHeader + { + uint32 signature; + uint16 version; + uint8 reserved[6]; + uint16 channel_count; + uint32 height; + uint32 width; + uint16 depth; + uint16 color_mode; + + bool isValid() const + { + return signature == 0x38425053; // '8BPS' + } + + bool isSupported() const + { + if (version != 1) { + nvDebug("*** bad version number %u\n", version); + return false; + } + if (channel_count > 4) { + return false; + } + if (depth != 8) { // @@ Add support for 16 bit depths. + return false; + } + if (color_mode != PsdColorMode_RGB) { + return false; + } + return true; + } + }; + + + inline Stream & operator<< (Stream & s, PsdHeader & head) + { + s << head.signature << head.version; + for (int i = 0; i < 6; i++) { + s << head.reserved[i]; + } + return s << head.channel_count << head.height << head.width << head.depth << head.color_mode; + } } // nv namespace Index: ps/trunk/libraries/source/nvtt/src/src/nvimage/Quantize.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvimage/Quantize.h +++ ps/trunk/libraries/source/nvtt/src/src/nvimage/Quantize.h @@ -1,9 +1,10 @@ // This code is in the public domain -- castanyo@yahoo.es +#pragma once #ifndef NV_IMAGE_QUANTIZE_H #define NV_IMAGE_QUANTIZE_H -#include +#include "nvimage.h" namespace nv Index: ps/trunk/libraries/source/nvtt/src/src/nvimage/Quantize.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvimage/Quantize.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvimage/Quantize.cpp @@ -12,13 +12,16 @@ @@ This code needs to be reviewed, I'm not sure it's correct. */ -#include -#include -#include +#include "Quantize.h" +#include "Image.h" +#include "PixelFormat.h" -#include +#include "nvmath/Color.h" +#include "nvmath/Vector.inl" -#include // swap +#include "nvcore/Utils.h" // swap + +#include // memset using namespace nv; @@ -82,8 +85,8 @@ memset(row0, 0, sizeof(float)*(w+2)); memset(row1, 0, sizeof(float)*(w+2)); - for(uint y = 0; y < h; y++) { - for(uint x = 0; x < w; x++) { + for (uint y = 0; y < h; y++) { + for (uint x = 0; x < w; x++) { Color32 pixel = image->pixel(x, y); @@ -91,7 +94,7 @@ int alpha = int(pixel.a) + int(row0[1+x]); // Convert color. - if( alpha > alpha_threshold ) pixel.a = 255; + if (alpha > alpha_threshold) pixel.a = 255; else pixel.a = 0; // Store color. 
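The alpha loop above thresholds each pixel to 0 or 255 while carrying the quantization error forward in row0/row1, so large translucent regions keep roughly the right average coverage after binarization. A stripped-down 1-D version of the same error-diffusion idea (binarizeAlphaRow is illustrative only):

#include <vector>

// Threshold alpha to 0/255 while propagating the error into the next pixel,
// a simplified form of the row0/row1 diffusion used above.
static void binarizeAlphaRow(std::vector<int> & alpha, int threshold)
{
    float error = 0.0f;
    for (size_t i = 0; i < alpha.size(); i++) {
        const float a = float(alpha[i]) + error;
        const int out = (a > float(threshold)) ? 255 : 0;
        error = a - float(out); // carry the difference forward
        alpha[i] = out;
    }
}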
@@ -174,10 +177,10 @@ Color32 pixel = image->pixel(x, y); // Add error. - pixel.r = clamp(int(pixel.r) + int(row0[1+x].x()), 0, 255); - pixel.g = clamp(int(pixel.g) + int(row0[1+x].y()), 0, 255); - pixel.b = clamp(int(pixel.b) + int(row0[1+x].z()), 0, 255); - pixel.a = clamp(int(pixel.a) + int(row0[1+x].w()), 0, 255); + pixel.r = clamp(int(pixel.r) + int(row0[1+x].x), 0, 255); + pixel.g = clamp(int(pixel.g) + int(row0[1+x].y), 0, 255); + pixel.b = clamp(int(pixel.b) + int(row0[1+x].z), 0, 255); + pixel.a = clamp(int(pixel.a) + int(row0[1+x].w), 0, 255); int r = pixel.r; int g = pixel.g; Index: ps/trunk/libraries/source/nvtt/src/src/nvimage/TgaFile.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvimage/TgaFile.h +++ ps/trunk/libraries/source/nvtt/src/src/nvimage/TgaFile.h @@ -1,9 +1,10 @@ // This code is in the public domain -- castanyo@yahoo.es +#pragma once #ifndef NV_IMAGE_TGAFILE_H #define NV_IMAGE_TGAFILE_H -#include +#include "nvcore/Stream.h" namespace nv { Index: ps/trunk/libraries/source/nvtt/src/src/nvimage/ValveTextureFormat.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvimage/ValveTextureFormat.h +++ ps/trunk/libraries/source/nvtt/src/src/nvimage/ValveTextureFormat.h @@ -0,0 +1,122 @@ + +/* +For more info: +http://developer.valvesoftware.com/wiki/VTF + +File Layout: + VTF Header + VTF Low Resolution Image Data + For Each Mipmap (Smallest to Largest) + For Each Frame (First to Last) + For Each Face (First to Last) + For Each Z Slice (Min to Max; Varies with Mipmap) + VTF High Resolution Image Data + + +*/ + + +enum +{ + IMAGE_FORMAT_NONE = -1, + IMAGE_FORMAT_RGBA8888 = 0, + IMAGE_FORMAT_ABGR8888, + IMAGE_FORMAT_RGB888, + IMAGE_FORMAT_BGR888, + IMAGE_FORMAT_RGB565, + IMAGE_FORMAT_I8, + IMAGE_FORMAT_IA88, + IMAGE_FORMAT_P8, + IMAGE_FORMAT_A8, + IMAGE_FORMAT_RGB888_BLUESCREEN, + IMAGE_FORMAT_BGR888_BLUESCREEN, + IMAGE_FORMAT_ARGB8888, + IMAGE_FORMAT_BGRA8888, + IMAGE_FORMAT_DXT1, + IMAGE_FORMAT_DXT3, + IMAGE_FORMAT_DXT5, + IMAGE_FORMAT_BGRX8888, + IMAGE_FORMAT_BGR565, + IMAGE_FORMAT_BGRX5551, + IMAGE_FORMAT_BGRA4444, + IMAGE_FORMAT_DXT1_ONEBITALPHA, + IMAGE_FORMAT_BGRA5551, + IMAGE_FORMAT_UV88, + IMAGE_FORMAT_UVWQ8888, + IMAGE_FORMAT_RGBA16161616F, + IMAGE_FORMAT_RGBA16161616, + IMAGE_FORMAT_UVLX8888, + IMAGE_FORMAT_R32F, //!< = Luminance - 32 bpp + IMAGE_FORMAT_RGB323232F, //!< = Red, Green, Blue - 96 bpp + IMAGE_FORMAT_RGBA32323232F, //!< = Red, Green, Blue, Alpha - 128 bpp + IMAGE_FORMAT_NV_DST16, + IMAGE_FORMAT_NV_DST24, + IMAGE_FORMAT_NV_INTZ, + IMAGE_FORMAT_NV_RAWZ, + IMAGE_FORMAT_ATI_DST16, + IMAGE_FORMAT_ATI_DST24, + IMAGE_FORMAT_NV_NULL, + IMAGE_FORMAT_ATI2N, + IMAGE_FORMAT_ATI1N, +}; + + +enum +{ + TEXTUREFLAGS_POINTSAMPLE = 0x00000001, + TEXTUREFLAGS_TRILINEAR = 0x00000002, + TEXTUREFLAGS_CLAMPS = 0x00000004, + TEXTUREFLAGS_CLAMPT = 0x00000008, + TEXTUREFLAGS_ANISOTROPIC = 0x00000010, + TEXTUREFLAGS_HINT_DXT5 = 0x00000020, + TEXTUREFLAGS_NOCOMPRESS = 0x00000040, + TEXTUREFLAGS_NORMAL = 0x00000080, + TEXTUREFLAGS_NOMIP = 0x00000100, + TEXTUREFLAGS_NOLOD = 0x00000200, + TEXTUREFLAGS_MINMIP = 0x00000400, + TEXTUREFLAGS_PROCEDURAL = 0x00000800, + TEXTUREFLAGS_ONEBITALPHA = 0x00001000, + TEXTUREFLAGS_EIGHTBITALPHA = 0x00002000, + TEXTUREFLAGS_ENVMAP = 0x00004000, + TEXTUREFLAGS_RENDERTARGET = 0x00008000, + TEXTUREFLAGS_DEPTHRENDERTARGET = 0x00010000, + TEXTUREFLAGS_NODEBUGOVERRIDE = 0x00020000, + TEXTUREFLAGS_SINGLECOPY = 0x00040000, + 
TEXTUREFLAGS_ONEOVERMIPLEVELINALPHA = 0x00080000, + TEXTUREFLAGS_PREMULTCOLORBYONEOVERMIPLEVEL = 0x00100000, + TEXTUREFLAGS_NORMALTODUDV = 0x00200000, + TEXTUREFLAGS_ALPHATESTMIPGENERATION = 0x00400000, + TEXTUREFLAGS_NODEPTHBUFFER = 0x00800000, + TEXTUREFLAGS_NICEFILTERED = 0x01000000, + TEXTUREFLAGS_CLAMPU = 0x02000000 +}; + + +struct VtfHeader +{ + char signature[4]; // File signature ("VTF\0"). + uint32 version[2]; // version[0].version[1] (currently 7.2). + uint32 headerSize; // Size of the header struct (16 byte aligned; currently 80 bytes). + + // 7.0 + uint16 width; // Width of the largest mipmap in pixels. Must be a power of 2. + uint16 height; // Height of the largest mipmap in pixels. Must be a power of 2. + uint32 flags; // VTF flags. + uint16 frames; // Number of frames, if animated (1 for no animation). + uint16 firstFrame; // First frame in animation (0 based). + uint8 padding0[4]; // reflectivity padding (16 byte alignment). + float reflectivity[3]; // reflectivity vector. + uint8 padding1[4]; // reflectivity padding (8 byte packing). + float bumpmapScale; // Bumpmap scale. + uint32 highResImageFormat; // High resolution image format. + uint8 mipmapCount; // Number of mipmaps. + uint32 lowResImageFormat; // Low resolution image format (always DXT1). + uint8 lowResImageWidth; // Low resolution image width. + uint8 lowResImageHeight; // Low resolution image height. + + // 7.2 + uint16 depth; // Depth of the largest mipmap in pixels. + // Must be a power of 2. Can be 0 or 1 for a 2D texture (v7.2 only). +}; + + Index: ps/trunk/libraries/source/nvtt/src/src/nvimage/nvimage.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvimage/nvimage.h +++ ps/trunk/libraries/source/nvtt/src/src/nvimage/nvimage.h @@ -1,9 +1,12 @@ // This code is in the public domain -- castanyo@yahoo.es +#pragma once #ifndef NV_IMAGE_H #define NV_IMAGE_H -#include +#include "nvcore/nvcore.h" +#include "nvcore/Debug.h" // nvDebugCheck +#include "nvcore/Utils.h" // isPowerOfTwo // Function linkage #if NVIMAGE_SHARED @@ -19,4 +22,27 @@ #define NVIMAGE_CLASS #endif + +namespace nv { + + // Some utility functions: + + inline uint computeBitPitch(uint w, uint bitsize, uint alignmentInBits) + { + nvDebugCheck(isPowerOfTwo(alignmentInBits)); + + return ((w * bitsize + alignmentInBits - 1) / alignmentInBits) * alignmentInBits; + } + + inline uint computeBytePitch(uint w, uint bitsize, uint alignmentInBytes) + { + uint pitch = computeBitPitch(w, bitsize, 8*alignmentInBytes); + nvDebugCheck((pitch & 7) == 0); + + return (pitch + 7) / 8; + } + + +} // nv namespace + #endif // NV_IMAGE_H Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/Basis.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvmath/Basis.h +++ ps/trunk/libraries/source/nvtt/src/src/nvmath/Basis.h @@ -1,78 +0,0 @@ -// This code is in the public domain -- castanyo@yahoo.es - -#ifndef NV_MATH_BASIS_H -#define NV_MATH_BASIS_H - -#include -#include -#include - -namespace nv -{ - - /// Basis class to compute tangent space basis, ortogonalizations and to - /// transform vectors from one space to another. - struct Basis - { - /// Create a null basis. - Basis() : tangent(0, 0, 0), bitangent(0, 0, 0), normal(0, 0, 0) {} - - /// Create a basis given three vectors. - Basis(Vector3::Arg n, Vector3::Arg t, Vector3::Arg b) : tangent(t), bitangent(b), normal(n) {} - - /// Create a basis with the given tangent vectors and the handness. 
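computeBitPitch and computeBytePitch, added to nvimage.h above, round a row of w pixels of bitsize bits up to the requested alignment (which must be a power of two). For instance, a 10-pixel RGB8 row is 30 bytes unpadded but 32 bytes with 4-byte row alignment. A small check of those helpers (pitchExamples is illustrative only):

#include "nvimage/nvimage.h"
#include <cassert>

static void pitchExamples()
{
    // 10 RGB8 pixels (24 bpp): 30 bytes unpadded, 32 bytes with 4-byte alignment.
    assert(nv::computeBytePitch(10, 24, 4) == 32);

    // Rows that are already aligned are unchanged: 16 RGBA8 pixels.
    assert(nv::computeBytePitch(16, 32, 4) == 64);
}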
- Basis(Vector3::Arg n, Vector3::Arg t, float sign) - { - build(n, t, sign); - } - - NVMATH_API void normalize(float epsilon = NV_EPSILON); - NVMATH_API void orthonormalize(float epsilon = NV_EPSILON); - NVMATH_API void robustOrthonormalize(float epsilon = NV_EPSILON); - NVMATH_API void buildFrameForDirection(Vector3::Arg d); - - /// Calculate the determinant [ F G N ] to obtain the handness of the basis. - float handness() const - { - return determinant() > 0.0f ? 1.0f : -1.0f; - } - - /// Build a basis from 2 vectors and a handness flag. - void build(Vector3::Arg n, Vector3::Arg t, float sign) - { - normal = n; - tangent = t; - bitangent = sign * cross(t, n); - } - - /// Compute the determinant of this basis. - float determinant() const - { - return - tangent.x() * bitangent.y() * normal.z() - tangent.z() * bitangent.y() * normal.x() + - tangent.y() * bitangent.z() * normal.x() - tangent.y() * bitangent.x() * normal.z() + - tangent.z() * bitangent.x() * normal.y() - tangent.x() * bitangent.z() * normal.y(); - } - - /* - // Get transform matrix for this basis. - NVMATH_API Matrix matrix() const; - - // Transform by this basis. (From this basis to object space). - NVMATH_API Vector3 transform(Vector3::Arg v) const; - - // Transform by the transpose. (From object space to this basis). - NVMATH_API Vector3 transformT(Vector3::Arg v); - - // Transform by the inverse. (From object space to this basis). - NVMATH_API Vector3 transformI(Vector3::Arg v) const; - */ - - Vector3 tangent; - Vector3 bitangent; - Vector3 normal; - }; - -} // nv namespace - -#endif // NV_MATH_BASIS_H Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/Basis.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvmath/Basis.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvmath/Basis.cpp @@ -1,173 +0,0 @@ -// This code is in the public domain -- castanyo@yahoo.es - -#include - -using namespace nv; - - -/// Normalize basis vectors. -void Basis::normalize(float epsilon /*= NV_EPSILON*/) -{ - normal = ::normalize(normal, epsilon); - tangent = ::normalize(tangent, epsilon); - bitangent = ::normalize(bitangent, epsilon); -} - - -/// Gram-Schmidt orthogonalization. -/// @note Works only if the vectors are close to orthogonal. -void Basis::orthonormalize(float epsilon /*= NV_EPSILON*/) -{ - // N' = |N| - // T' = |T - (N' dot T) N'| - // B' = |B - (N' dot B) N' - (T' dot B) T'| - - normal = ::normalize(normal, epsilon); - - tangent -= normal * dot(normal, tangent); - tangent = ::normalize(tangent, epsilon); - - bitangent -= normal * dot(normal, bitangent); - bitangent -= tangent * dot(tangent, bitangent); - bitangent = ::normalize(bitangent, epsilon); -} - - -/// Robust orthonormalization. -/// Returns an orthonormal basis even when the original is degenerate. 
-void Basis::robustOrthonormalize(float epsilon /*= NV_EPSILON*/) -{ - if (length(normal) < epsilon) - { - normal = cross(tangent, bitangent); - - if (length(normal) < epsilon) - { - tangent = Vector3(1, 0, 0); - bitangent = Vector3(0, 1, 0); - normal = Vector3(0, 0, 1); - return; - } - } - normal = ::normalize(normal, epsilon); - - tangent -= normal * dot(normal, tangent); - bitangent -= normal * dot(normal, bitangent); - - if (length(tangent) < epsilon) - { - if (length(bitangent) < epsilon) - { - buildFrameForDirection(normal); - } - else - { - tangent = cross(bitangent, normal); - nvCheck(isNormalized(tangent, epsilon)); - } - } - else - { - tangent = ::normalize(tangent, epsilon); - bitangent -= tangent * dot(tangent, bitangent); - - if (length(bitangent) < epsilon) - { - bitangent = cross(tangent, normal); - nvCheck(isNormalized(bitangent)); - } - else - { - tangent = ::normalize(tangent, epsilon); - } - } - - // Check vector lengths. - nvCheck(isNormalized(normal, epsilon)); - nvCheck(isNormalized(tangent, epsilon)); - nvCheck(isNormalized(bitangent, epsilon)); - - // Check vector angles. - nvCheck(equal(dot(normal, tangent), 0.0f, epsilon)); - nvCheck(equal(dot(normal, bitangent), 0.0f, epsilon)); - nvCheck(equal(dot(tangent, bitangent), 0.0f, epsilon)); - - // Check vector orientation. - const float det = dot(cross(normal, tangent), bitangent); - nvCheck(equal(det, 1.0f, epsilon) || equal(det, -1.0f, epsilon)); -} - - -/// Build an arbitrary frame for the given direction. -void Basis::buildFrameForDirection(Vector3::Arg d) -{ - nvCheck(isNormalized(d)); - normal = d; - - // Choose minimum axis. - if (fabsf(normal.x()) < fabsf(normal.y()) && fabsf(normal.x()) < fabsf(normal.z())) - { - tangent = Vector3(1, 0, 0); - } - else if (fabsf(normal.y()) < fabsf(normal.z())) - { - tangent = Vector3(0, 1, 0); - } - else - { - tangent = Vector3(0, 0, 1); - } - - // Ortogonalize - tangent -= normal * dot(normal, tangent); - tangent = ::normalize(tangent); - - bitangent = cross(normal, tangent); -} - - - -/* -/// Transform by this basis. (From this basis to object space). -Vector3 Basis::transform(Vector3::Arg v) const -{ - Vector3 o = tangent * v.x(); - o += bitangent * v.y(); - o += normal * v.z(); - return o; -} - -/// Transform by the transpose. (From object space to this basis). -Vector3 Basis::transformT(Vector3::Arg v) -{ - return Vector3(dot(tangent, v), dot(bitangent, v), dot(normal, v)); -} - -/// Transform by the inverse. (From object space to this basis). -/// @note Uses Kramer's rule so the inverse is not accurate if the basis is ill-conditioned. -Vector3 Basis::transformI(Vector3::Arg v) const -{ - const float det = determinant(); - nvCheck(!equalf(det, 0.0f)); - - const float idet = 1.0f / det; - - // Rows of the inverse matrix. 
- Vector3 r0, r1, r2; - r0.x = (bitangent.y() * normal.z() - bitangent.z() * normal.y()) * idet; - r0.y = -(bitangent.x() * normal.z() - bitangent.z() * normal.x()) * idet; - r0.z = (bitangent.x() * normal.y() - bitangent.y() * normal.x()) * idet; - - r1.x = -(tangent.y() * normal.z() - tangent.z() * normal.y()) * idet; - r1.y = (tangent.x() * normal.z() - tangent.z() * normal.x()) * idet; - r1.z = -(tangent.x() * normal.y() - tangent.y() * normal.x()) * idet; - - r2.x = (tangent.y() * bitangent.z() - tangent.z() * bitangent.y()) * idet; - r2.y = -(tangent.x() * bitangent.z() - tangent.z() * bitangent.x()) * idet; - r2.z = (tangent.x() * bitangent.y() - tangent.y() * bitangent.x()) * idet; - - return Vector3(dot(v, r0), dot(v, r1), dot(v, r2)); -} -*/ - - Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/Box.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvmath/Box.h +++ ps/trunk/libraries/source/nvtt/src/src/nvmath/Box.h @@ -1,138 +1,101 @@ // This code is in the public domain -- castanyo@yahoo.es +#pragma once #ifndef NV_MATH_BOX_H #define NV_MATH_BOX_H -#include +#include "Vector.h" #include // FLT_MAX namespace nv { + class Vector; + class Stream; + class Sphere; + + // Axis Aligned Bounding Box. + class Box + { + public: + + inline Box() {} + inline Box(const Box & b) : minCorner(b.minCorner), maxCorner(b.maxCorner) {} + inline Box(const Vector3 & mins, const Vector3 & maxs) : minCorner(mins), maxCorner(maxs) {} + + Box & operator=(const Box & b); + + operator const float * () const { return reinterpret_cast(this); } + + // Clear the bounds. + void clearBounds(); + + // min < max + bool isValid() const; + + // Build a cube centered on center and with edge = 2*dist + void cube(const Vector3 & center, float dist); + + // Build a box, given center and extents. + void setCenterExtents(const Vector3 & center, const Vector3 & extents); + + // Get box center. + Vector3 center() const; + + // Return extents of the box. + Vector3 extents() const; + + // Return extents of the box. + float extents(uint axis) const; + + // Add a point to this box. + void addPointToBounds(const Vector3 & p); + + // Add a box to this box. + void addBoxToBounds(const Box & b); + + // Add sphere to this box. + void addSphereToBounds(const Vector3 & p, float r); + + // Translate box. + void translate(const Vector3 & v); + + // Scale the box. + void scale(float s); + + // Expand the box by a fixed amount. + void expand(float r); + + // Get the area of the box. + float area() const; + + // Get the volume of the box. + float volume() const; + + // Return true if the box contains the given point. + bool contains(const Vector3 & p) const; + + // Split the given box in 8 octants and assign the ith one to this box. + void setOctant(const Box & box, const Vector3 & center, int i); + + + // Clip the given segment against this box. + bool clipSegment(const Vector3 & origin, const Vector3 & dir, float * t_near, float * t_far) const; -/// Axis Aligned Bounding Box. -class Box -{ -public: - /// Default ctor. - Box() { }; + friend Stream & operator<< (Stream & s, Box & box); - /// Copy ctor. - Box( const Box & b ) : m_mins(b.m_mins), m_maxs(b.m_maxs) { } + const Vector3 & corner(int i) const { return (&minCorner)[i]; } - /// Init ctor. - Box( Vector3::Arg mins, Vector3::Arg maxs ) : m_mins(mins), m_maxs(maxs) { } - - // Cast operators. - operator const float * () const { return reinterpret_cast(this); } - - /// Min corner of the box. 
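The rewritten Box above exposes minCorner and maxCorner as public members and drops the old mins()/maxs() accessors, so client code reads the corners directly. A sketch of the change from the caller's side (boxSize is illustrative, not part of the patch):

#include "nvmath/Box.h"
#include "nvmath/Vector.inl"

// Before (NVTT 2.0.8): box.maxs() - box.mins()
// After  (NVTT 2.1.1): the corners are plain public members.
static nv::Vector3 boxSize(const nv::Box & box)
{
    return box.maxCorner - box.minCorner;
}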
- Vector3 mins() const { return m_mins; } - - /// Max corner of the box. - Vector3 maxs() const { return m_maxs; } - - /// Clear the bounds. - void clearBounds() - { - m_mins.set(FLT_MAX, FLT_MAX, FLT_MAX); - m_maxs.set(-FLT_MAX, -FLT_MAX, -FLT_MAX); - } - - /// Build a cube centered on center and with edge = 2*dist - void cube(Vector3::Arg center, float dist) - { - setCenterExtents(center, Vector3(dist, dist, dist)); - } - - /// Build a box, given center and extents. - void setCenterExtents(Vector3::Arg center, Vector3::Arg extents) - { - m_mins = center - extents; - m_maxs = center + extents; - } - - /// Get box center. - Vector3 center() const - { - return (m_mins + m_maxs) * 0.5f; - } - - /// Return extents of the box. - Vector3 extents() const - { - return (m_maxs - m_mins) * 0.5f; - } - - /// Return extents of the box. - scalar extents(uint axis) const - { - nvDebugCheck(axis < 3); - if (axis == 0) return (m_maxs.x() - m_mins.x()) * 0.5f; - if (axis == 1) return (m_maxs.y() - m_mins.y()) * 0.5f; - if (axis == 2) return (m_maxs.z() - m_mins.z()) * 0.5f; - nvAssume(false); - return 0.0f; - } - - /// Add a point to this box. - void addPointToBounds(Vector3::Arg p) - { - m_mins = min(m_mins, p); - m_maxs = max(m_maxs, p); - } - - /// Add a box to this box. - void addBoxToBounds(const Box & b) - { - m_mins = min(m_mins, b.m_mins); - m_maxs = max(m_maxs, b.m_maxs); - } - - /// Translate box. - void translate(Vector3::Arg v) - { - m_mins += v; - m_maxs += v; - } - - /// Scale the box. - void scale(float s) - { - m_mins *= s; - m_maxs *= s; - } - - /// Get the area of the box. - float area() const - { - const Vector3 d = extents(); - return 8.0f * (d.x()*d.y() + d.x()*d.z() + d.y()*d.z()); - } - - /// Get the volume of the box. - float volume() const - { - Vector3 d = extents(); - return 8.0f * (d.x() * d.y() * d.z()); - } - - /// Return true if the box contains the given point. - bool contains(Vector3::Arg p) const - { - return - m_mins.x() < p.x() && m_mins.y() < p.y() && m_mins.z() < p.z() && - m_maxs.x() > p.x() && m_maxs.y() > p.y() && m_maxs.z() > p.z(); - } - -private: - - Vector3 m_mins; - Vector3 m_maxs; -}; + Vector3 minCorner; + Vector3 maxCorner; + }; + float distanceSquared(const Box &box, const Vector3 &point); + bool overlap(const Box &box, const Sphere &sphere); + // p is ray origin, id is inverse ray direction. + bool intersect(const Box & box, const Vector3 & p, const Vector3 & id, float * t); } // nv namespace Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/Box.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvmath/Box.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvmath/Box.cpp @@ -0,0 +1,119 @@ +// This code is in the public domain -- castanyo@yahoo.es + +#include "Box.h" +#include "Box.inl" +//#include "Sphere.h" + +using namespace nv; + + + + +// Clip the given segment against this box. +bool Box::clipSegment(const Vector3 & origin, const Vector3 & dir, float * t_near, float * t_far) const { + + // Avoid aliasing. 
+ float tnear = *t_near; + float tfar = *t_far; + + // clip ray segment to box + for (int i = 0; i < 3; i++) + { + const float pos = origin.component[i] + tfar * dir.component[i]; + const float dt = tfar - tnear; + + if (dir.component[i] < 0) { + + // clip end point + if (pos < minCorner.component[i]) { + tfar = tnear + dt * (origin.component[i] - minCorner.component[i]) / (origin.component[i] - pos); + } + + // clip start point + if (origin.component[i] > maxCorner.component[i]) { + tnear = tnear + dt * (origin.component[i] - maxCorner.component[i]) / (tfar * dir.component[i]); + } + } + else { + + // clip end point + if (pos > maxCorner.component[i]) { + tfar = tnear + dt * (maxCorner.component[i] - origin.component[i]) / (pos - origin.component[i]); + } + + // clip start point + if (origin.component[i] < minCorner.component[i]) { + tnear = tnear + dt * (minCorner.component[i] - origin.component[i]) / (tfar * dir.component[i]); + } + } + + if (tnear > tfar) { + // Clipped away. + return false; + } + } + + // Return result. + *t_near = tnear; + *t_far = tfar; + return true; +} + + +float nv::distanceSquared(const Box &box, const Vector3 &point) { + Vector3 closest; + + if (point.x < box.minCorner.x) closest.x = box.minCorner.x; + else if (point.x > box.maxCorner.x) closest.x = box.maxCorner.x; + else closest.x = point.x; + + if (point.y < box.minCorner.y) closest.y = box.minCorner.y; + else if (point.y > box.maxCorner.y) closest.y = box.maxCorner.y; + else closest.y = point.y; + + if (point.z < box.minCorner.z) closest.z = box.minCorner.z; + else if (point.z > box.maxCorner.z) closest.z = box.maxCorner.z; + else closest.z = point.z; + + return lengthSquared(point - closest); +} + +/*bool nv::overlap(const Box &box, const Sphere &sphere) { + return distanceSquared(box, sphere.center) < sphere.radius * sphere.radius; +}*/ + + +bool nv::intersect(const Box & box, const Vector3 & p, const Vector3 & id, float * t /*= NULL*/) { + // Precompute these in ray structure? + int sdx = (id.x < 0); + int sdy = (id.y < 0); + int sdz = (id.z < 0); + + float tmin = (box.corner( sdx).x - p.x) * id.x; + float tmax = (box.corner(1-sdx).x - p.x) * id.x; + float tymin = (box.corner( sdy).y - p.y) * id.y; + float tymax = (box.corner(1-sdy).y - p.y) * id.y; + + if ((tmin > tymax) || (tymin > tmax)) + return false; + + if (tymin > tmin) tmin = tymin; + if (tymax < tmax) tmax = tymax; + + float tzmin = (box.corner( sdz).z - p.z) * id.z; + float tzmax = (box.corner(1-sdz).z - p.z) * id.z; + + if ((tmin > tzmax) || (tzmin > tmax)) + return false; + + if (tzmin > tmin) tmin = tzmin; + if (tzmax < tmax) tmax = tzmax; + + if (tmax < 0) + return false; + + if (t != NULL) *t = tmin; + + return true; +} + Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/Box.inl =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvmath/Box.inl +++ ps/trunk/libraries/source/nvtt/src/src/nvmath/Box.inl @@ -0,0 +1,154 @@ +// This code is in the public domain -- castanyo@yahoo.es + +#pragma once +#ifndef NV_MATH_BOX_INL +#define NV_MATH_BOX_INL + +#include "Box.h" +#include "Vector.inl" + +#include // FLT_MAX + +namespace nv +{ + // Default ctor. + //inline Box::Box() { }; + + // Copy ctor. + //inline Box::Box(const Box & b) : minCorner(b.minCorner), maxCorner(b.maxCorner) { } + + // Init ctor. + //inline Box::Box(const Vector3 & mins, const Vector3 & maxs) : minCorner(mins), maxCorner(maxs) { } + + // Assignment operator. 
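nv::intersect above is the usual slab test; the caller passes the ray origin p together with the componentwise inverse of the ray direction id, so each slab check is a multiply rather than a divide. A usage sketch under that assumption, with non-zero direction components (rayHitsBox is illustrative only):

#include "nvmath/Box.h"
#include "nvmath/Vector.inl"

// Precompute the inverse direction as intersect() expects; dir components must be non-zero.
static bool rayHitsBox(const nv::Box & box, const nv::Vector3 & origin,
                       const nv::Vector3 & dir, float & tHit)
{
    const nv::Vector3 invDir(1.0f / dir.x, 1.0f / dir.y, 1.0f / dir.z);
    return nv::intersect(box, origin, invDir, &tHit);
}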
+ inline Box & Box::operator=(const Box & b) { minCorner = b.minCorner; maxCorner = b.maxCorner; return *this; } + + // Clear the bounds. + inline void Box::clearBounds() + { + minCorner.set(FLT_MAX, FLT_MAX, FLT_MAX); + maxCorner.set(-FLT_MAX, -FLT_MAX, -FLT_MAX); + } + + // min < max + inline bool Box::isValid() const + { + return minCorner.x <= maxCorner.x && minCorner.y <= maxCorner.y && minCorner.z <= maxCorner.z; + } + + // Build a cube centered on center and with edge = 2*dist + inline void Box::cube(const Vector3 & center, float dist) + { + setCenterExtents(center, Vector3(dist)); + } + + // Build a box, given center and extents. + inline void Box::setCenterExtents(const Vector3 & center, const Vector3 & extents) + { + minCorner = center - extents; + maxCorner = center + extents; + } + + // Get box center. + inline Vector3 Box::center() const + { + return (minCorner + maxCorner) * 0.5f; + } + + // Return extents of the box. + inline Vector3 Box::extents() const + { + return (maxCorner - minCorner) * 0.5f; + } + + // Return extents of the box. + inline float Box::extents(uint axis) const + { + nvDebugCheck(axis < 3); + if (axis == 0) return (maxCorner.x - minCorner.x) * 0.5f; + if (axis == 1) return (maxCorner.y - minCorner.y) * 0.5f; + if (axis == 2) return (maxCorner.z - minCorner.z) * 0.5f; + nvUnreachable(); + return 0.0f; + } + + // Add a point to this box. + inline void Box::addPointToBounds(const Vector3 & p) + { + minCorner = min(minCorner, p); + maxCorner = max(maxCorner, p); + } + + // Add a box to this box. + inline void Box::addBoxToBounds(const Box & b) + { + minCorner = min(minCorner, b.minCorner); + maxCorner = max(maxCorner, b.maxCorner); + } + + // Add sphere to this box. + inline void Box::addSphereToBounds(const Vector3 & p, float r) { + minCorner = min(minCorner, p - Vector3(r)); + maxCorner = min(maxCorner, p + Vector3(r)); + } + + // Translate box. + inline void Box::translate(const Vector3 & v) + { + minCorner += v; + maxCorner += v; + } + + // Scale the box. + inline void Box::scale(float s) + { + minCorner *= s; + maxCorner *= s; + } + + // Expand the box by a fixed amount. + inline void Box::expand(float r) { + minCorner -= Vector3(r,r,r); + maxCorner += Vector3(r,r,r); + } + + // Get the area of the box. + inline float Box::area() const + { + const Vector3 d = extents(); + return 8.0f * (d.x*d.y + d.x*d.z + d.y*d.z); + } + + // Get the volume of the box. + inline float Box::volume() const + { + Vector3 d = extents(); + return 8.0f * (d.x * d.y * d.z); + } + + // Return true if the box contains the given point. + inline bool Box::contains(const Vector3 & p) const + { + return + minCorner.x < p.x && minCorner.y < p.y && minCorner.z < p.z && + maxCorner.x > p.x && maxCorner.y > p.y && maxCorner.z > p.z; + } + + // Split the given box in 8 octants and assign the ith one to this box. 
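One detail worth flagging in the inlines above: addSphereToBounds clamps maxCorner with min(), which can only shrink the box, whereas growing the bounds to enclose the sphere presumably needs max(). A corrected standalone sketch of the apparent intent (addSphereToBoundsGrow is illustrative, not part of the patch):

#include "nvmath/Box.h"
#include "nvmath/Vector.inl"

// Grow the box so that it encloses the sphere centered at p with radius r.
static void addSphereToBoundsGrow(nv::Box & box, const nv::Vector3 & p, float r)
{
    box.minCorner = nv::min(box.minCorner, p - nv::Vector3(r));
    box.maxCorner = nv::max(box.maxCorner, p + nv::Vector3(r));
}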
+ inline void Box::setOctant(const Box & box, const Vector3 & center, int i) + { + minCorner = box.minCorner; + maxCorner = box.maxCorner; + + if (i & 4) minCorner.x = center.x; + else maxCorner.x = center.x; + if (i & 2) minCorner.y = center.y; + else maxCorner.y = center.y; + if (i & 1) minCorner.z = center.z; + else maxCorner.z = center.z; + } + +} // nv namespace + + +#endif // NV_MATH_BOX_INL Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/CMakeLists.txt =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvmath/CMakeLists.txt +++ ps/trunk/libraries/source/nvtt/src/src/nvmath/CMakeLists.txt @@ -1,17 +1,17 @@ PROJECT(nvmath) SET(MATH_SRCS - nvmath.h - Vector.h - Matrix.h - Quaternion.h - Box.h - Color.h - Montecarlo.h Montecarlo.cpp - Random.h Random.cpp - SphericalHarmonic.h SphericalHarmonic.cpp - Basis.h Basis.cpp - Triangle.h Triangle.cpp TriBox.cpp) + nvmath.h + Box.h Box.inl + Color.h Color.inl + Fitting.h Fitting.cpp + Gamma.h Gamma.cpp + Half.h Half.cpp + Matrix.h + Plane.h Plane.inl Plane.cpp + SphericalHarmonic.h SphericalHarmonic.cpp + SimdVector.h SimdVector_SSE.h SimdVector_VE.h + Vector.h Vector.inl) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) @@ -19,15 +19,15 @@ ADD_DEFINITIONS(-DNVMATH_EXPORTS) IF(NVMATH_SHARED) - ADD_DEFINITIONS(-DNVMATH_SHARED=1) - ADD_LIBRARY(nvmath SHARED ${MATH_SRCS}) + ADD_DEFINITIONS(-DNVMATH_SHARED=1) + ADD_LIBRARY(nvmath SHARED ${MATH_SRCS}) ELSE(NVMATH_SHARED) - ADD_LIBRARY(nvmath ${MATH_SRCS}) + ADD_LIBRARY(nvmath ${MATH_SRCS}) ENDIF(NVMATH_SHARED) TARGET_LINK_LIBRARIES(nvmath ${LIBS} nvcore) INSTALL(TARGETS nvmath - RUNTIME DESTINATION ${BINDIR} - LIBRARY DESTINATION ${LIBDIR} - ARCHIVE DESTINATION ${LIBDIR}) + RUNTIME DESTINATION ${BINDIR} + LIBRARY DESTINATION ${LIBDIR} + ARCHIVE DESTINATION ${LIBDIR}) Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/Color.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvmath/Color.h +++ ps/trunk/libraries/source/nvtt/src/src/nvmath/Color.h @@ -1,178 +1,149 @@ // This code is in the public domain -- castanyo@yahoo.es +#pragma once #ifndef NV_MATH_COLOR_H #define NV_MATH_COLOR_H -#include -#include +#include "nvmath.h" namespace nv { -/// 64 bit color stored as BGRA. -class NVMATH_CLASS Color64 -{ -public: - Color64() { } - Color64(const Color64 & c) : u(c.u) { } - Color64(uint16 R, uint16 G, uint16 B, uint16 A) { setRGBA(R, G, B, A); } - explicit Color64(uint64 U) : u(U) { } - - void setRGBA(uint16 R, uint16 G, uint16 B, uint16 A) - { - r = R; - g = G; - b = B; - a = A; - } - - operator uint64 () const { - return u; - } + /// 64 bit color stored as BGRA. + class NVMATH_CLASS Color64 + { + public: + Color64() { } + Color64(const Color64 & c) : u(c.u) { } + Color64(uint16 R, uint16 G, uint16 B, uint16 A) { setRGBA(R, G, B, A); } + explicit Color64(uint64 U) : u(U) { } + + void setRGBA(uint16 R, uint16 G, uint16 B, uint16 A) + { + r = R; + g = G; + b = B; + a = A; + } + + operator uint64 () const { + return u; + } - union { - struct { + union { + struct { #if NV_LITTLE_ENDIAN - uint16 r, a, b, g; + uint16 r, a, b, g; #else - uint16 a: 16; - uint16 r: 16; - uint16 g: 16; - uint16 b: 16; + uint16 a: 16; + uint16 r: 16; + uint16 g: 16; + uint16 b: 16; #endif - }; - uint64 u; - }; -}; + }; + uint64 u; + }; + }; + + /// 32 bit color stored as BGRA. 
+ class NVMATH_CLASS Color32 + { + public: + Color32() { } + Color32(const Color32 & c) : u(c.u) { } + Color32(uint8 R, uint8 G, uint8 B) { setRGBA(R, G, B, 0xFF); } + Color32(uint8 R, uint8 G, uint8 B, uint8 A) { setRGBA( R, G, B, A); } + //Color32(uint8 c[4]) { setRGBA(c[0], c[1], c[2], c[3]); } + //Color32(float R, float G, float B) { setRGBA(uint(R*255), uint(G*255), uint(B*255), 0xFF); } + //Color32(float R, float G, float B, float A) { setRGBA(uint(R*255), uint(G*255), uint(B*255), uint(A*255)); } + explicit Color32(uint32 U) : u(U) { } + + void setRGBA(uint8 R, uint8 G, uint8 B, uint8 A) + { + r = R; + g = G; + b = B; + a = A; + } + + void setBGRA(uint8 B, uint8 G, uint8 R, uint8 A = 0xFF) + { + r = R; + g = G; + b = B; + a = A; + } + + operator uint32 () const { + return u; + } -/// 32 bit color stored as BGRA. -class NVMATH_CLASS Color32 -{ -public: - Color32() { } - Color32(const Color32 & c) : u(c.u) { } - Color32(uint8 R, uint8 G, uint8 B) { setRGBA(R, G, B, 0xFF); } - Color32(uint8 R, uint8 G, uint8 B, uint8 A) { setRGBA( R, G, B, A); } - //Color32(uint8 c[4]) { setRGBA(c[0], c[1], c[2], c[3]); } - //Color32(float R, float G, float B) { setRGBA(uint(R*255), uint(G*255), uint(B*255), 0xFF); } - //Color32(float R, float G, float B, float A) { setRGBA(uint(R*255), uint(G*255), uint(B*255), uint(A*255)); } - explicit Color32(uint32 U) : u(U) { } - - void setRGBA(uint8 R, uint8 G, uint8 B, uint8 A) - { - r = R; - g = G; - b = B; - a = A; - } - - void setBGRA(uint8 B, uint8 G, uint8 R, uint8 A = 0xFF) - { - r = R; - g = G; - b = B; - a = A; - } - - operator uint32 () const { - return u; - } - - union { - struct { + union { + struct { #if NV_LITTLE_ENDIAN - uint8 b, g, r, a; + uint8 b, g, r, a; #else - uint8 a: 8; - uint8 r: 8; - uint8 g: 8; - uint8 b: 8; + uint8 a: 8; + uint8 r: 8; + uint8 g: 8; + uint8 b: 8; #endif - }; - uint32 u; - }; -}; - + }; + uint8 component[4]; + uint32 u; + }; + }; + + + /// 16 bit 565 BGR color. + class NVMATH_CLASS Color16 + { + public: + Color16() { } + Color16(const Color16 & c) : u(c.u) { } + explicit Color16(uint16 U) : u(U) { } -/// 16 bit 565 BGR color. -class NVMATH_CLASS Color16 -{ -public: - Color16() { } - Color16(const Color16 & c) : u(c.u) { } - explicit Color16(uint16 U) : u(U) { } - - union { - struct { + union { + struct { #if NV_LITTLE_ENDIAN - uint16 b : 5; - uint16 g : 6; - uint16 r : 5; + uint16 b : 5; + uint16 g : 6; + uint16 r : 5; #else - uint16 r : 5; - uint16 g : 6; - uint16 b : 5; + uint16 r : 5; + uint16 g : 6; + uint16 b : 5; #endif - }; - uint16 u; - }; -}; - - -/// Clamp color components. -inline Vector3 colorClamp(Vector3::Arg c) -{ - return Vector3(clamp(c.x(), 0.0f, 1.0f), clamp(c.y(), 0.0f, 1.0f), clamp(c.z(), 0.0f, 1.0f)); -} - -/// Clamp without allowing the hue to change. -inline Vector3 colorNormalize(Vector3::Arg c) -{ - float scale = 1.0f; - if (c.x() > scale) scale = c.x(); - if (c.y() > scale) scale = c.y(); - if (c.z() > scale) scale = c.z(); - return c / scale; -} + }; + uint16 u; + }; + }; + + /// 16 bit 4444 BGRA color. + class NVMATH_CLASS Color16_4444 + { + public: + Color16_4444() { } + Color16_4444(const Color16_4444 & c) : u(c.u) { } + explicit Color16_4444(uint16 U) : u(U) { } -/// Convert Color32 to Color16. 
-inline Color16 toColor16(Color32 c) -{ - Color16 color; - // rrrrrggggggbbbbb - // rrrrr000gggggg00bbbbb000 -// color.u = (c.u >> 3) & 0x1F; -// color.u |= (c.u >> 5) & 0x7E0; -// color.u |= (c.u >> 8) & 0xF800; - - color.r = c.r >> 3; - color.g = c.g >> 2; - color.b = c.b >> 3; - return color; -} - - -/// Promote 16 bit color to 32 bit using regular bit expansion. -inline Color32 toColor32(Color16 c) -{ - Color32 color; -// c.u = ((col0.u << 3) & 0xf8) | ((col0.u << 5) & 0xfc00) | ((col0.u << 8) & 0xf80000); -// c.u |= (c.u >> 5) & 0x070007; -// c.u |= (c.u >> 6) & 0x000300; - - color.b = (c.b << 3) | (c.b >> 2); - color.g = (c.g << 2) | (c.g >> 4); - color.r = (c.r << 3) | (c.r >> 2); - color.a = 0xFF; - - return color; -} - -inline Vector4 toVector4(Color32 c) -{ - const float scale = 1.0f / 255.0f; - return Vector4(c.r * scale, c.g * scale, c.b * scale, c.a * scale); -} + union { + struct { +#if NV_LITTLE_ENDIAN + uint16 b : 4; + uint16 g : 4; + uint16 r : 4; + uint16 a : 4; +#else + uint16 a : 4; + uint16 r : 4; + uint16 g : 4; + uint16 b : 4; +#endif + }; + uint16 u; + }; + }; } // nv namespace Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/Color.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvmath/Color.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvmath/Color.cpp @@ -0,0 +1,4 @@ +// This code is in the public domain -- castanyo@yahoo.es + +#include "Color.h" +#include "Color.inl" Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/Color.inl =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvmath/Color.inl +++ ps/trunk/libraries/source/nvtt/src/src/nvmath/Color.inl @@ -0,0 +1,203 @@ +// This code is in the public domain -- castanyo@yahoo.es + +#pragma once +#ifndef NV_MATH_COLOR_INL +#define NV_MATH_COLOR_INL + +#include "Color.h" +#include "Vector.inl" +#include "ftoi.h" + + +namespace nv +{ + // for Color16 & Color16_4444 bitfields + NV_FORCEINLINE uint32 U32round(float f) { return uint32(floorf(f + 0.5f)); } + NV_FORCEINLINE uint16 U16round(float f) { return uint16(floorf(f + 0.5f)); } + NV_FORCEINLINE uint16 toU4_in_U16(int x) { nvDebugCheck(x >= 0 && x <= 15u); return (uint16)x; } + NV_FORCEINLINE uint16 toU5_in_U16(int x) { nvDebugCheck(x >= 0 && x <= 31u); return (uint16)x; } + NV_FORCEINLINE uint16 toU6_in_U16(int x) { nvDebugCheck(x >= 0 && x <= 63u); return (uint16)x; } + + // Clamp color components. + inline Vector3 colorClamp(Vector3::Arg c) + { + return Vector3(saturate(c.x), saturate(c.y), saturate(c.z)); + } + + // Clamp without allowing the hue to change. + inline Vector3 colorNormalize(Vector3::Arg c) + { + float scale = 1.0f; + if (c.x > scale) scale = c.x; + if (c.y > scale) scale = c.y; + if (c.z > scale) scale = c.z; + return c / scale; + } + + // Convert Color16 from float components + inline Color16 toColor16(float r, float g, float b) + { + Color16 color; // 5,6,5 + color.r = toU5_in_U16(nv::U16round(saturate(r) * 31u)); + color.g = toU6_in_U16(nv::U16round(saturate(g) * 63u)); + color.b = toU5_in_U16(nv::U16round(saturate(b) * 31u)); + return color; + } + + // Convert Color32 to Color16. 
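    // (Illustrative note, not part of the upstream patch: the 565 conversion below just
    //  drops low-order bits, and toColor32 further down re-expands by replicating high
    //  bits into the low bits. Worked example for one channel:
    //      r = 200 (11001000b)  ->  200 >> 3 = 25 (11001b)       // pack into 5 bits
    //      (25 << 3) | (25 >> 2) = 200 | 6 = 206                 // expand back to 8 bits
    //  so a round trip stays within a few least-significant bits of the original.)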
+ inline Color16 toColor16(Color32 c) + { + Color16 color; + // rrrrrggggggbbbbb + // rrrrr000gggggg00bbbbb000 + // color.u = (c.u >> 3) & 0x1F; + // color.u |= (c.u >> 5) & 0x7E0; + // color.u |= (c.u >> 8) & 0xF800; + + color.r = c.r >> 3; + color.g = c.g >> 2; + color.b = c.b >> 3; + return color; + } + + // Convert Color32 to Color16_4444. + inline Color16_4444 toColor16_4444(Color32 c) + { + Color16_4444 color; + color.a = c.a >> 4; + color.r = c.r >> 4; + color.g = c.g >> 4; + color.b = c.b >> 4; + return color; + } + + // Convert float[4] to Color16_4444. + inline Color16_4444 toColor16_4444(float r, float g, float b, float a) + { + Color16_4444 color; + color.a = toU4_in_U16(nv::U16round(saturate(a) * 15u)); + color.r = toU4_in_U16(nv::U16round(saturate(r) * 15u)); + color.g = toU4_in_U16(nv::U16round(saturate(g) * 15u)); + color.b = toU4_in_U16(nv::U16round(saturate(b) * 15u)); + return color; + } + + // Convert float[4] to Color16_4444. + inline Color16_4444 toColor16_4444_from_argb(float * fc) + { + Color16_4444 color; + color.a = toU4_in_U16(nv::U16round(saturate(fc[0]) * 15u)); + color.r = toU4_in_U16(nv::U16round(saturate(fc[1]) * 15u)); + color.g = toU4_in_U16(nv::U16round(saturate(fc[2]) * 15u)); + color.b = toU4_in_U16(nv::U16round(saturate(fc[3]) * 15u)); + return color; + } + + // Convert float[4] to Color16_4444. + inline Color16_4444 toColor16_4444_from_bgra(float * fc) + { + Color16_4444 color; + color.b = toU4_in_U16(nv::U16round(saturate(fc[0]) * 15u)); + color.g = toU4_in_U16(nv::U16round(saturate(fc[1]) * 15u)); + color.r = toU4_in_U16(nv::U16round(saturate(fc[2]) * 15u)); + color.a = toU4_in_U16(nv::U16round(saturate(fc[3]) * 15u)); + return color; + } + + // Promote 16 bit color to 32 bit using regular bit expansion. + inline Color32 toColor32(Color16 c) + { + Color32 color; + // c.u = ((col0.u << 3) & 0xf8) | ((col0.u << 5) & 0xfc00) | ((col0.u << 8) & 0xf80000); + // c.u |= (c.u >> 5) & 0x070007; + // c.u |= (c.u >> 6) & 0x000300; + + color.b = (c.b << 3) | (c.b >> 2); + color.g = (c.g << 2) | (c.g >> 4); + color.r = (c.r << 3) | (c.r >> 2); + color.a = 0xFF; + + return color; + } + + // @@ Quantize with exact endpoints or with uniform bins? 
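    // (Illustrative note, not from the patch: the @@ question above concerns how the
    //  float -> byte conversion below should quantize. As written it clamps to [0,1] and
    //  rounds uniformly, e.g. v.x = 0.25f gives ftoi_round(0.25f * 255) =
    //  ftoi_round(63.75f) = 64.)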
+ inline Color32 toColor32(const Vector4 & v) + { + Color32 color; + color.r = U8(ftoi_round(saturate(v.x) * 255)); + color.g = U8(ftoi_round(saturate(v.y) * 255)); + color.b = U8(ftoi_round(saturate(v.z) * 255)); + color.a = U8(ftoi_round(saturate(v.w) * 255)); + return color; + } + + inline Color32 toColor32_from_bgra(const Vector4 & v) + { + Color32 color; + color.b = U8(ftoi_round(saturate(v.x) * 255)); + color.g = U8(ftoi_round(saturate(v.y) * 255)); + color.r = U8(ftoi_round(saturate(v.z) * 255)); + color.a = U8(ftoi_round(saturate(v.w) * 255)); + return color; + } + + inline Color32 toColor32_from_argb(const Vector4 & v) + { + Color32 color; + color.a = U8(ftoi_round(saturate(v.x) * 255)); + color.r = U8(ftoi_round(saturate(v.y) * 255)); + color.g = U8(ftoi_round(saturate(v.z) * 255)); + color.b = U8(ftoi_round(saturate(v.w) * 255)); + return color; + } + + inline Vector4 toVector4(Color32 c) + { + const float scale = 1.0f / 255.0f; + return Vector4(c.r * scale, c.g * scale, c.b * scale, c.a * scale); + } + + + inline float perceptualColorDistance(Vector3::Arg c0, Vector3::Arg c1) + { + float rmean = (c0.x + c1.x) * 0.5f; + float r = c1.x - c0.x; + float g = c1.y - c0.y; + float b = c1.z - c0.z; + return sqrtf((2 + rmean)*r*r + 4*g*g + (3 - rmean)*b*b); + } + + + inline float hue(float r, float g, float b) { + float h = atan2f(sqrtf(3.0f)*(g-b), 2*r-g-b) * (1.0f / (2 * PI)) + 0.5f; + return h; + } + + inline float toSrgb(float f) { + if (nv::isNan(f)) f = 0.0f; + else if (f <= 0.0f) f = 0.0f; + else if (f <= 0.0031308f) f = 12.92f * f; + else if (f <= 1.0f) f = (powf(f, 0.41666f) * 1.055f) - 0.055f; + else f = 1.0f; + return f; + } + + inline float fromSrgb(float f) { + if (f < 0.0f) f = 0.0f; + else if (f < 0.04045f) f = f / 12.92f; + else if (f <= 1.0f) f = powf((f + 0.055f) / 1.055f, 2.4f); + else f = 1.0f; + return f; + } + + inline Vector3 toSrgb(const Vector3 & v) { + return Vector3(toSrgb(v.x), toSrgb(v.y), toSrgb(v.z)); + } + + inline Vector3 fromSrgb(const Vector3 & v) { + return Vector3(fromSrgb(v.x), fromSrgb(v.y), fromSrgb(v.z)); + } + +} // nv namespace + +#endif // NV_MATH_COLOR_INL Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/Fitting.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvmath/Fitting.h +++ ps/trunk/libraries/source/nvtt/src/src/nvmath/Fitting.h @@ -0,0 +1,50 @@ +// This code is in the public domain -- Ignacio Castaño + +#pragma once +#ifndef NV_MATH_FITTING_H +#define NV_MATH_FITTING_H + +#include "Vector.h" +#include "Plane.h" + +namespace nv +{ + namespace Fit + { + Vector3 computeCentroid(int n, const Vector3 * points); + Vector3 computeCentroid(int n, const Vector3 * points, const float * weights, const Vector3 & metric); + + Vector4 computeCentroid(int n, const Vector4 * points); + Vector4 computeCentroid(int n, const Vector4 * points, const float * weights, const Vector4 & metric); + + Vector3 computeCovariance(int n, const Vector3 * points, float * covariance); + Vector3 computeCovariance(int n, const Vector3 * points, const float * weights, const Vector3 & metric, float * covariance); + + Vector4 computeCovariance(int n, const Vector4 * points, float * covariance); + Vector4 computeCovariance(int n, const Vector4 * points, const float * weights, const Vector4 & metric, float * covariance); + + NVMATH_API Vector3 computePrincipalComponent_PowerMethod(int n, const Vector3 * points); + NVMATH_API Vector3 computePrincipalComponent_PowerMethod(int n, const Vector3 * points, const 
float * weights, const Vector3 & metric); + + NVMATH_API Vector3 computePrincipalComponent_EigenSolver(int n, const Vector3 * points); + NVMATH_API Vector3 computePrincipalComponent_EigenSolver(int n, const Vector3 * points, const float * weights, const Vector3 & metric); + + NVMATH_API Vector4 computePrincipalComponent_EigenSolver(int n, const Vector4 * points); + NVMATH_API Vector4 computePrincipalComponent_EigenSolver(int n, const Vector4 * points, const float * weights, const Vector4 & metric); + + Vector3 computePrincipalComponent_SVD(int n, const Vector3 * points); + Vector4 computePrincipalComponent_SVD(int n, const Vector4 * points); + + Plane bestPlane(int n, const Vector3 * points); + bool isPlanar(int n, const Vector3 * points, float epsilon = NV_EPSILON); + + bool eigenSolveSymmetric3(const float matrix[6], float eigenValues[3], Vector3 eigenVectors[3]); + bool eigenSolveSymmetric4(const float matrix[10], float eigenValues[4], Vector4 eigenVectors[4]); + + // Returns number of clusters [1-4]. + int compute4Means(int n, const Vector3 * points, const float * weights, const Vector3 & metric, Vector3 * cluster); + } + +} // nv namespace + +#endif // NV_MATH_FITTING_H Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/Fitting.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvmath/Fitting.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvmath/Fitting.cpp @@ -0,0 +1,1205 @@ +// This code is in the public domain -- Ignacio Castaño + +#include "Fitting.h" +#include "Vector.inl" +#include "Plane.inl" + +#include "nvcore/Array.inl" +#include "nvcore/Utils.h" // max, swap + +#include // FLT_MAX +//#include +#include + +using namespace nv; + +// @@ Move to EigenSolver.h + +// @@ We should be able to do something cheaper... 
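// (Note, not in the upstream source: the helper below seeds the power iteration in
//  firstEigenVector_PowerMethod by returning whichever row of the symmetric covariance
//  matrix has the largest squared length; power iteration only needs a starting vector
//  with a nonzero component along the dominant eigenvector, so this cheap guess suffices.)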
+static Vector3 estimatePrincipalComponent(const float * __restrict matrix) +{ + const Vector3 row0(matrix[0], matrix[1], matrix[2]); + const Vector3 row1(matrix[1], matrix[3], matrix[4]); + const Vector3 row2(matrix[2], matrix[4], matrix[5]); + + float r0 = lengthSquared(row0); + float r1 = lengthSquared(row1); + float r2 = lengthSquared(row2); + + if (r0 > r1 && r0 > r2) return row0; + if (r1 > r2) return row1; + return row2; +} + + +static inline Vector3 firstEigenVector_PowerMethod(const float *__restrict matrix) +{ + if (matrix[0] == 0 && matrix[3] == 0 && matrix[5] == 0) + { + return Vector3(0.0f); + } + + Vector3 v = estimatePrincipalComponent(matrix); + + const int NUM = 8; + for (int i = 0; i < NUM; i++) + { + float x = v.x * matrix[0] + v.y * matrix[1] + v.z * matrix[2]; + float y = v.x * matrix[1] + v.y * matrix[3] + v.z * matrix[4]; + float z = v.x * matrix[2] + v.y * matrix[4] + v.z * matrix[5]; + + float norm = max(max(x, y), z); + + v = Vector3(x, y, z) / norm; + } + + return v; +} + + +Vector3 nv::Fit::computeCentroid(int n, const Vector3 *__restrict points) +{ + Vector3 centroid(0.0f); + + for (int i = 0; i < n; i++) + { + centroid += points[i]; + } + centroid /= float(n); + + return centroid; +} + +Vector3 nv::Fit::computeCentroid(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric) +{ + Vector3 centroid(0.0f); + float total = 0.0f; + + for (int i = 0; i < n; i++) + { + total += weights[i]; + centroid += weights[i]*points[i]; + } + centroid /= total; + + return centroid; +} + +Vector4 nv::Fit::computeCentroid(int n, const Vector4 *__restrict points) +{ + Vector4 centroid(0.0f); + + for (int i = 0; i < n; i++) + { + centroid += points[i]; + } + centroid /= float(n); + + return centroid; +} + +Vector4 nv::Fit::computeCentroid(int n, const Vector4 *__restrict points, const float *__restrict weights, Vector4::Arg metric) +{ + Vector4 centroid(0.0f); + float total = 0.0f; + + for (int i = 0; i < n; i++) + { + total += weights[i]; + centroid += weights[i]*points[i]; + } + centroid /= total; + + return centroid; +} + + + +Vector3 nv::Fit::computeCovariance(int n, const Vector3 *__restrict points, float *__restrict covariance) +{ + // compute the centroid + Vector3 centroid = computeCentroid(n, points); + + // compute covariance matrix + for (int i = 0; i < 6; i++) + { + covariance[i] = 0.0f; + } + + for (int i = 0; i < n; i++) + { + Vector3 v = points[i] - centroid; + + covariance[0] += v.x * v.x; + covariance[1] += v.x * v.y; + covariance[2] += v.x * v.z; + covariance[3] += v.y * v.y; + covariance[4] += v.y * v.z; + covariance[5] += v.z * v.z; + } + + return centroid; +} + +Vector3 nv::Fit::computeCovariance(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric, float *__restrict covariance) +{ + // compute the centroid + Vector3 centroid = computeCentroid(n, points, weights, metric); + + // compute covariance matrix + for (int i = 0; i < 6; i++) + { + covariance[i] = 0.0f; + } + + for (int i = 0; i < n; i++) + { + Vector3 a = (points[i] - centroid) * metric; + Vector3 b = weights[i]*a; + + covariance[0] += a.x * b.x; + covariance[1] += a.x * b.y; + covariance[2] += a.x * b.z; + covariance[3] += a.y * b.y; + covariance[4] += a.y * b.z; + covariance[5] += a.z * b.z; + } + + return centroid; +} + +Vector4 nv::Fit::computeCovariance(int n, const Vector4 *__restrict points, float *__restrict covariance) +{ + // compute the centroid + Vector4 centroid = computeCentroid(n, points); + + // compute 
covariance matrix + for (int i = 0; i < 10; i++) + { + covariance[i] = 0.0f; + } + + for (int i = 0; i < n; i++) + { + Vector4 v = points[i] - centroid; + + covariance[0] += v.x * v.x; + covariance[1] += v.x * v.y; + covariance[2] += v.x * v.z; + covariance[3] += v.x * v.w; + + covariance[4] += v.y * v.y; + covariance[5] += v.y * v.z; + covariance[6] += v.y * v.w; + + covariance[7] += v.z * v.z; + covariance[8] += v.z * v.w; + + covariance[9] += v.w * v.w; + } + + return centroid; +} + +Vector4 nv::Fit::computeCovariance(int n, const Vector4 *__restrict points, const float *__restrict weights, Vector4::Arg metric, float *__restrict covariance) +{ + // compute the centroid + Vector4 centroid = computeCentroid(n, points, weights, metric); + + // compute covariance matrix + for (int i = 0; i < 10; i++) + { + covariance[i] = 0.0f; + } + + for (int i = 0; i < n; i++) + { + Vector4 a = (points[i] - centroid) * metric; + Vector4 b = weights[i]*a; + + covariance[0] += a.x * b.x; + covariance[1] += a.x * b.y; + covariance[2] += a.x * b.z; + covariance[3] += a.x * b.w; + + covariance[4] += a.y * b.y; + covariance[5] += a.y * b.z; + covariance[6] += a.y * b.w; + + covariance[7] += a.z * b.z; + covariance[8] += a.z * b.w; + + covariance[9] += a.w * b.w; + } + + return centroid; +} + + + +Vector3 nv::Fit::computePrincipalComponent_PowerMethod(int n, const Vector3 *__restrict points) +{ + float matrix[6]; + computeCovariance(n, points, matrix); + + return firstEigenVector_PowerMethod(matrix); +} + +Vector3 nv::Fit::computePrincipalComponent_PowerMethod(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric) +{ + float matrix[6]; + computeCovariance(n, points, weights, metric, matrix); + + return firstEigenVector_PowerMethod(matrix); +} + + + +static inline Vector3 firstEigenVector_EigenSolver3(const float *__restrict matrix) +{ + if (matrix[0] == 0 && matrix[3] == 0 && matrix[5] == 0) + { + return Vector3(0.0f); + } + + float eigenValues[3]; + Vector3 eigenVectors[3]; + if (!nv::Fit::eigenSolveSymmetric3(matrix, eigenValues, eigenVectors)) + { + return Vector3(0.0f); + } + + return eigenVectors[0]; +} + +Vector3 nv::Fit::computePrincipalComponent_EigenSolver(int n, const Vector3 *__restrict points) +{ + float matrix[6]; + computeCovariance(n, points, matrix); + + return firstEigenVector_EigenSolver3(matrix); +} + +Vector3 nv::Fit::computePrincipalComponent_EigenSolver(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric) +{ + float matrix[6]; + computeCovariance(n, points, weights, metric, matrix); + + return firstEigenVector_EigenSolver3(matrix); +} + + + +static inline Vector4 firstEigenVector_EigenSolver4(const float *__restrict matrix) +{ + if (matrix[0] == 0 && matrix[4] == 0 && matrix[7] == 0&& matrix[9] == 0) + { + return Vector4(0.0f); + } + + float eigenValues[4]; + Vector4 eigenVectors[4]; + if (!nv::Fit::eigenSolveSymmetric4(matrix, eigenValues, eigenVectors)) + { + return Vector4(0.0f); + } + + return eigenVectors[0]; +} + +Vector4 nv::Fit::computePrincipalComponent_EigenSolver(int n, const Vector4 *__restrict points) +{ + float matrix[10]; + computeCovariance(n, points, matrix); + + return firstEigenVector_EigenSolver4(matrix); +} + +Vector4 nv::Fit::computePrincipalComponent_EigenSolver(int n, const Vector4 *__restrict points, const float *__restrict weights, Vector4::Arg metric) +{ + float matrix[10]; + computeCovariance(n, points, weights, metric, matrix); + + return firstEigenVector_EigenSolver4(matrix); +} 
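A minimal usage sketch of the fitting interface declared in Fitting.h above (illustrative
only; the nv::Fit names come from this patch, while the wrapper function and include paths
are assumptions):

    #include "nvmath/Fitting.h"
    #include "nvmath/Vector.inl"

    // Dominant axis of a set of points, e.g. the colors of a block being compressed;
    // the first eigenvector of the covariance matrix is the direction of greatest variance.
    nv::Vector3 dominantAxis(int n, const nv::Vector3 * points)
    {
        return nv::Fit::computePrincipalComponent_EigenSolver(n, points);
    }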
+ + + +void ArvoSVD(int rows, int cols, float * Q, float * diag, float * R); + +Vector3 nv::Fit::computePrincipalComponent_SVD(int n, const Vector3 *__restrict points) +{ + // Store the points in an n x n matrix + Array Q; Q.resize(n*n, 0.0f); + for (int i = 0; i < n; ++i) + { + Q[i*n+0] = points[i].x; + Q[i*n+1] = points[i].y; + Q[i*n+2] = points[i].z; + } + + // Alloc space for the SVD outputs + Array diag; diag.resize(n, 0.0f); + Array R; R.resize(n*n, 0.0f); + + ArvoSVD(n, n, &Q[0], &diag[0], &R[0]); + + // Get the principal component + return Vector3(R[0], R[1], R[2]); +} + +Vector4 nv::Fit::computePrincipalComponent_SVD(int n, const Vector4 *__restrict points) +{ + // Store the points in an n x n matrix + Array Q; Q.resize(n*n, 0.0f); + for (int i = 0; i < n; ++i) + { + Q[i*n+0] = points[i].x; + Q[i*n+1] = points[i].y; + Q[i*n+2] = points[i].z; + Q[i*n+3] = points[i].w; + } + + // Alloc space for the SVD outputs + Array diag; diag.resize(n, 0.0f); + Array R; R.resize(n*n, 0.0f); + + ArvoSVD(n, n, &Q[0], &diag[0], &R[0]); + + // Get the principal component + return Vector4(R[0], R[1], R[2], R[3]); +} + + + +Plane nv::Fit::bestPlane(int n, const Vector3 *__restrict points) +{ + // compute the centroid and covariance + float matrix[6]; + Vector3 centroid = computeCovariance(n, points, matrix); + + if (matrix[0] == 0 && matrix[3] == 0 && matrix[5] == 0) + { + // If no plane defined, then return a horizontal plane. + return Plane(Vector3(0, 0, 1), centroid); + } + + float eigenValues[3]; + Vector3 eigenVectors[3]; + if (!eigenSolveSymmetric3(matrix, eigenValues, eigenVectors)) { + // If no plane defined, then return a horizontal plane. + return Plane(Vector3(0, 0, 1), centroid); + } + + return Plane(eigenVectors[2], centroid); +} + +bool nv::Fit::isPlanar(int n, const Vector3 * points, float epsilon/*=NV_EPSILON*/) +{ + // compute the centroid and covariance + float matrix[6]; + computeCovariance(n, points, matrix); + + float eigenValues[3]; + Vector3 eigenVectors[3]; + if (!eigenSolveSymmetric3(matrix, eigenValues, eigenVectors)) { + return false; + } + + return eigenValues[2] < epsilon; +} + + + +// Tridiagonal solver from Charles Bloom. +// Householder transforms followed by QL decomposition. +// Seems to be based on the code from Numerical Recipes in C. 
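For reference, a sketch of how the symmetric eigensolver below is typically driven
(illustrative; the packed layout and sort order are taken from eigenSolveSymmetric3 itself):

    // The 3x3 symmetric matrix is passed as its upper triangle {m00, m01, m02, m11, m12, m22}.
    float cov[6] = { 2.0f, 0.5f, 0.0f, 1.0f, 0.0f, 0.5f };
    float eigenValues[3];
    nv::Vector3 eigenVectors[3];
    if (nv::Fit::eigenSolveSymmetric3(cov, eigenValues, eigenVectors))
    {
        // Sorted on return: eigenValues[0] >= eigenValues[1] >= eigenValues[2],
        // so eigenVectors[0] is the principal direction.
    }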
+ +static void EigenSolver3_Tridiagonal(float mat[3][3], float * diag, float * subd); +static bool EigenSolver3_QLAlgorithm(float mat[3][3], float * diag, float * subd); + +bool nv::Fit::eigenSolveSymmetric3(const float matrix[6], float eigenValues[3], Vector3 eigenVectors[3]) +{ + nvDebugCheck(matrix != NULL && eigenValues != NULL && eigenVectors != NULL); + + float subd[3]; + float diag[3]; + float work[3][3]; + + work[0][0] = matrix[0]; + work[0][1] = work[1][0] = matrix[1]; + work[0][2] = work[2][0] = matrix[2]; + work[1][1] = matrix[3]; + work[1][2] = work[2][1] = matrix[4]; + work[2][2] = matrix[5]; + + EigenSolver3_Tridiagonal(work, diag, subd); + if (!EigenSolver3_QLAlgorithm(work, diag, subd)) + { + for (int i = 0; i < 3; i++) { + eigenValues[i] = 0; + eigenVectors[i] = Vector3(0); + } + return false; + } + + for (int i = 0; i < 3; i++) { + eigenValues[i] = (float)diag[i]; + } + + // eigenvectors are the columns; make them the rows : + + for (int i=0; i < 3; i++) + { + for (int j = 0; j < 3; j++) + { + eigenVectors[j].component[i] = (float) work[i][j]; + } + } + + // shuffle to sort by singular value : + if (eigenValues[2] > eigenValues[0] && eigenValues[2] > eigenValues[1]) + { + swap(eigenValues[0], eigenValues[2]); + swap(eigenVectors[0], eigenVectors[2]); + } + if (eigenValues[1] > eigenValues[0]) + { + swap(eigenValues[0], eigenValues[1]); + swap(eigenVectors[0], eigenVectors[1]); + } + if (eigenValues[2] > eigenValues[1]) + { + swap(eigenValues[1], eigenValues[2]); + swap(eigenVectors[1], eigenVectors[2]); + } + + nvDebugCheck(eigenValues[0] >= eigenValues[1] && eigenValues[0] >= eigenValues[2]); + nvDebugCheck(eigenValues[1] >= eigenValues[2]); + + return true; +} + +static void EigenSolver3_Tridiagonal(float mat[3][3], float * diag, float * subd) +{ + // Householder reduction T = Q^t M Q + // Input: + // mat, symmetric 3x3 matrix M + // Output: + // mat, orthogonal matrix Q + // diag, diagonal entries of T + // subd, subdiagonal entries of T (T is symmetric) + const float epsilon = 1e-08f; + + float a = mat[0][0]; + float b = mat[0][1]; + float c = mat[0][2]; + float d = mat[1][1]; + float e = mat[1][2]; + float f = mat[2][2]; + + diag[0] = a; + subd[2] = 0.f; + if (fabsf(c) >= epsilon) + { + const float ell = sqrtf(b*b+c*c); + b /= ell; + c /= ell; + const float q = 2*b*e+c*(f-d); + diag[1] = d+c*q; + diag[2] = f-c*q; + subd[0] = ell; + subd[1] = e-b*q; + mat[0][0] = 1; mat[0][1] = 0; mat[0][2] = 0; + mat[1][0] = 0; mat[1][1] = b; mat[1][2] = c; + mat[2][0] = 0; mat[2][1] = c; mat[2][2] = -b; + } + else + { + diag[1] = d; + diag[2] = f; + subd[0] = b; + subd[1] = e; + mat[0][0] = 1; mat[0][1] = 0; mat[0][2] = 0; + mat[1][0] = 0; mat[1][1] = 1; mat[1][2] = 0; + mat[2][0] = 0; mat[2][1] = 0; mat[2][2] = 1; + } +} + +static bool EigenSolver3_QLAlgorithm(float mat[3][3], float * diag, float * subd) +{ + // QL iteration with implicit shifting to reduce matrix from tridiagonal + // to diagonal + const int maxiter = 32; + + for (int ell = 0; ell < 3; ell++) + { + int iter; + for (iter = 0; iter < maxiter; iter++) + { + int m; + for (m = ell; m <= 1; m++) + { + float dd = fabsf(diag[m]) + fabsf(diag[m+1]); + if ( fabsf(subd[m]) + dd == dd ) + break; + } + if ( m == ell ) + break; + + float g = (diag[ell+1]-diag[ell])/(2*subd[ell]); + float r = sqrtf(g*g+1); + if ( g < 0 ) + g = diag[m]-diag[ell]+subd[ell]/(g-r); + else + g = diag[m]-diag[ell]+subd[ell]/(g+r); + float s = 1, c = 1, p = 0; + for (int i = m-1; i >= ell; i--) + { + float f = s*subd[i], b = c*subd[i]; + if ( fabsf(f) 
>= fabsf(g) ) + { + c = g/f; + r = sqrtf(c*c+1); + subd[i+1] = f*r; + c *= (s = 1/r); + } + else + { + s = f/g; + r = sqrtf(s*s+1); + subd[i+1] = g*r; + s *= (c = 1/r); + } + g = diag[i+1]-p; + r = (diag[i]-g)*s+2*b*c; + p = s*r; + diag[i+1] = g+p; + g = c*r-b; + + for (int k = 0; k < 3; k++) + { + f = mat[k][i+1]; + mat[k][i+1] = s*mat[k][i]+c*f; + mat[k][i] = c*mat[k][i]-s*f; + } + } + diag[ell] -= p; + subd[ell] = g; + subd[m] = 0; + } + + if ( iter == maxiter ) + // should not get here under normal circumstances + return false; + } + + return true; +} + + + +// Tridiagonal solver for 4x4 symmetric matrices. + +static void EigenSolver4_Tridiagonal(float mat[4][4], float * diag, float * subd); +static bool EigenSolver4_QLAlgorithm(float mat[4][4], float * diag, float * subd); + +bool nv::Fit::eigenSolveSymmetric4(const float matrix[10], float eigenValues[4], Vector4 eigenVectors[4]) +{ + nvDebugCheck(matrix != NULL && eigenValues != NULL && eigenVectors != NULL); + + float subd[4]; + float diag[4]; + float work[4][4]; + + work[0][0] = matrix[0]; + work[0][1] = work[1][0] = matrix[1]; + work[0][2] = work[2][0] = matrix[2]; + work[0][3] = work[3][0] = matrix[3]; + work[1][1] = matrix[4]; + work[1][2] = work[2][1] = matrix[5]; + work[1][3] = work[3][1] = matrix[6]; + work[2][2] = matrix[7]; + work[2][3] = work[3][2] = matrix[8]; + work[3][3] = matrix[9]; + + EigenSolver4_Tridiagonal(work, diag, subd); + if (!EigenSolver4_QLAlgorithm(work, diag, subd)) + { + for (int i = 0; i < 4; i++) { + eigenValues[i] = 0; + eigenVectors[i] = Vector4(0); + } + return false; + } + + for (int i = 0; i < 4; i++) { + eigenValues[i] = (float)diag[i]; + } + + // eigenvectors are the columns; make them the rows + + for (int i = 0; i < 4; i++) + { + for (int j = 0; j < 4; j++) + { + eigenVectors[j].component[i] = (float) work[i][j]; + } + } + + // sort by singular value + + for (int i = 0; i < 3; ++i) + { + for (int j = i+1; j < 4; ++j) + { + if (eigenValues[j] > eigenValues[i]) + { + swap(eigenValues[i], eigenValues[j]); + swap(eigenVectors[i], eigenVectors[j]); + } + } + } + + nvDebugCheck(eigenValues[0] >= eigenValues[1] && eigenValues[0] >= eigenValues[2] && eigenValues[0] >= eigenValues[3]); + nvDebugCheck(eigenValues[1] >= eigenValues[2] && eigenValues[1] >= eigenValues[3]); + nvDebugCheck(eigenValues[2] >= eigenValues[2]); + + return true; +} + +#include "nvmath/Matrix.inl" + +inline float signNonzero(float x) +{ + return (x >= 0.0f) ? 1.0f : -1.0f; +} + +static void EigenSolver4_Tridiagonal(float mat[4][4], float * diag, float * subd) +{ + // Householder reduction T = Q^t M Q + // Input: + // mat, symmetric 3x3 matrix M + // Output: + // mat, orthogonal matrix Q + // diag, diagonal entries of T + // subd, subdiagonal entries of T (T is symmetric) + + static const int n = 4; + + // Set epsilon relative to size of elements in matrix + static const float relEpsilon = 1e-6f; + float maxElement = FLT_MAX; + for (int i = 0; i < n; ++i) + for (int j = 0; j < n; ++j) + maxElement = max(maxElement, fabsf(mat[i][j])); + float epsilon = relEpsilon * maxElement; + + // Iterative algorithm, works for any size of matrix but might be slower than + // a closed-form solution for symmetric 4x4 matrices. Based on this article: + // http://en.wikipedia.org/wiki/Householder_transformation#Tridiagonalization + + Matrix A, Q(identity); + memcpy(&A, mat, sizeof(float)*n*n); + + // We proceed from left to right, making the off-tridiagonal entries zero in + // one column of the matrix at a time. 
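        // (Note, not in the upstream source: each pass builds a Householder reflection
        //  P = I - 2*v*v^T from the entries of column k below the diagonal; alpha is the
        //  signed length of that sub-column and r normalizes v so that P*A*P zeroes the
        //  entries below the first subdiagonal of column k. After the n-2 passes A is
        //  tridiagonal and Q holds the accumulated product of the reflections.)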
+ for (int k = 0; k < n - 2; ++k) + { + float sum = 0.0f; + for (int j = k+1; j < n; ++j) + sum += A(j,k)*A(j,k); + float alpha = -signNonzero(A(k+1,k)) * sqrtf(sum); + float r = sqrtf(0.5f * (alpha*alpha - A(k+1,k)*alpha)); + + // If r is zero, skip this column - already in tridiagonal form + if (fabsf(r) < epsilon) + continue; + + float v[n] = {}; + v[k+1] = 0.5f * (A(k+1,k) - alpha) / r; + for (int j = k+2; j < n; ++j) + v[j] = 0.5f * A(j,k) / r; + + Matrix P(identity); + for (int i = 0; i < n; ++i) + for (int j = 0; j < n; ++j) + P(i,j) -= 2.0f * v[i] * v[j]; + + A = mul(mul(P, A), P); + Q = mul(Q, P); + } + + nvDebugCheck(fabsf(A(2,0)) < epsilon); + nvDebugCheck(fabsf(A(0,2)) < epsilon); + nvDebugCheck(fabsf(A(3,0)) < epsilon); + nvDebugCheck(fabsf(A(0,3)) < epsilon); + nvDebugCheck(fabsf(A(3,1)) < epsilon); + nvDebugCheck(fabsf(A(1,3)) < epsilon); + + for (int i = 0; i < n; ++i) + diag[i] = A(i,i); + for (int i = 0; i < n - 1; ++i) + subd[i] = A(i+1,i); + subd[n-1] = 0.0f; + + memcpy(mat, &Q, sizeof(float)*n*n); +} + +static bool EigenSolver4_QLAlgorithm(float mat[4][4], float * diag, float * subd) +{ + // QL iteration with implicit shifting to reduce matrix from tridiagonal + // to diagonal + const int maxiter = 32; + + for (int ell = 0; ell < 4; ell++) + { + int iter; + for (iter = 0; iter < maxiter; iter++) + { + int m; + for (m = ell; m < 3; m++) + { + float dd = fabsf(diag[m]) + fabsf(diag[m+1]); + if ( fabsf(subd[m]) + dd == dd ) + break; + } + if ( m == ell ) + break; + + float g = (diag[ell+1]-diag[ell])/(2*subd[ell]); + float r = sqrtf(g*g+1); + if ( g < 0 ) + g = diag[m]-diag[ell]+subd[ell]/(g-r); + else + g = diag[m]-diag[ell]+subd[ell]/(g+r); + float s = 1, c = 1, p = 0; + for (int i = m-1; i >= ell; i--) + { + float f = s*subd[i], b = c*subd[i]; + if ( fabsf(f) >= fabsf(g) ) + { + c = g/f; + r = sqrtf(c*c+1); + subd[i+1] = f*r; + c *= (s = 1/r); + } + else + { + s = f/g; + r = sqrtf(s*s+1); + subd[i+1] = g*r; + s *= (c = 1/r); + } + g = diag[i+1]-p; + r = (diag[i]-g)*s+2*b*c; + p = s*r; + diag[i+1] = g+p; + g = c*r-b; + + for (int k = 0; k < 4; k++) + { + f = mat[k][i+1]; + mat[k][i+1] = s*mat[k][i]+c*f; + mat[k][i] = c*mat[k][i]-s*f; + } + } + diag[ell] -= p; + subd[ell] = g; + subd[m] = 0; + } + + if ( iter == maxiter ) + // should not get here under normal circumstances + return false; + } + + return true; +} + + + +int nv::Fit::compute4Means(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric, Vector3 *__restrict cluster) +{ + // Compute principal component. + float matrix[6]; + Vector3 centroid = computeCovariance(n, points, weights, metric, matrix); + Vector3 principal = firstEigenVector_PowerMethod(matrix); + + // Pick initial solution. + int mini, maxi; + mini = maxi = 0; + + float mindps, maxdps; + mindps = maxdps = dot(points[0] - centroid, principal); + + for (int i = 1; i < n; ++i) + { + float dps = dot(points[i] - centroid, principal); + + if (dps < mindps) { + mindps = dps; + mini = i; + } + else { + maxdps = dps; + maxi = i; + } + } + + cluster[0] = centroid + mindps * principal; + cluster[1] = centroid + maxdps * principal; + cluster[2] = (2.0f * cluster[0] + cluster[1]) / 3.0f; + cluster[3] = (2.0f * cluster[1] + cluster[0]) / 3.0f; + + // Now we have to iteratively refine the clusters. + while (true) + { + Vector3 newCluster[4] = { Vector3(0.0f), Vector3(0.0f), Vector3(0.0f), Vector3(0.0f) }; + float total[4] = {0, 0, 0, 0}; + + for (int i = 0; i < n; ++i) + { + // Find nearest cluster. 
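            // (Note, not from upstream: "nearest" is squared distance after scaling each
            //  channel difference by the caller-supplied metric, so the same perceptual
            //  weighting used for the covariance above also drives the k-means assignment.)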
+ int nearest = 0; + float mindist = FLT_MAX; + for (int j = 0; j < 4; j++) + { + float dist = lengthSquared((cluster[j] - points[i]) * metric); + if (dist < mindist) + { + mindist = dist; + nearest = j; + } + } + + newCluster[nearest] += weights[i] * points[i]; + total[nearest] += weights[i]; + } + + for (int j = 0; j < 4; j++) + { + if (total[j] != 0) + newCluster[j] /= total[j]; + } + + if (equal(cluster[0], newCluster[0]) && equal(cluster[1], newCluster[1]) && + equal(cluster[2], newCluster[2]) && equal(cluster[3], newCluster[3])) + { + return (total[0] != 0) + (total[1] != 0) + (total[2] != 0) + (total[3] != 0); + } + + cluster[0] = newCluster[0]; + cluster[1] = newCluster[1]; + cluster[2] = newCluster[2]; + cluster[3] = newCluster[3]; + + // Sort clusters by weight. + for (int i = 0; i < 4; i++) + { + for (int j = i; j > 0 && total[j] > total[j - 1]; j--) + { + swap( total[j], total[j - 1] ); + swap( cluster[j], cluster[j - 1] ); + } + } + } +} + + + +// Adaptation of James Arvo's SVD code, as found in ZOH. + +inline float Sqr(float x) { return x*x; } + +inline float svd_pythag( float a, float b ) +{ + float at = fabsf(a); + float bt = fabsf(b); + if( at > bt ) + return at * sqrtf( 1.0f + Sqr( bt / at ) ); + else if( bt > 0.0f ) + return bt * sqrtf( 1.0f + Sqr( at / bt ) ); + else return 0.0f; +} + +inline float SameSign( float a, float b ) +{ + float t; + if( b >= 0.0f ) t = fabsf( a ); + else t = -fabsf( a ); + return t; +} + +void ArvoSVD(int rows, int cols, float * Q, float * diag, float * R) +{ + static const int MaxIterations = 30; + + int i, j, k, l, p, q, iter; + float c, f, h, s, x, y, z; + float norm = 0.0f; + float g = 0.0f; + float scale = 0.0f; + + Array temp; temp.resize(cols, 0.0f); + + for( i = 0; i < cols; i++ ) + { + temp[i] = scale * g; + scale = 0.0f; + g = 0.0f; + s = 0.0f; + l = i + 1; + + if( i < rows ) + { + for( k = i; k < rows; k++ ) scale += fabsf( Q[k*cols+i] ); + if( scale != 0.0f ) + { + for( k = i; k < rows; k++ ) + { + Q[k*cols+i] /= scale; + s += Sqr( Q[k*cols+i] ); + } + f = Q[i*cols+i]; + g = -SameSign( sqrtf(s), f ); + h = f * g - s; + Q[i*cols+i] = f - g; + if( i != cols - 1 ) + { + for( j = l; j < cols; j++ ) + { + s = 0.0f; + for( k = i; k < rows; k++ ) s += Q[k*cols+i] * Q[k*cols+j]; + f = s / h; + for( k = i; k < rows; k++ ) Q[k*cols+j] += f * Q[k*cols+i]; + } + } + for( k = i; k < rows; k++ ) Q[k*cols+i] *= scale; + } + } + + diag[i] = scale * g; + g = 0.0f; + s = 0.0f; + scale = 0.0f; + + if( i < rows && i != cols - 1 ) + { + for( k = l; k < cols; k++ ) scale += fabsf( Q[i*cols+k] ); + if( scale != 0.0f ) + { + for( k = l; k < cols; k++ ) + { + Q[i*cols+k] /= scale; + s += Sqr( Q[i*cols+k] ); + } + f = Q[i*cols+l]; + g = -SameSign( sqrtf(s), f ); + h = f * g - s; + Q[i*cols+l] = f - g; + for( k = l; k < cols; k++ ) temp[k] = Q[i*cols+k] / h; + if( i != rows - 1 ) + { + for( j = l; j < rows; j++ ) + { + s = 0.0f; + for( k = l; k < cols; k++ ) s += Q[j*cols+k] * Q[i*cols+k]; + for( k = l; k < cols; k++ ) Q[j*cols+k] += s * temp[k]; + } + } + for( k = l; k < cols; k++ ) Q[i*cols+k] *= scale; + } + } + norm = max( norm, fabsf( diag[i] ) + fabsf( temp[i] ) ); + } + + + for( i = cols - 1; i >= 0; i-- ) + { + if( i < cols - 1 ) + { + if( g != 0.0f ) + { + for( j = l; j < cols; j++ ) R[i*cols+j] = ( Q[i*cols+j] / Q[i*cols+l] ) / g; + for( j = l; j < cols; j++ ) + { + s = 0.0f; + for( k = l; k < cols; k++ ) s += Q[i*cols+k] * R[j*cols+k]; + for( k = l; k < cols; k++ ) R[j*cols+k] += s * R[i*cols+k]; + } + } + for( j = l; j < cols; j++ ) + { + 
R[i*cols+j] = 0.0f; + R[j*cols+i] = 0.0f; + } + } + R[i*cols+i] = 1.0f; + g = temp[i]; + l = i; + } + + + for( i = cols - 1; i >= 0; i-- ) + { + l = i + 1; + g = diag[i]; + if( i < cols - 1 ) for( j = l; j < cols; j++ ) Q[i*cols+j] = 0.0f; + if( g != 0.0f ) + { + g = 1.0f / g; + if( i != cols - 1 ) + { + for( j = l; j < cols; j++ ) + { + s = 0.0f; + for( k = l; k < rows; k++ ) s += Q[k*cols+i] * Q[k*cols+j]; + f = ( s / Q[i*cols+i] ) * g; + for( k = i; k < rows; k++ ) Q[k*cols+j] += f * Q[k*cols+i]; + } + } + for( j = i; j < rows; j++ ) Q[j*cols+i] *= g; + } + else + { + for( j = i; j < rows; j++ ) Q[j*cols+i] = 0.0f; + } + Q[i*cols+i] += 1.0f; + } + + + for( k = cols - 1; k >= 0; k-- ) + { + for( iter = 1; iter <= MaxIterations; iter++ ) + { + int jump; + + for( l = k; l >= 0; l-- ) + { + q = l - 1; + if( fabsf( temp[l] ) + norm == norm ) { jump = 1; break; } + if( fabsf( diag[q] ) + norm == norm ) { jump = 0; break; } + } + + if( !jump ) + { + c = 0.0f; + s = 1.0f; + for( i = l; i <= k; i++ ) + { + f = s * temp[i]; + temp[i] *= c; + if( fabsf( f ) + norm == norm ) break; + g = diag[i]; + h = svd_pythag( f, g ); + diag[i] = h; + h = 1.0f / h; + c = g * h; + s = -f * h; + for( j = 0; j < rows; j++ ) + { + y = Q[j*cols+q]; + z = Q[j*cols+i]; + Q[j*cols+q] = y * c + z * s; + Q[j*cols+i] = z * c - y * s; + } + } + } + + z = diag[k]; + if( l == k ) + { + if( z < 0.0f ) + { + diag[k] = -z; + for( j = 0; j < cols; j++ ) R[k*cols+j] *= -1.0f; + } + break; + } + if( iter >= MaxIterations ) return; + x = diag[l]; + q = k - 1; + y = diag[q]; + g = temp[q]; + h = temp[k]; + f = ( ( y - z ) * ( y + z ) + ( g - h ) * ( g + h ) ) / ( 2.0f * h * y ); + g = svd_pythag( f, 1.0f ); + f = ( ( x - z ) * ( x + z ) + h * ( ( y / ( f + SameSign( g, f ) ) ) - h ) ) / x; + c = 1.0f; + s = 1.0f; + for( j = l; j <= q; j++ ) + { + i = j + 1; + g = temp[i]; + y = diag[i]; + h = s * g; + g = c * g; + z = svd_pythag( f, h ); + temp[j] = z; + c = f / z; + s = h / z; + f = x * c + g * s; + g = g * c - x * s; + h = y * s; + y = y * c; + for( p = 0; p < cols; p++ ) + { + x = R[j*cols+p]; + z = R[i*cols+p]; + R[j*cols+p] = x * c + z * s; + R[i*cols+p] = z * c - x * s; + } + z = svd_pythag( f, h ); + diag[j] = z; + if( z != 0.0f ) + { + z = 1.0f / z; + c = f * z; + s = h * z; + } + f = c * g + s * y; + x = c * y - s * g; + for( p = 0; p < rows; p++ ) + { + y = Q[p*cols+j]; + z = Q[p*cols+i]; + Q[p*cols+j] = y * c + z * s; + Q[p*cols+i] = z * c - y * s; + } + } + temp[l] = 0.0f; + temp[k] = f; + diag[k] = x; + } + } + + // Sort the singular values into descending order. + + for( i = 0; i < cols - 1; i++ ) + { + float biggest = diag[i]; // Biggest singular value so far. + int bindex = i; // The row/col it occurred in. + for( j = i + 1; j < cols; j++ ) + { + if( diag[j] > biggest ) + { + biggest = diag[j]; + bindex = j; + } + } + if( bindex != i ) // Need to swap rows and columns. + { + // Swap columns in Q. + for (int j = 0; j < rows; ++j) + swap(Q[j*cols+i], Q[j*cols+bindex]); + + // Swap rows in R. + for (int j = 0; j < rows; ++j) + swap(R[i*cols+j], R[bindex*cols+j]); + + // Swap elements in diag. 
+ swap(diag[i], diag[bindex]); + } + } +} Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/Gamma.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvmath/Gamma.h +++ ps/trunk/libraries/source/nvtt/src/src/nvmath/Gamma.h @@ -0,0 +1,38 @@ +// +// Fast implementations of powf(x,5/11) and powf(x,11/5) for gamma conversion +// Copyright 2017 Ken Cooke +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// + +#pragma once +#ifndef NV_MATH_GAMMA_H +#define NV_MATH_GAMMA_H + +#include "nvmath.h" + +namespace nv { + + // gamma conversion of float array (in-place is allowed) + NVMATH_API void powf_5_11(const float* src, float* dst, int count); + NVMATH_API void powf_11_5(const float* src, float* dst, int count); + +} // nv namespace + +#endif // NV_MATH_GAMMA_H Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/Gamma.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvmath/Gamma.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvmath/Gamma.cpp @@ -0,0 +1,444 @@ +// +// Fast implementations of powf(x,5/11) and powf(x,11/5) for gamma conversion +// Copyright 2017 Ken Cooke +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+// + +#include "Gamma.h" +#include +#include + +#define INFINITE_RESULT std::numeric_limits::infinity() + +// +// pow(2.0, e * 5/11.0) over e=[-127,128] +// +static const float pow_5_11_table[512] = { + // sign bit = 0 + 0.00000000e+00f, 5.74369237e-18f, 7.87087416e-18f, 1.07858603e-17f, + 1.47804139e-17f, 2.02543544e-17f, 2.77555756e-17f, 3.80348796e-17f, + 5.21211368e-17f, 7.14242467e-17f, 9.78762916e-17f, 1.34124875e-16f, + 1.83798156e-16f, 2.51867973e-16f, 3.45147530e-16f, 4.72973245e-16f, + 6.48139341e-16f, 8.88178420e-16f, 1.21711615e-15f, 1.66787638e-15f, + 2.28557589e-15f, 3.13204133e-15f, 4.29199599e-15f, 5.88154098e-15f, + 8.05977514e-15f, 1.10447209e-14f, 1.51351438e-14f, 2.07404589e-14f, + 2.84217094e-14f, 3.89477167e-14f, 5.33720441e-14f, 7.31384286e-14f, + 1.00225323e-13f, 1.37343872e-13f, 1.88209311e-13f, 2.57912805e-13f, + 3.53431070e-13f, 4.84324603e-13f, 6.63694685e-13f, 9.09494702e-13f, + 1.24632693e-12f, 1.70790541e-12f, 2.34042972e-12f, 3.20721032e-12f, + 4.39500389e-12f, 6.02269797e-12f, 8.25320975e-12f, 1.13097942e-11f, + 1.54983873e-11f, 2.12382299e-11f, 2.91038305e-11f, 3.98824619e-11f, + 5.46529731e-11f, 7.48937509e-11f, 1.02630730e-10f, 1.40640125e-10f, + 1.92726335e-10f, 2.64102712e-10f, 3.61913416e-10f, 4.95948393e-10f, + 6.79623358e-10f, 9.31322575e-10f, 1.27623878e-09f, 1.74889514e-09f, + 2.39660003e-09f, 3.28418337e-09f, 4.50048399e-09f, 6.16724272e-09f, + 8.45128678e-09f, 1.15812293e-08f, 1.58703486e-08f, 2.17479474e-08f, + 2.98023224e-08f, 4.08396410e-08f, 5.59646445e-08f, 7.66912009e-08f, + 1.05093868e-07f, 1.44015488e-07f, 1.97351767e-07f, 2.70441177e-07f, + 3.70599338e-07f, 5.07851155e-07f, 6.95934318e-07f, 9.53674316e-07f, + 1.30686851e-06f, 1.79086862e-06f, 2.45411843e-06f, 3.36300377e-06f, + 4.60849560e-06f, 6.31525654e-06f, 8.65411766e-06f, 1.18591788e-05f, + 1.62512370e-05f, 2.22698982e-05f, 3.05175781e-05f, 4.18197924e-05f, + 5.73077959e-05f, 7.85317898e-05f, 1.07616121e-04f, 1.47471859e-04f, + 2.02088209e-04f, 2.76931765e-04f, 3.79493722e-04f, 5.20039583e-04f, + 7.12636742e-04f, 9.76562500e-04f, 1.33823336e-03f, 1.83384947e-03f, + 2.51301727e-03f, 3.44371586e-03f, 4.71909950e-03f, 6.46682270e-03f, + 8.86181649e-03f, 1.21437991e-02f, 1.66412666e-02f, 2.28043757e-02f, + 3.12500000e-02f, 4.28234674e-02f, 5.86831830e-02f, 8.04165527e-02f, + 1.10198908e-01f, 1.51011184e-01f, 2.06938326e-01f, 2.83578128e-01f, + 3.88601571e-01f, 5.32520533e-01f, 7.29740024e-01f, 1.00000000e+00f, + 1.37035096e+00f, 1.87786186e+00f, 2.57332969e+00f, 3.52636504e+00f, + 4.83235788e+00f, 6.62202644e+00f, 9.07450008e+00f, 1.24352503e+01f, + 1.70406570e+01f, 2.33516808e+01f, 3.20000000e+01f, 4.38512306e+01f, + 6.00915794e+01f, 8.23465500e+01f, 1.12843681e+02f, 1.54635452e+02f, + 2.11904846e+02f, 2.90384003e+02f, 3.97928009e+02f, 5.45301025e+02f, + 7.47253784e+02f, 1.02400000e+03f, 1.40323938e+03f, 1.92293054e+03f, + 2.63508960e+03f, 3.61099780e+03f, 4.94833447e+03f, 6.78095508e+03f, + 9.29228809e+03f, 1.27336963e+04f, 1.74496328e+04f, 2.39121211e+04f, + 3.27680000e+04f, 4.49036602e+04f, 6.15337773e+04f, 8.43228672e+04f, + 1.15551930e+05f, 1.58346703e+05f, 2.16990563e+05f, 2.97353219e+05f, + 4.07478281e+05f, 5.58388250e+05f, 7.65187875e+05f, 1.04857600e+06f, + 1.43691713e+06f, 1.96908088e+06f, 2.69833175e+06f, 3.69766175e+06f, + 5.06709450e+06f, 6.94369800e+06f, 9.51530300e+06f, 1.30393050e+07f, + 1.78684240e+07f, 2.44860120e+07f, 3.35544320e+07f, 4.59813480e+07f, + 6.30105880e+07f, 8.63466160e+07f, 1.18325176e+08f, 1.62147024e+08f, + 2.22198336e+08f, 3.04489696e+08f, 
4.17257760e+08f, 5.71789568e+08f, + 7.83552384e+08f, 1.07374182e+09f, 1.47140314e+09f, 2.01633882e+09f, + 2.76309171e+09f, 3.78640563e+09f, 5.18870477e+09f, 7.11034675e+09f, + 9.74367027e+09f, 1.33522483e+10f, 1.82972662e+10f, 2.50736763e+10f, + 3.43597384e+10f, 4.70849004e+10f, 6.45228421e+10f, 8.84189348e+10f, + 1.21164980e+11f, 1.66038553e+11f, 2.27531096e+11f, 3.11797449e+11f, + 4.27271946e+11f, 5.85512518e+11f, 8.02357641e+11f, 1.09951163e+12f, + 1.50671681e+12f, 2.06473095e+12f, 2.82940591e+12f, 3.87727937e+12f, + 5.31323368e+12f, 7.28099507e+12f, 9.97751836e+12f, 1.36727023e+13f, + 1.87364006e+13f, 2.56754445e+13f, 3.51843721e+13f, 4.82149380e+13f, + 6.60713903e+13f, 9.05409892e+13f, 1.24072940e+14f, 1.70023478e+14f, + 2.32991842e+14f, 3.19280587e+14f, 4.37526473e+14f, 5.99564818e+14f, + 8.21614225e+14f, 1.12589991e+15f, 1.54287801e+15f, 2.11428449e+15f, + 2.89731166e+15f, 3.97033407e+15f, 5.44075129e+15f, 7.45573896e+15f, + 1.02169788e+16f, 1.40008471e+16f, 1.91860742e+16f, 2.62916552e+16f, + 3.60287970e+16f, 4.93720965e+16f, 6.76571037e+16f, 9.27139730e+16f, + 1.27050690e+17f, 1.74104041e+17f, 2.38583647e+17f, INFINITE_RESULT, + // sign bit = 1 + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 
0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, +}; + +// +// pow(2.0, e * 11/5.0) over e=[-127,128] +// +static const float pow_11_5_table[512] = { + // sign bit = 0 + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 1.40129846e-45f, + 4.20389539e-45f, 1.96181785e-44f, 8.96831017e-44f, 4.11981749e-43f, + 1.89315423e-42f, 8.69926087e-42f, 3.99734400e-41f, 1.83670992e-40f, + 8.43930599e-40f, 3.87768572e-39f, 1.78171625e-38f, 8.18661824e-38f, + 3.76158192e-37f, 1.72836915e-36f, 7.94149964e-36f, 3.64895487e-35f, + 
1.67661942e-34f, 7.70371978e-34f, 3.53970002e-33f, 1.62641913e-32f, + 7.47305957e-32f, 3.43371656e-31f, 1.57772181e-30f, 7.24930563e-30f, + 3.33090637e-29f, 1.53048260e-28f, 7.03225152e-28f, 3.23117427e-27f, + 1.48465779e-26f, 6.82169625e-26f, 3.13442837e-25f, 1.44020511e-24f, + 6.61744490e-24f, 3.04057916e-23f, 1.39708339e-22f, 6.41930929e-22f, + 2.94954007e-21f, 1.35525272e-20f, 6.22710612e-20f, 2.86122679e-19f, + 1.31467454e-18f, 6.04065806e-18f, 2.77555756e-17f, 1.27531133e-16f, + 5.85979246e-16f, 2.69245347e-15f, 1.23712677e-14f, 5.68434189e-14f, + 2.61183761e-13f, 1.20008550e-12f, 5.51414470e-12f, 2.53363563e-11f, + 1.16415322e-10f, 5.34904343e-10f, 2.45777509e-09f, 1.12929683e-08f, + 5.18888577e-08f, 2.38418579e-07f, 1.09548409e-06f, 5.03352339e-06f, + 2.31279992e-05f, 1.06268380e-04f, 4.88281250e-04f, 2.24355143e-03f, + 1.03086559e-02f, 4.73661423e-02f, 2.17637643e-01f, 1.00000000e+00f, + 4.59479332e+00f, 2.11121273e+01f, 9.70058594e+01f, 4.45721893e+02f, + 2.04800000e+03f, 9.41013672e+03f, 4.32376367e+04f, 1.98668000e+05f, + 9.12838438e+05f, 4.19430400e+06f, 1.92719600e+07f, 8.85506800e+07f, + 4.06872064e+08f, 1.86949312e+09f, 8.58993459e+09f, 3.94689741e+10f, + 1.81351793e+11f, 8.33273987e+11f, 3.82872191e+12f, 1.75921860e+13f, + 8.08324589e+13f, 3.71408471e+14f, 1.70654513e+15f, 7.84122247e+15f, + 3.60287970e+16f, 1.65544876e+17f, 7.60644549e+17f, 3.49500442e+18f, + 1.60588236e+19f, 7.37869763e+19f, 3.39035906e+20f, 1.55780004e+21f, + 7.15776905e+21f, 3.28884708e+22f, 1.51115727e+23f, 6.94345535e+23f, + 3.19037448e+24f, 1.46591110e+25f, 6.73555881e+25f, 3.09485010e+26f, + 1.42201966e+27f, 6.53388693e+27f, 3.00218593e+28f, 1.37944245e+29f, + 6.33825300e+29f, 2.91229625e+30f, 1.33814004e+31f, 6.14847679e+31f, + 2.82509813e+32f, 1.29807421e+33f, 5.96438273e+33f, 2.74051081e+34f, + 1.25920805e+35f, 5.78580097e+35f, 2.65845599e+36f, 1.22150558e+37f, + 5.61256613e+37f, 2.57885808e+38f, INFINITE_RESULT, INFINITE_RESULT, + INFINITE_RESULT, INFINITE_RESULT, INFINITE_RESULT, INFINITE_RESULT, + INFINITE_RESULT, INFINITE_RESULT, INFINITE_RESULT, INFINITE_RESULT, + INFINITE_RESULT, INFINITE_RESULT, INFINITE_RESULT, INFINITE_RESULT, + INFINITE_RESULT, INFINITE_RESULT, INFINITE_RESULT, INFINITE_RESULT, + INFINITE_RESULT, INFINITE_RESULT, INFINITE_RESULT, INFINITE_RESULT, + INFINITE_RESULT, INFINITE_RESULT, INFINITE_RESULT, INFINITE_RESULT, + INFINITE_RESULT, INFINITE_RESULT, INFINITE_RESULT, INFINITE_RESULT, + INFINITE_RESULT, INFINITE_RESULT, INFINITE_RESULT, INFINITE_RESULT, + INFINITE_RESULT, INFINITE_RESULT, INFINITE_RESULT, INFINITE_RESULT, + INFINITE_RESULT, INFINITE_RESULT, INFINITE_RESULT, INFINITE_RESULT, + INFINITE_RESULT, INFINITE_RESULT, INFINITE_RESULT, INFINITE_RESULT, + INFINITE_RESULT, INFINITE_RESULT, INFINITE_RESULT, INFINITE_RESULT, + INFINITE_RESULT, INFINITE_RESULT, INFINITE_RESULT, INFINITE_RESULT, + INFINITE_RESULT, INFINITE_RESULT, INFINITE_RESULT, INFINITE_RESULT, + INFINITE_RESULT, INFINITE_RESULT, INFINITE_RESULT, INFINITE_RESULT, + INFINITE_RESULT, INFINITE_RESULT, INFINITE_RESULT, INFINITE_RESULT, + INFINITE_RESULT, INFINITE_RESULT, INFINITE_RESULT, INFINITE_RESULT, + // sign bit = 1 + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 
0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 
0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, + 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, 0.00000000e+00f, +}; + +// +// powf(x, 5/11.0f) +// +// for x = +inf or NaN, returns +inf +// for x = subnormal or 0.0f, returns 0.0f +// for x < 0.0f, returns 0.0f +// +// rel |error| < 1.2e-5, smooth +// +static inline float _powf_5_11(float x) { + + union { float f; uint32_t u; } m = { x }; + + // split into mantissa and exponent + int k = m.u >> 23; // [sign|exponent] bits + m.u = (m.u & ((1 << 23) - 1)) | (127 << 23); // mantissa with zero exponent + + // pow(2, e * 5/11) from table + float pow_e = pow_5_11_table[k]; + + // polynomial for pow(m, 5/11) over m=[1,2) + float pow_m = (((-0.0110083047f * m.f + 0.0905038750f) * m.f - 0.324697506f) * m.f + 0.876040946f) * m.f + 0.369160989f; + + // recontruct the result + return pow_e * pow_m; +} + +// +// powf(x, 11/5.0f) +// +// for x = +inf or NaN, returns +inf +// for x = subnormal or 0.0f, returns 0.0f +// for x < 0.0f, returns 0.0f +// +// rel |error| < 2.9e-6, smooth +// +static inline float _powf_11_5(float x) { + + union { float f; uint32_t u; } m = { x }; + + // split into mantissa and exponent + int k = m.u >> 23; // [sign|exponent] bits + m.u = (m.u & ((1 << 23) - 1)) | (127 << 23); // mantissa with zero exponent + + // pow(2, e * 11/5) from table + float pow_e = pow_11_5_table[k]; + + // polynomial for pow(m, 11/5) over m=[1,2) + float pow_m = (((-0.00916587552f * m.f + 0.119315466f) * m.f + 1.01847068f) * m.f - 0.158338739f) * m.f + 0.0297184721f; + + // recontruct the result + return pow_e * pow_m; +} + +#if (NV_USE_SSE > 1) +#include // SSE2 + +void nv::powf_5_11(const float* src, float* dst, int count) { + + int i = 0; + for (; i < count - 3; i += 4) { + + __m128 x = _mm_loadu_ps(&src[i]); + + // split into mantissa and exponent + __m128i k = _mm_srli_epi32(_mm_castps_si128(x), 23); + x = _mm_and_ps(x, _mm_castsi128_ps(_mm_set1_epi32((1 << 23) - 1))); + x = _mm_or_ps(x, _mm_castsi128_ps(_mm_set1_epi32(127 << 23))); + + // pow(2, e * 5/11) from table + __m128 pow_e = _mm_setr_ps( + pow_5_11_table[_mm_cvtsi128_si32(k)], + pow_5_11_table[_mm_extract_epi16(k, 2)], + pow_5_11_table[_mm_extract_epi16(k, 4)], + pow_5_11_table[_mm_extract_epi16(k, 6)] + ); + + // polynomial for pow(m, 5/11) over m=[1,2) + __m128 pow_m = _mm_set1_ps(-0.0110083047f); + pow_m = _mm_add_ps(_mm_mul_ps(pow_m, x), _mm_set1_ps(0.0905038750f)); + pow_m = _mm_add_ps(_mm_mul_ps(pow_m, x), _mm_set1_ps(-0.324697506f)); + pow_m = _mm_add_ps(_mm_mul_ps(pow_m, x), _mm_set1_ps(0.876040946f)); + pow_m = _mm_add_ps(_mm_mul_ps(pow_m, x), _mm_set1_ps(0.369160989f)); + + // recontruct the result + _mm_storeu_ps(&dst[i], _mm_mul_ps(pow_e, pow_m)); + } + + for (; i < count; i++) { + dst[i] = _powf_5_11(src[i]); + } +} + +void nv::powf_11_5(const float* src, float* dst, int count) { + + int i = 0; + for (; i < count - 3; i += 4) { + + __m128 x = _mm_loadu_ps(&src[i]); + + // split into mantissa and exponent + __m128i k = _mm_srli_epi32(_mm_castps_si128(x), 23); + x = _mm_and_ps(x, _mm_castsi128_ps(_mm_set1_epi32((1 << 23) - 1))); + x = _mm_or_ps(x, 
_mm_castsi128_ps(_mm_set1_epi32(127 << 23))); + + // pow(2, e * 11/5) from table + __m128 pow_e = _mm_setr_ps( + pow_11_5_table[_mm_cvtsi128_si32(k)], + pow_11_5_table[_mm_extract_epi16(k, 2)], + pow_11_5_table[_mm_extract_epi16(k, 4)], + pow_11_5_table[_mm_extract_epi16(k, 6)] + ); + + // polynomial for pow(m, 11/5) over m=[1,2) + __m128 pow_m = _mm_set1_ps(-0.00916587552f); + pow_m = _mm_add_ps(_mm_mul_ps(pow_m, x), _mm_set1_ps(0.119315466f)); + pow_m = _mm_add_ps(_mm_mul_ps(pow_m, x), _mm_set1_ps(1.01847068f)); + pow_m = _mm_add_ps(_mm_mul_ps(pow_m, x), _mm_set1_ps(-0.158338739f)); + pow_m = _mm_add_ps(_mm_mul_ps(pow_m, x), _mm_set1_ps(0.0297184721f)); + + // recontruct the result + _mm_storeu_ps(&dst[i], _mm_mul_ps(pow_e, pow_m)); + } + + for (; i < count; i++) { + dst[i] = _powf_11_5(src[i]); + } +} + +#else + +void nv::powf_5_11(const float* src, float* dst, int count) { + for (int i = 0; i < count; i++) { + dst[i] = _powf_5_11(src[i]); + } +} +void nv::powf_11_5(const float* src, float* dst, int count) { + for (int i = 0; i < count; i++) { + dst[i] = _powf_11_5(src[i]); + } +} + +#endif // SSE2 Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/Half.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvmath/Half.h +++ ps/trunk/libraries/source/nvtt/src/src/nvmath/Half.h @@ -0,0 +1,78 @@ +#pragma once +#ifndef NV_MATH_HALF_H +#define NV_MATH_HALF_H + +#include "nvmath.h" + +namespace nv { + + NVMATH_API uint32 half_to_float( uint16 h ); + NVMATH_API uint16 half_from_float( uint32 f ); + + // vin,vout must be 16 byte aligned. count must be a multiple of 8. + // implement a non-SSE version if we need it. For now, this naming makes it clear this is only available when SSE2 is + void half_to_float_array_SSE2(const uint16 * vin, float * vout, int count); + + NVMATH_API void half_init_tables(); + NVMATH_API uint32 fast_half_to_float(uint16 h); + + inline uint16 to_half(float c) { + union { float f; uint32 u; } f; + f.f = c; + return nv::half_from_float( f.u ); + } + + inline float to_float(uint16 c) { + union { float f; uint32 u; } f; + f.u = nv::fast_half_to_float( c ); + return f.f; + } + + + union Half { + uint16 raw; + struct { + #if NV_BIG_ENDIAN + uint negative:1; + uint biasedexponent:5; + uint mantissa:10; + #else + uint mantissa:10; + uint biasedexponent:5; + uint negative:1; + #endif + } field; + }; + + + inline float TestHalfPrecisionAwayFromZero(float input) + { + Half h; + h.raw = to_half(input); + h.raw += 1; + + float f = to_float(h.raw); + + // Subtract the initial value to find our precision + float delta = f - input; + + return delta; + } + + inline float TestHalfPrecisionTowardsZero(float input) + { + Half h; + h.raw = to_half(input); + h.raw -= 1; + + float f = to_float(h.raw); + + // Subtract the initial value to find our precision + float delta = f - input; + + return -delta; + } + +} // nv namespace + +#endif // NV_MATH_HALF_H Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/Half.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvmath/Half.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvmath/Half.cpp @@ -0,0 +1,787 @@ +// Branch-free implementation of half-precision (16 bit) floating point +// Copyright 2006 Mike Acton +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including 
without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE +// +// Half-precision floating point format +// ------------------------------------ +// +// | Field | Last | First | Note +// |----------|------|-------|---------- +// | Sign | 15 | 15 | +// | Exponent | 14 | 10 | Bias = 15 +// | Mantissa | 9 | 0 | +// +// Compiling +// --------- +// +// Preferred compile flags for GCC: +// -O3 -fstrict-aliasing -std=c99 -pedantic -Wall -Wstrict-aliasing +// +// This file is a C99 source file, intended to be compiled with a C99 +// compliant compiler. However, for the moment it remains compatible +// with C++98. Therefore if you are using a compiler that poorly implements +// C standards (e.g. MSVC), it may be compiled as C++. This is not +// guaranteed for future versions. +// +// Features +// -------- +// +// * QNaN + <x> = QNaN +// * <x> + +INF = +INF +// * <x> - -INF = -INF +// * INF - INF = SNaN +// * Denormalized values +// * Difference of ZEROs is always +ZERO +// * Sum round with guard + round + sticky bit (grs) +// * And of course... 
no branching +// +// Precision of Sum +// ---------------- +// +// (SUM) uint16 z = half_add( x, y ); +// (DIFFERENCE) uint16 z = half_add( x, -y ); +// +// Will have exactly (0 ulps difference) the same result as: +// (For 32 bit IEEE 784 floating point and same rounding mode) +// +// union FLOAT_32 +// { +// float f32; +// uint32 u32; +// }; +// +// union FLOAT_32 fx = { .u32 = half_to_float( x ) }; +// union FLOAT_32 fy = { .u32 = half_to_float( y ) }; +// union FLOAT_32 fz = { .f32 = fx.f32 + fy.f32 }; +// uint16 z = float_to_half( fz ); +// + +#include "Half.h" +#include + + +// Load immediate +static inline uint32 _uint32_li( uint32 a ) +{ + return (a); +} + +// Decrement +static inline uint32 _uint32_dec( uint32 a ) +{ + return (a - 1); +} + +// Increment +static inline uint32 _uint32_inc( uint32 a ) +{ + return (a + 1); +} + +// Complement +static inline uint32 _uint32_not( uint32 a ) +{ + return (~a); +} + +// Negate +static inline uint32 _uint32_neg( uint32 a ) +{ +#pragma warning(disable : 4146) // unary minus operator applied to unsigned type, result still unsigned + return (-a); +#pragma warning(default : 4146) +} + +// Extend sign +static inline uint32 _uint32_ext( uint32 a ) +{ + return (((int32)a)>>31); +} + +// And +static inline uint32 _uint32_and( uint32 a, uint32 b ) +{ + return (a & b); +} + +// And with Complement +static inline uint32 _uint32_andc( uint32 a, uint32 b ) +{ + return (a & ~b); +} + +// Or +static inline uint32 _uint32_or( uint32 a, uint32 b ) +{ + return (a | b); +} + +// Shift Right Logical +static inline uint32 _uint32_srl( uint32 a, int sa ) +{ + return (a >> sa); +} + +// Shift Left Logical +static inline uint32 _uint32_sll( uint32 a, int sa ) +{ + return (a << sa); +} + +// Add +static inline uint32 _uint32_add( uint32 a, uint32 b ) +{ + return (a + b); +} + +// Subtract +static inline uint32 _uint32_sub( uint32 a, uint32 b ) +{ + return (a - b); +} + +// Select on Sign bit +static inline uint32 _uint32_sels( uint32 test, uint32 a, uint32 b ) +{ + const uint32 mask = _uint32_ext( test ); + const uint32 sel_a = _uint32_and( a, mask ); + const uint32 sel_b = _uint32_andc( b, mask ); + const uint32 result = _uint32_or( sel_a, sel_b ); + + return (result); +} + +// Load Immediate +static inline uint16 _uint16_li( uint16 a ) +{ + return (a); +} + +// Extend sign +static inline uint16 _uint16_ext( uint16 a ) +{ + return (((int16)a)>>15); +} + +// Negate +static inline uint16 _uint16_neg( uint16 a ) +{ + return (-a); +} + +// Complement +static inline uint16 _uint16_not( uint16 a ) +{ + return (~a); +} + +// Decrement +static inline uint16 _uint16_dec( uint16 a ) +{ + return (a - 1); +} + +// Shift Left Logical +static inline uint16 _uint16_sll( uint16 a, int sa ) +{ + return (a << sa); +} + +// Shift Right Logical +static inline uint16 _uint16_srl( uint16 a, int sa ) +{ + return (a >> sa); +} + +// Add +static inline uint16 _uint16_add( uint16 a, uint16 b ) +{ + return (a + b); +} + +// Subtract +static inline uint16 _uint16_sub( uint16 a, uint16 b ) +{ + return (a - b); +} + +// And +static inline uint16 _uint16_and( uint16 a, uint16 b ) +{ + return (a & b); +} + +// Or +static inline uint16 _uint16_or( uint16 a, uint16 b ) +{ + return (a | b); +} + +// Exclusive Or +static inline uint16 _uint16_xor( uint16 a, uint16 b ) +{ + return (a ^ b); +} + +// And with Complement +static inline uint16 _uint16_andc( uint16 a, uint16 b ) +{ + return (a & ~b); +} + +// And then Shift Right Logical +static inline uint16 _uint16_andsrl( uint16 a, uint16 b, int sa ) +{ 
+ return ((a & b) >> sa); +} + +// Shift Right Logical then Mask +static inline uint16 _uint16_srlm( uint16 a, int sa, uint16 mask ) +{ + return ((a >> sa) & mask); +} + +// Add then Mask +static inline uint16 _uint16_addm( uint16 a, uint16 b, uint16 mask ) +{ + return ((a + b) & mask); +} + + +// Select on Sign bit +static inline uint16 _uint16_sels( uint16 test, uint16 a, uint16 b ) +{ + const uint16 mask = _uint16_ext( test ); + const uint16 sel_a = _uint16_and( a, mask ); + const uint16 sel_b = _uint16_andc( b, mask ); + const uint16 result = _uint16_or( sel_a, sel_b ); + + return (result); +} + +#if NV_OS_XBOX +#include +#elif NV_CC_MSVC + +#include +#pragma intrinsic(_BitScanReverse) + +uint32 _uint32_nlz( uint32 x ) { + unsigned long index; + _BitScanReverse(&index, x); + return 31 - index; +} +#endif + + +// Count Leading Zeros +static inline uint32 _uint32_cntlz( uint32 x ) +{ +#if NV_CC_GCC + /* On PowerPC, this will map to insn: cntlzw */ + /* On Pentium, this will map to insn: clz */ + uint32 is_x_nez_msb = _uint32_neg( x ); + uint32 nlz = __builtin_clz( x ); + uint32 result = _uint32_sels( is_x_nez_msb, nlz, 0x00000020 ); + return (result); +#elif NV_OS_XBOX + // Xbox PPC has this as an intrinsic. + return _CountLeadingZeros(x); +#elif NV_CC_MSVC + uint32 is_x_nez_msb = _uint32_neg( x ); + uint32 nlz = _uint32_nlz( x ); + uint32 result = _uint32_sels( is_x_nez_msb, nlz, 0x00000020 ); + return (result); +#else + const uint32 x0 = _uint32_srl( x, 1 ); + const uint32 x1 = _uint32_or( x, x0 ); + const uint32 x2 = _uint32_srl( x1, 2 ); + const uint32 x3 = _uint32_or( x1, x2 ); + const uint32 x4 = _uint32_srl( x3, 4 ); + const uint32 x5 = _uint32_or( x3, x4 ); + const uint32 x6 = _uint32_srl( x5, 8 ); + const uint32 x7 = _uint32_or( x5, x6 ); + const uint32 x8 = _uint32_srl( x7, 16 ); + const uint32 x9 = _uint32_or( x7, x8 ); + const uint32 xA = _uint32_not( x9 ); + const uint32 xB = _uint32_srl( xA, 1 ); + const uint32 xC = _uint32_and( xB, 0x55555555 ); + const uint32 xD = _uint32_sub( xA, xC ); + const uint32 xE = _uint32_and( xD, 0x33333333 ); + const uint32 xF = _uint32_srl( xD, 2 ); + const uint32 x10 = _uint32_and( xF, 0x33333333 ); + const uint32 x11 = _uint32_add( xE, x10 ); + const uint32 x12 = _uint32_srl( x11, 4 ); + const uint32 x13 = _uint32_add( x11, x12 ); + const uint32 x14 = _uint32_and( x13, 0x0f0f0f0f ); + const uint32 x15 = _uint32_srl( x14, 8 ); + const uint32 x16 = _uint32_add( x14, x15 ); + const uint32 x17 = _uint32_srl( x16, 16 ); + const uint32 x18 = _uint32_add( x16, x17 ); + const uint32 x19 = _uint32_and( x18, 0x0000003f ); + return ( x19 ); +#endif +} + +// Count Leading Zeros +static inline uint16 _uint16_cntlz( uint16 x ) +{ +#ifdef __GNUC__ + /* On PowerPC, this will map to insn: cntlzw */ + /* On Pentium, this will map to insn: clz */ + uint16 nlz32 = (uint16)_uint32_cntlz( (uint32)x ); + uint32 nlz = _uint32_sub( nlz32, 16 ); + return (nlz); +#elif _NV_OS_XBOX_ + uint16 nlz32 = (uint16)_CountLeadingZeros( (uint32)x ); + return _uint32_sub( nlz32, 16); +#else + const uint16 x0 = _uint16_srl( x, 1 ); + const uint16 x1 = _uint16_or( x, x0 ); + const uint16 x2 = _uint16_srl( x1, 2 ); + const uint16 x3 = _uint16_or( x1, x2 ); + const uint16 x4 = _uint16_srl( x3, 4 ); + const uint16 x5 = _uint16_or( x3, x4 ); + const uint16 x6 = _uint16_srl( x5, 8 ); + const uint16 x7 = _uint16_or( x5, x6 ); + const uint16 x8 = _uint16_not( x7 ); + const uint16 x9 = _uint16_srlm( x8, 1, 0x5555 ); + const uint16 xA = _uint16_sub( x8, x9 ); + const uint16 xB = 
_uint16_and( xA, 0x3333 ); + const uint16 xC = _uint16_srlm( xA, 2, 0x3333 ); + const uint16 xD = _uint16_add( xB, xC ); + const uint16 xE = _uint16_srl( xD, 4 ); + const uint16 xF = _uint16_addm( xD, xE, 0x0f0f ); + const uint16 x10 = _uint16_srl( xF, 8 ); + const uint16 x11 = _uint16_addm( xF, x10, 0x001f ); + return ( x11 ); +#endif +} + +uint16 +nv::half_from_float( uint32 f ) +{ + const uint32 one = _uint32_li( 0x00000001 ); + const uint32 f_s_mask = _uint32_li( 0x80000000 ); + const uint32 f_e_mask = _uint32_li( 0x7f800000 ); + const uint32 f_m_mask = _uint32_li( 0x007fffff ); + const uint32 f_m_hidden_bit = _uint32_li( 0x00800000 ); + const uint32 f_m_round_bit = _uint32_li( 0x00001000 ); + const uint32 f_snan_mask = _uint32_li( 0x7fc00000 ); + const uint32 f_e_pos = _uint32_li( 0x00000017 ); + const uint32 h_e_pos = _uint32_li( 0x0000000a ); + const uint32 h_e_mask = _uint32_li( 0x00007c00 ); + const uint32 h_snan_mask = _uint32_li( 0x00007e00 ); + const uint32 h_e_mask_value = _uint32_li( 0x0000001f ); + const uint32 f_h_s_pos_offset = _uint32_li( 0x00000010 ); + const uint32 f_h_bias_offset = _uint32_li( 0x00000070 ); + const uint32 f_h_m_pos_offset = _uint32_li( 0x0000000d ); + const uint32 h_nan_min = _uint32_li( 0x00007c01 ); + const uint32 f_h_e_biased_flag = _uint32_li( 0x0000008f ); + const uint32 f_s = _uint32_and( f, f_s_mask ); + const uint32 f_e = _uint32_and( f, f_e_mask ); + const uint16 h_s = _uint32_srl( f_s, f_h_s_pos_offset ); + const uint32 f_m = _uint32_and( f, f_m_mask ); + const uint16 f_e_amount = _uint32_srl( f_e, f_e_pos ); + const uint32 f_e_half_bias = _uint32_sub( f_e_amount, f_h_bias_offset ); + const uint32 f_snan = _uint32_and( f, f_snan_mask ); + const uint32 f_m_round_mask = _uint32_and( f_m, f_m_round_bit ); + const uint32 f_m_round_offset = _uint32_sll( f_m_round_mask, one ); + const uint32 f_m_rounded = _uint32_add( f_m, f_m_round_offset ); + const uint32 f_m_denorm_sa = _uint32_sub( one, f_e_half_bias ); + const uint32 f_m_with_hidden = _uint32_or( f_m_rounded, f_m_hidden_bit ); + const uint32 f_m_denorm = _uint32_srl( f_m_with_hidden, f_m_denorm_sa ); + const uint32 h_m_denorm = _uint32_srl( f_m_denorm, f_h_m_pos_offset ); + const uint32 f_m_rounded_overflow = _uint32_and( f_m_rounded, f_m_hidden_bit ); + const uint32 m_nan = _uint32_srl( f_m, f_h_m_pos_offset ); + const uint32 h_em_nan = _uint32_or( h_e_mask, m_nan ); + const uint32 h_e_norm_overflow_offset = _uint32_inc( f_e_half_bias ); + const uint32 h_e_norm_overflow = _uint32_sll( h_e_norm_overflow_offset, h_e_pos ); + const uint32 h_e_norm = _uint32_sll( f_e_half_bias, h_e_pos ); + const uint32 h_m_norm = _uint32_srl( f_m_rounded, f_h_m_pos_offset ); + const uint32 h_em_norm = _uint32_or( h_e_norm, h_m_norm ); + const uint32 is_h_ndenorm_msb = _uint32_sub( f_h_bias_offset, f_e_amount ); + const uint32 is_f_e_flagged_msb = _uint32_sub( f_h_e_biased_flag, f_e_half_bias ); + const uint32 is_h_denorm_msb = _uint32_not( is_h_ndenorm_msb ); + const uint32 is_f_m_eqz_msb = _uint32_dec( f_m ); + const uint32 is_h_nan_eqz_msb = _uint32_dec( m_nan ); + const uint32 is_f_inf_msb = _uint32_and( is_f_e_flagged_msb, is_f_m_eqz_msb ); + const uint32 is_f_nan_underflow_msb = _uint32_and( is_f_e_flagged_msb, is_h_nan_eqz_msb ); + const uint32 is_e_overflow_msb = _uint32_sub( h_e_mask_value, f_e_half_bias ); + const uint32 is_h_inf_msb = _uint32_or( is_e_overflow_msb, is_f_inf_msb ); + const uint32 is_f_nsnan_msb = _uint32_sub( f_snan, f_snan_mask ); + const uint32 is_m_norm_overflow_msb = _uint32_neg( 
f_m_rounded_overflow ); + const uint32 is_f_snan_msb = _uint32_not( is_f_nsnan_msb ); + const uint32 h_em_overflow_result = _uint32_sels( is_m_norm_overflow_msb, h_e_norm_overflow, h_em_norm ); + const uint32 h_em_nan_result = _uint32_sels( is_f_e_flagged_msb, h_em_nan, h_em_overflow_result ); + const uint32 h_em_nan_underflow_result = _uint32_sels( is_f_nan_underflow_msb, h_nan_min, h_em_nan_result ); + const uint32 h_em_inf_result = _uint32_sels( is_h_inf_msb, h_e_mask, h_em_nan_underflow_result ); + const uint32 h_em_denorm_result = _uint32_sels( is_h_denorm_msb, h_m_denorm, h_em_inf_result ); + const uint32 h_em_snan_result = _uint32_sels( is_f_snan_msb, h_snan_mask, h_em_denorm_result ); + const uint32 h_result = _uint32_or( h_s, h_em_snan_result ); + + return (uint16)(h_result); +} + +uint32 +nv::half_to_float( uint16 h ) +{ + const uint32 h_e_mask = _uint32_li( 0x00007c00 ); + const uint32 h_m_mask = _uint32_li( 0x000003ff ); + const uint32 h_s_mask = _uint32_li( 0x00008000 ); + const uint32 h_f_s_pos_offset = _uint32_li( 0x00000010 ); + const uint32 h_f_e_pos_offset = _uint32_li( 0x0000000d ); + const uint32 h_f_bias_offset = _uint32_li( 0x0001c000 ); + const uint32 f_e_mask = _uint32_li( 0x7f800000 ); + const uint32 f_m_mask = _uint32_li( 0x007fffff ); + const uint32 h_f_e_denorm_bias = _uint32_li( 0x0000007e ); + const uint32 h_f_m_denorm_sa_bias = _uint32_li( 0x00000008 ); + const uint32 f_e_pos = _uint32_li( 0x00000017 ); + const uint32 h_e_mask_minus_one = _uint32_li( 0x00007bff ); + const uint32 h_e = _uint32_and( h, h_e_mask ); + const uint32 h_m = _uint32_and( h, h_m_mask ); + const uint32 h_s = _uint32_and( h, h_s_mask ); + const uint32 h_e_f_bias = _uint32_add( h_e, h_f_bias_offset ); + const uint32 h_m_nlz = _uint32_cntlz( h_m ); + const uint32 f_s = _uint32_sll( h_s, h_f_s_pos_offset ); + const uint32 f_e = _uint32_sll( h_e_f_bias, h_f_e_pos_offset ); + const uint32 f_m = _uint32_sll( h_m, h_f_e_pos_offset ); + const uint32 f_em = _uint32_or( f_e, f_m ); + const uint32 h_f_m_sa = _uint32_sub( h_m_nlz, h_f_m_denorm_sa_bias ); + const uint32 f_e_denorm_unpacked = _uint32_sub( h_f_e_denorm_bias, h_f_m_sa ); + const uint32 h_f_m = _uint32_sll( h_m, h_f_m_sa ); + const uint32 f_m_denorm = _uint32_and( h_f_m, f_m_mask ); + const uint32 f_e_denorm = _uint32_sll( f_e_denorm_unpacked, f_e_pos ); + const uint32 f_em_denorm = _uint32_or( f_e_denorm, f_m_denorm ); + const uint32 f_em_nan = _uint32_or( f_e_mask, f_m ); + const uint32 is_e_eqz_msb = _uint32_dec( h_e ); + const uint32 is_m_nez_msb = _uint32_neg( h_m ); + const uint32 is_e_flagged_msb = _uint32_sub( h_e_mask_minus_one, h_e ); + const uint32 is_zero_msb = _uint32_andc( is_e_eqz_msb, is_m_nez_msb ); + const uint32 is_inf_msb = _uint32_andc( is_e_flagged_msb, is_m_nez_msb ); + const uint32 is_denorm_msb = _uint32_and( is_m_nez_msb, is_e_eqz_msb ); + const uint32 is_nan_msb = _uint32_and( is_e_flagged_msb, is_m_nez_msb ); + const uint32 is_zero = _uint32_ext( is_zero_msb ); + const uint32 f_zero_result = _uint32_andc( f_em, is_zero ); + const uint32 f_denorm_result = _uint32_sels( is_denorm_msb, f_em_denorm, f_zero_result ); + const uint32 f_inf_result = _uint32_sels( is_inf_msb, f_e_mask, f_denorm_result ); + const uint32 f_nan_result = _uint32_sels( is_nan_msb, f_em_nan, f_inf_result ); + const uint32 f_result = _uint32_or( f_s, f_nan_result ); + + return (f_result); +} + + +#if !NV_OS_IOS && (defined(__i386__) || defined(__x86_64__)) + +#if NV_CC_GNUC +#if defined(__i386__) || defined(__x86_64__) +#include +#endif 
+#endif + +#include "nvcore/Memory.h" // NV_ALIGN_16 + +static __m128 half_to_float4_SSE2(__m128i h) +{ +#define SSE_CONST4(name, val) static const NV_ALIGN_16 uint name[4] = { (val), (val), (val), (val) } + +#define CONST(name) *(const __m128i *)&name + + SSE_CONST4(mask_nosign, 0x7fff); + SSE_CONST4(mask_justsign, 0x8000); + SSE_CONST4(mask_shifted_exp, 0x7c00 << 13); + SSE_CONST4(expadjust_normal, (127 - 15) << 23); + SSE_CONST4(expadjust_infnan, (128 - 16) << 23); + SSE_CONST4(expadjust_denorm, 1 << 23); + SSE_CONST4(magic_denorm, 113 << 23); + + __m128i mnosign = CONST(mask_nosign); + __m128i expmant = _mm_and_si128(mnosign, h); + __m128i justsign = _mm_and_si128(h, CONST(mask_justsign)); + __m128i mshiftexp = CONST(mask_shifted_exp); + __m128i eadjust = CONST(expadjust_normal); + __m128i shifted = _mm_slli_epi32(expmant, 13); + __m128i adjusted = _mm_add_epi32(eadjust, shifted); + __m128i justexp = _mm_and_si128(shifted, mshiftexp); + + __m128i zero = _mm_setzero_si128(); + __m128i b_isinfnan = _mm_cmpeq_epi32(mshiftexp, justexp); + __m128i b_isdenorm = _mm_cmpeq_epi32(zero, justexp); + + __m128i adj_infnan = _mm_and_si128(b_isinfnan, CONST(expadjust_infnan)); + __m128i adjusted2 = _mm_add_epi32(adjusted, adj_infnan); + + __m128i adj_den = CONST(expadjust_denorm); + __m128i den1 = _mm_add_epi32(adj_den, adjusted2); + __m128 den2 = _mm_sub_ps(_mm_castsi128_ps(den1), *(const __m128 *)&magic_denorm); + __m128 adjusted3 = _mm_and_ps(den2, _mm_castsi128_ps(b_isdenorm)); + __m128 adjusted4 = _mm_andnot_ps(_mm_castsi128_ps(b_isdenorm), _mm_castsi128_ps(adjusted2)); + __m128 adjusted5 = _mm_or_ps(adjusted3, adjusted4); + __m128i sign = _mm_slli_epi32(justsign, 16); + __m128 final = _mm_or_ps(adjusted5, _mm_castsi128_ps(sign)); + + // ~21 SSE2 ops. + return final; + +#undef SSE_CONST4 +#undef CONST +} + + +void nv::half_to_float_array_SSE2(const uint16 * vin, float * vout, int count) { + nvDebugCheck((intptr_t(vin) & 15) == 0); + nvDebugCheck((intptr_t(vout) & 15) == 0); + nvDebugCheck((count & 7) == 0); + + __m128i zero = _mm_setzero_si128(); + + for (int i = 0; i < count; i += 8) + { + __m128i in = _mm_loadu_si128((const __m128i *)(vin + i)); + __m128i a = _mm_unpacklo_epi16(in, zero); + __m128i b = _mm_unpackhi_epi16(in, zero); + + __m128 outa = half_to_float4_SSE2(a); + _mm_storeu_ps((float *)(vout + i), outa); + + __m128 outb = half_to_float4_SSE2(b); + _mm_storeu_ps((float *)(vout + i + 4), outb); + } +} + +#endif + + +// @@ These tables could be smaller. +namespace nv { + uint32 mantissa_table[2048] = { 0xDEADBEEF }; + uint32 exponent_table[64]; + uint32 offset_table[64]; +} + +void nv::half_init_tables() +{ + // Init mantissa table. + mantissa_table[0] = 0; + + // denormals + for (int i = 1; i < 1024; i++) { + uint m = i << 13; + uint e = 0; + + while ((m & 0x00800000) == 0) { + e -= 0x00800000; + m <<= 1; + } + m &= ~0x00800000; + e += 0x38800000; + mantissa_table[i] = m | e; + } + + // normals + for (int i = 1024; i < 2048; i++) { + mantissa_table[i] = (i - 1024) << 13; + } + + + // Init exponent table. + exponent_table[0] = 0; + + for (int i = 1; i < 31; i++) { + exponent_table[i] = 0x38000000 + (i << 23); + } + + exponent_table[31] = 0x7f800000; + exponent_table[32] = 0x80000000; + + for (int i = 33; i < 63; i++) { + exponent_table[i] = 0xb8000000 + ((i - 32) << 23); + } + + exponent_table[63] = 0xff800000; + + + // Init offset table. 
+ offset_table[0] = 0; + + for (int i = 1; i < 32; i++) { + offset_table[i] = 1024; + } + + offset_table[32] = 0; + + for (int i = 33; i < 64; i++) { + offset_table[i] = 1024; + } +} + +// Fast half to float conversion based on: +// http://www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf +uint32 nv::fast_half_to_float(uint16 h) +{ + // Initialize table if necessary. + if (mantissa_table[0] != 0) + half_init_tables(); + uint exp = h >> 10; + return mantissa_table[offset_table[exp] + (h & 0x3ff)] + exponent_table[exp]; +} + +#if 0 + +// Inaccurate conversion suggested at the ffmpeg mailing list: +// http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/2009-July/068949.html +uint32 nv::fast_half_to_float(uint16 v) +{ + if (v & 0x8000) return 0; + uint exp = v >> 10; + if (!exp) return (v>>9)&1; + if (exp >= 15) return 0xffff; + v <<= 6; + return (v+(1<<16)) >> (15-exp); +} + +#endif + +#if 0 + +// Some more from a gamedev thread: +// http://www.devmaster.net/forums/showthread.php?t=10924 + +// I believe it does not handle specials either. + +// Mike Acton's code should be fairly easy to vectorize and that would handle all cases too, the table method might still be faster, though. + + +static __declspec(align(16)) unsigned half_sign[4] = {0x00008000, 0x00008000, 0x00008000, 0x00008000}; +static __declspec(align(16)) unsigned half_exponent[4] = {0x00007C00, 0x00007C00, 0x00007C00, 0x00007C00}; +static __declspec(align(16)) unsigned half_mantissa[4] = {0x000003FF, 0x000003FF, 0x000003FF, 0x000003FF}; +static __declspec(align(16)) unsigned half_bias_offset[4] = {0x0001C000, 0x0001C000, 0x0001C000, 0x0001C000}; + +__asm +{ + movaps xmm1, xmm0 // Input in xmm0 + movaps xmm2, xmm0 + + andps xmm0, half_sign + andps xmm1, half_exponent + andps xmm2, half_mantissa + paddd xmm1, half_bias_offset + + pslld xmm0, 16 + pslld xmm1, 13 + pslld xmm2, 13 + + orps xmm1, xmm2 + orps xmm0, xmm1 // Result in xmm0 +} + + +#endif + +#if 0 +// These versions compute the tables at compile time: +// http://gamedev.stackexchange.com/questions/17326/conversion-of-a-number-from-single-precision-floating-point-representation-to-a + +/* This method is faster than the OpenEXR implementation (very often + * used, eg. in Ogre), with the additional benefit of rounding, inspired + * by James Tursa’s half-precision code. */ +static inline uint16_t float_to_half_branch(uint32_t x) +{ + uint16_t bits = (x >> 16) & 0x8000; /* Get the sign */ + uint16_t m = (x >> 12) & 0x07ff; /* Keep one extra bit for rounding */ + unsigned int e = (x >> 23) & 0xff; /* Using int is faster here */ + + /* If zero, or denormal, or exponent underflows too much for a denormal + * half, return signed zero. */ + if (e < 103) + return bits; + + /* If NaN, return NaN. If Inf or exponent overflow, return Inf. */ + if (e > 142) + { + bits |= 0x7c00u; + /* If exponent was 0xff and one mantissa bit was set, it means NaN, + * not Inf, so make sure we set one mantissa bit too. */ + bits |= e == 255 && (x & 0x007fffffu); + return bits; + } + + /* If exponent underflows but not too much, return a denormal */ + if (e < 113) + { + m |= 0x0800u; + /* Extra rounding may overflow and set mantissa to 0 and exponent + * to 1, which is OK. */ + bits |= (m >> (114 - e)) + ((m >> (113 - e)) & 1); + return bits; + } + + bits |= ((e - 112) << 10) | (m >> 1); + /* Extra rounding. An overflow will set mantissa to 0 and increment + * the exponent, which is OK. */ + bits += m & 1; + return bits; +} + +/* These macros implement a finite iterator useful to build lookup + * tables. 
For instance, S64(0) will call S1(x) for all values of x + * between 0 and 63. + * Due to the exponential behaviour of the calls, the stress on the + * compiler may be important. */ +#define S4(x) S1((x)), S1((x)+1), S1((x)+2), S1((x)+3) +#define S16(x) S4((x)), S4((x)+4), S4((x)+8), S4((x)+12) +#define S64(x) S16((x)), S16((x)+16), S16((x)+32), S16((x)+48) +#define S256(x) S64((x)), S64((x)+64), S64((x)+128), S64((x)+192) +#define S1024(x) S256((x)), S256((x)+256), S256((x)+512), S256((x)+768) + +/* Lookup table-based algorithm from “Fast Half Float Conversions” + * by Jeroen van der Zijp, November 2008. No rounding is performed, + * and some NaN values may be incorrectly converted to Inf. */ +static inline uint16_t float_to_half_nobranch(uint32_t x) +{ + static uint16_t const basetable[512] = + { +#define S1(i) (((i) < 103) ? 0x0000 : \ + ((i) < 113) ? 0x0400 >> (113 - (i)) : \ + ((i) < 143) ? ((i) - 112) << 10 : 0x7c00) + S256(0), +#undef S1 +#define S1(i) (0x8000 | (((i) < 103) ? 0x0000 : \ + ((i) < 113) ? 0x0400 >> (113 - (i)) : \ + ((i) < 143) ? ((i) - 112) << 10 : 0x7c00)) + S256(0), +#undef S1 + }; + + static uint8_t const shifttable[512] = + { +#define S1(i) (((i) < 103) ? 24 : \ + ((i) < 113) ? 126 - (i) : \ + ((i) < 143 || (i) == 255) ? 13 : 24) + S256(0), S256(0), +#undef S1 + }; + + uint16_t bits = basetable[(x >> 23) & 0x1ff]; + bits |= (x & 0x007fffff) >> shifttable[(x >> 23) & 0x1ff]; + return bits; +} +#endif Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/Matrix.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvmath/Matrix.h +++ ps/trunk/libraries/source/nvtt/src/src/nvmath/Matrix.h @@ -1,1000 +1,113 @@ // This code is in the public domain -- castanyo@yahoo.es +#pragma once #ifndef NV_MATH_MATRIX_H #define NV_MATH_MATRIX_H -#include -#include +#include "Vector.h" -namespace nv -{ - -// @@ Use scalar defined in Vector.h, but should use a template instead. - -/// 4x4 transformation matrix. -/// -# Matrices are stored in memory in column major order. -/// -# Points are to be though of as column vectors. -/// -# Transformation of a point p by a matrix M is: p' = M * p -class NVMATH_CLASS Matrix -{ -public: - typedef Matrix const & Arg; - - Matrix(); - Matrix(zero_t); - Matrix(identity_t); - Matrix(const Matrix & m); - - scalar data(uint idx) const; - scalar & data(uint idx); - scalar get(uint row, uint col) const; - scalar operator()(uint row, uint col) const; - scalar & operator()(uint row, uint col); - const scalar * ptr() const; - - Vector4 row(uint i) const; - Vector4 column(uint i) const; - - void scale(scalar s); - void scale(Vector3::Arg s); - void translate(Vector3::Arg t); - void rotate(scalar theta, scalar v0, scalar v1, scalar v2); - scalar determinant() const; - - void apply(Matrix::Arg m); - -private: - scalar m_data[16]; -}; - - -inline Matrix::Matrix() -{ -} - -inline Matrix::Matrix(zero_t) -{ - for(int i = 0; i < 16; i++) { - m_data[i] = 0.0f; - } -} - -inline Matrix::Matrix(identity_t) -{ - for(int i = 0; i < 4; i++) { - for(int j = 0; j < 4; j++) { - m_data[4*j+i] = (i == j) ? 
1.0f : 0.0f; - } - } -} - -inline Matrix::Matrix(const Matrix & m) -{ - for(int i = 0; i < 16; i++) { - m_data[i] = m.m_data[i]; - } -} - - -// Accessors -inline scalar Matrix::data(uint idx) const -{ - nvDebugCheck(idx < 16); - return m_data[idx]; -} -inline scalar & Matrix::data(uint idx) -{ - nvDebugCheck(idx < 16); - return m_data[idx]; -} -inline scalar Matrix::get(uint row, uint col) const -{ - nvDebugCheck(row < 4 && col < 4); - return m_data[col * 4 + row]; -} -inline scalar Matrix::operator()(uint row, uint col) const -{ - nvDebugCheck(row < 4 && col < 4); - return m_data[col * 4 + row]; -} -inline scalar & Matrix::operator()(uint row, uint col) -{ - nvDebugCheck(row < 4 && col < 4); - return m_data[col * 4 + row]; -} - -inline const scalar * Matrix::ptr() const -{ - return m_data; -} - -inline Vector4 Matrix::row(uint i) const -{ - nvDebugCheck(i < 4); - return Vector4(get(i, 0), get(i, 1), get(i, 2), get(i, 3)); -} - -inline Vector4 Matrix::column(uint i) const -{ - nvDebugCheck(i < 4); - return Vector4(get(0, i), get(1, i), get(2, i), get(3, i)); -} - -/// Apply scale. -inline void Matrix::scale(scalar s) -{ - m_data[0] *= s; m_data[1] *= s; m_data[2] *= s; m_data[3] *= s; - m_data[4] *= s; m_data[5] *= s; m_data[6] *= s; m_data[7] *= s; - m_data[8] *= s; m_data[9] *= s; m_data[10] *= s; m_data[11] *= s; - m_data[12] *= s; m_data[13] *= s; m_data[14] *= s; m_data[15] *= s; -} - -/// Apply scale. -inline void Matrix::scale(Vector3::Arg s) -{ - m_data[0] *= s.x(); m_data[1] *= s.x(); m_data[2] *= s.x(); m_data[3] *= s.x(); - m_data[4] *= s.y(); m_data[5] *= s.y(); m_data[6] *= s.y(); m_data[7] *= s.y(); - m_data[8] *= s.z(); m_data[9] *= s.z(); m_data[10] *= s.z(); m_data[11] *= s.z(); -} - -/// Apply translation. -inline void Matrix::translate(Vector3::Arg t) -{ - m_data[12] = m_data[0] * t.x() + m_data[4] * t.y() + m_data[8] * t.z() + m_data[12]; - m_data[13] = m_data[1] * t.x() + m_data[5] * t.y() + m_data[9] * t.z() + m_data[13]; - m_data[14] = m_data[2] * t.x() + m_data[6] * t.y() + m_data[10] * t.z() + m_data[14]; - m_data[15] = m_data[3] * t.x() + m_data[7] * t.y() + m_data[11] * t.z() + m_data[15]; -} - -Matrix rotation(scalar theta, scalar v0, scalar v1, scalar v2); - -/// Apply rotation. -inline void Matrix::rotate(scalar theta, scalar v0, scalar v1, scalar v2) -{ - Matrix R(rotation(theta, v0, v1, v2)); - apply(R); -} - -/// Apply transform. -inline void Matrix::apply(Matrix::Arg m) -{ - nvDebugCheck(this != &m); - - for(int i = 0; i < 4; i++) { - const scalar ai0 = get(i,0), ai1 = get(i,1), ai2 = get(i,2), ai3 = get(i,3); - m_data[0 + i] = ai0 * m(0,0) + ai1 * m(1,0) + ai2 * m(2,0) + ai3 * m(3,0); - m_data[4 + i] = ai0 * m(0,1) + ai1 * m(1,1) + ai2 * m(2,1) + ai3 * m(3,1); - m_data[8 + i] = ai0 * m(0,2) + ai1 * m(1,2) + ai2 * m(2,2) + ai3 * m(3,2); - m_data[12+ i] = ai0 * m(0,3) + ai1 * m(1,3) + ai2 * m(2,3) + ai3 * m(3,3); - } -} - -/// Get scale matrix. -inline Matrix scale(Vector3::Arg s) -{ - Matrix m(identity); - m(0,0) = s.x(); - m(1,1) = s.y(); - m(2,2) = s.z(); - return m; -} - -/// Get scale matrix. -inline Matrix scale(scalar s) -{ - Matrix m(identity); - m(0,0) = m(1,1) = m(2,2) = s; - return m; -} - -/// Get translation matrix. -inline Matrix translation(Vector3::Arg t) -{ - Matrix m(identity); - m(0,3) = t.x(); - m(1,3) = t.y(); - m(2,3) = t.z(); - return m; -} - -/// Get rotation matrix. 
-inline Matrix rotation(scalar theta, scalar v0, scalar v1, scalar v2) -{ - scalar cost = cosf(theta); - scalar sint = sinf(theta); - - Matrix m(identity); - - if( 1 == v0 && 0 == v1 && 0 == v2 ) { - m(1,1) = cost; m(2,1) = -sint; - m(1,2) = sint; m(2,2) = cost; - } - else if( 0 == v0 && 1 == v1 && 0 == v2 ) { - m(0,0) = cost; m(2,0) = sint; - m(1,2) = -sint; m(2,2) = cost; - } - else if( 0 == v0 && 0 == v1 && 1 == v2 ) { - m(0,0) = cost; m(1,0) = -sint; - m(0,1) = sint; m(1,1) = cost; - } - else { - scalar a2, b2, c2; - a2 = v0 * v0; - b2 = v1 * v1; - c2 = v2 * v2; - - scalar iscale = 1.0f / sqrtf(a2 + b2 + c2); - v0 *= iscale; - v1 *= iscale; - v2 *= iscale; - - scalar abm, acm, bcm; - scalar mcos, asin, bsin, csin; - mcos = 1.0f - cost; - abm = v0 * v1 * mcos; - acm = v0 * v2 * mcos; - bcm = v1 * v2 * mcos; - asin = v0 * sint; - bsin = v1 * sint; - csin = v2 * sint; - m(0,0) = a2 * mcos + cost; - m(1,0) = abm - csin; - m(2,0) = acm + bsin; - m(3,0) = abm + csin; - m(1,1) = b2 * mcos + cost; - m(2,1) = bcm - asin; - m(3,1) = acm - bsin; - m(1,2) = bcm + asin; - m(2,2) = c2 * mcos + cost; - } - return m; -} - -//Matrix rotation(scalar yaw, scalar pitch, scalar roll); -//Matrix skew(scalar angle, Vector3::Arg v1, Vector3::Arg v2); - -/// Get frustum matrix. -inline Matrix frustum(scalar xmin, scalar xmax, scalar ymin, scalar ymax, scalar zNear, scalar zFar) -{ - Matrix m(zero); - - scalar doubleznear = 2.0f * zNear; - scalar one_deltax = 1.0f / (xmax - xmin); - scalar one_deltay = 1.0f / (ymax - ymin); - scalar one_deltaz = 1.0f / (zFar - zNear); - - m(0,0) = doubleznear * one_deltax; - m(1,1) = doubleznear * one_deltay; - m(0,2) = (xmax + xmin) * one_deltax; - m(1,2) = (ymax + ymin) * one_deltay; - m(2,2) = -(zFar + zNear) * one_deltaz; - m(3,2) = -1.0f; - m(2,3) = -(zFar * doubleznear) * one_deltaz; - - return m; -} - -/// Get infinite frustum matrix. -inline Matrix frustum(scalar xmin, scalar xmax, scalar ymin, scalar ymax, scalar zNear) -{ - Matrix m(zero); - - scalar doubleznear = 2.0f * zNear; - scalar one_deltax = 1.0f / (xmax - xmin); - scalar one_deltay = 1.0f / (ymax - ymin); - scalar nudge = 1.0; // 0.999; - - m(0,0) = doubleznear * one_deltax; - m(1,1) = doubleznear * one_deltay; - m(0,2) = (xmax + xmin) * one_deltax; - m(1,2) = (ymax + ymin) * one_deltay; - m(2,2) = -1.0f * nudge; - m(3,2) = -1.0f; - m(2,3) = -doubleznear * nudge; - - return m; -} - -/// Get perspective matrix. -inline Matrix perspective(scalar fovy, scalar aspect, scalar zNear, scalar zFar) -{ - scalar xmax = zNear * tan(fovy / 2); - scalar xmin = -xmax; - - scalar ymax = xmax / aspect; - scalar ymin = -ymax; - - return frustum(xmin, xmax, ymin, ymax, zNear, zFar); -} - -/// Get infinite perspective matrix. -inline Matrix perspective(scalar fovy, scalar aspect, scalar zNear) -{ - scalar x = zNear * tan(fovy / 2); - scalar y = x / aspect; - return frustum( -x, x, -y, y, zNear ); -} - -/// Get matrix determinant. 
-inline scalar Matrix::determinant() const -{ - return - m_data[3] * m_data[6] * m_data[ 9] * m_data[12] - m_data[2] * m_data[7] * m_data[ 9] * m_data[12] - m_data[3] * m_data[5] * m_data[10] * m_data[12] + m_data[1] * m_data[7] * m_data[10] * m_data[12] + - m_data[2] * m_data[5] * m_data[11] * m_data[12] - m_data[1] * m_data[6] * m_data[11] * m_data[12] - m_data[3] * m_data[6] * m_data[ 8] * m_data[13] + m_data[2] * m_data[7] * m_data[ 8] * m_data[13] + - m_data[3] * m_data[4] * m_data[10] * m_data[13] - m_data[0] * m_data[7] * m_data[10] * m_data[13] - m_data[2] * m_data[4] * m_data[11] * m_data[13] + m_data[0] * m_data[6] * m_data[11] * m_data[13] + - m_data[3] * m_data[5] * m_data[ 8] * m_data[14] - m_data[1] * m_data[7] * m_data[ 8] * m_data[14] - m_data[3] * m_data[4] * m_data[ 9] * m_data[14] + m_data[0] * m_data[7] * m_data[ 9] * m_data[14] + - m_data[1] * m_data[4] * m_data[11] * m_data[14] - m_data[0] * m_data[5] * m_data[11] * m_data[14] - m_data[2] * m_data[5] * m_data[ 8] * m_data[15] + m_data[1] * m_data[6] * m_data[ 8] * m_data[15] + - m_data[2] * m_data[4] * m_data[ 9] * m_data[15] - m_data[0] * m_data[6] * m_data[ 9] * m_data[15] - m_data[1] * m_data[4] * m_data[10] * m_data[15] + m_data[0] * m_data[5] * m_data[10] * m_data[15]; -} - -inline Matrix transpose(Matrix::Arg m) -{ - Matrix r; - for (int i = 0; i < 4; i++) - { - for (int j = 0; j < 4; j++) - { - r(i, j) = m(j, i); - } - } - return r; -} - -inline Matrix inverse(Matrix::Arg m) -{ - Matrix r; - r.data( 0) = m.data(6)*m.data(11)*m.data(13) - m.data(7)*m.data(10)*m.data(13) + m.data(7)*m.data(9)*m.data(14) - m.data(5)*m.data(11)*m.data(14) - m.data(6)*m.data(9)*m.data(15) + m.data(5)*m.data(10)*m.data(15); - r.data( 1) = m.data(3)*m.data(10)*m.data(13) - m.data(2)*m.data(11)*m.data(13) - m.data(3)*m.data(9)*m.data(14) + m.data(1)*m.data(11)*m.data(14) + m.data(2)*m.data(9)*m.data(15) - m.data(1)*m.data(10)*m.data(15); - r.data( 2) = m.data(2)*m.data( 7)*m.data(13) - m.data(3)*m.data( 6)*m.data(13) + m.data(3)*m.data(5)*m.data(14) - m.data(1)*m.data( 7)*m.data(14) - m.data(2)*m.data(5)*m.data(15) + m.data(1)*m.data( 6)*m.data(15); - r.data( 3) = m.data(3)*m.data( 6)*m.data( 9) - m.data(2)*m.data( 7)*m.data( 9) - m.data(3)*m.data(5)*m.data(10) + m.data(1)*m.data( 7)*m.data(10) + m.data(2)*m.data(5)*m.data(11) - m.data(1)*m.data( 6)*m.data(11); - r.data( 4) = m.data(7)*m.data(10)*m.data(12) - m.data(6)*m.data(11)*m.data(12) - m.data(7)*m.data(8)*m.data(14) + m.data(4)*m.data(11)*m.data(14) + m.data(6)*m.data(8)*m.data(15) - m.data(4)*m.data(10)*m.data(15); - r.data( 5) = m.data(2)*m.data(11)*m.data(12) - m.data(3)*m.data(10)*m.data(12) + m.data(3)*m.data(8)*m.data(14) - m.data(0)*m.data(11)*m.data(14) - m.data(2)*m.data(8)*m.data(15) + m.data(0)*m.data(10)*m.data(15); - r.data( 6) = m.data(3)*m.data( 6)*m.data(12) - m.data(2)*m.data( 7)*m.data(12) - m.data(3)*m.data(4)*m.data(14) + m.data(0)*m.data( 7)*m.data(14) + m.data(2)*m.data(4)*m.data(15) - m.data(0)*m.data( 6)*m.data(15); - r.data( 7) = m.data(2)*m.data( 7)*m.data( 8) - m.data(3)*m.data( 6)*m.data( 8) + m.data(3)*m.data(4)*m.data(10) - m.data(0)*m.data( 7)*m.data(10) - m.data(2)*m.data(4)*m.data(11) + m.data(0)*m.data( 6)*m.data(11); - r.data( 8) = m.data(5)*m.data(11)*m.data(12) - m.data(7)*m.data( 9)*m.data(12) + m.data(7)*m.data(8)*m.data(13) - m.data(4)*m.data(11)*m.data(13) - m.data(5)*m.data(8)*m.data(15) + m.data(4)*m.data( 9)*m.data(15); - r.data( 9) = m.data(3)*m.data( 9)*m.data(12) - m.data(1)*m.data(11)*m.data(12) - m.data(3)*m.data(8)*m.data(13) + 
m.data(0)*m.data(11)*m.data(13) + m.data(1)*m.data(8)*m.data(15) - m.data(0)*m.data( 9)*m.data(15); - r.data(10) = m.data(1)*m.data( 7)*m.data(12) - m.data(3)*m.data( 5)*m.data(12) + m.data(3)*m.data(4)*m.data(13) - m.data(0)*m.data( 7)*m.data(13) - m.data(1)*m.data(4)*m.data(15) + m.data(0)*m.data( 5)*m.data(15); - r.data(11) = m.data(3)*m.data( 5)*m.data( 8) - m.data(1)*m.data( 7)*m.data( 8) - m.data(3)*m.data(4)*m.data( 9) + m.data(0)*m.data( 7)*m.data( 9) + m.data(1)*m.data(4)*m.data(11) - m.data(0)*m.data( 5)*m.data(11); - r.data(12) = m.data(6)*m.data( 9)*m.data(12) - m.data(5)*m.data(10)*m.data(12) - m.data(6)*m.data(8)*m.data(13) + m.data(4)*m.data(10)*m.data(13) + m.data(5)*m.data(8)*m.data(14) - m.data(4)*m.data( 9)*m.data(14); - r.data(13) = m.data(1)*m.data(10)*m.data(12) - m.data(2)*m.data( 9)*m.data(12) + m.data(2)*m.data(8)*m.data(13) - m.data(0)*m.data(10)*m.data(13) - m.data(1)*m.data(8)*m.data(14) + m.data(0)*m.data( 9)*m.data(14); - r.data(14) = m.data(2)*m.data( 5)*m.data(12) - m.data(1)*m.data( 6)*m.data(12) - m.data(2)*m.data(4)*m.data(13) + m.data(0)*m.data( 6)*m.data(13) + m.data(1)*m.data(4)*m.data(14) - m.data(0)*m.data( 5)*m.data(14); - r.data(15) = m.data(1)*m.data( 6)*m.data( 8) - m.data(2)*m.data( 5)*m.data( 8) + m.data(2)*m.data(4)*m.data( 9) - m.data(0)*m.data( 6)*m.data( 9) - m.data(1)*m.data(4)*m.data(10) + m.data(0)*m.data( 5)*m.data(10); - r.scale(1.0f / m.determinant()); - return r; -} +// - Matrices are stored in memory in *column major* order. +// - Points are to be though of as column vectors. +// - Transformation of a point p by a matrix M is: p' = M * p -inline Matrix isometryInverse(Matrix::Arg m) -{ - Matrix r(identity); - - // transposed 3x3 upper left matrix - for (int i = 0; i < 3; i++) - { - for (int j = 0; j < 3; j++) - { - r(i, j) = m(j, i); - } - } - - // translate by the negative offsets - r.translate(-Vector3(m.data(12), m.data(13), m.data(14))); - - return r; -} - -//Matrix affineInverse(Matrix::Arg m); - -/// Transform the given 3d point with the given matrix. -inline Vector3 transformPoint(Matrix::Arg m, Vector3::Arg p) -{ - return Vector3( - p.x() * m(0,0) + p.y() * m(0,1) + p.z() * m(0,2) + m(0,3), - p.x() * m(1,0) + p.y() * m(1,1) + p.z() * m(1,2) + m(1,3), - p.x() * m(2,0) + p.y() * m(2,1) + p.z() * m(2,2) + m(2,3)); -} - -/// Transform the given 3d vector with the given matrix. -inline Vector3 transformVector(Matrix::Arg m, Vector3::Arg p) +namespace nv { - return Vector3( - p.x() * m(0,0) + p.y() * m(0,1) + p.z() * m(0,2), - p.x() * m(1,0) + p.y() * m(1,1) + p.z() * m(1,2), - p.x() * m(2,0) + p.y() * m(2,1) + p.z() * m(2,2)); -} + enum identity_t { identity }; -/// Transform the given 4d vector with the given matrix. -inline Vector4 transform(Matrix::Arg m, Vector4::Arg p) -{ - return Vector4( - p.x() * m(0,0) + p.y() * m(0,1) + p.z() * m(0,2) + p.w() * m(0,3), - p.x() * m(1,0) + p.y() * m(1,1) + p.z() * m(1,2) + p.w() * m(1,3), - p.x() * m(2,0) + p.y() * m(2,1) + p.z() * m(2,2) + p.w() * m(2,3), - p.x() * m(3,0) + p.y() * m(3,1) + p.z() * m(3,2) + p.w() * m(3,3)); -} - -inline Matrix mul(Matrix::Arg a, Matrix::Arg b) -{ - // @@ Is this the right order? mul(a, b) = b * a - Matrix m = a; - m.apply(b); - return m; -} + // 3x3 matrix. 
+ class NVMATH_CLASS Matrix3 + { + public: + Matrix3(); + explicit Matrix3(float f); + explicit Matrix3(identity_t); + Matrix3(const Matrix3 & m); + Matrix3(Vector3::Arg v0, Vector3::Arg v1, Vector3::Arg v2); + + float data(uint idx) const; + float & data(uint idx); + float get(uint row, uint col) const; + float operator()(uint row, uint col) const; + float & operator()(uint row, uint col); + + Vector3 row(uint i) const; + Vector3 column(uint i) const; + + void operator*=(float s); + void operator/=(float s); + void operator+=(const Matrix3 & m); + void operator-=(const Matrix3 & m); + + void scale(float s); + void scale(Vector3::Arg s); + float determinant() const; + + private: + float m_data[9]; + }; + + // Solve equation system using LU decomposition and back-substitution. + extern bool solveLU(const Matrix3 & m, const Vector3 & b, Vector3 * x); + + // Solve equation system using Cramer's inverse. + extern bool solveCramer(const Matrix3 & A, const Vector3 & b, Vector3 * x); + + + // 4x4 matrix. + class NVMATH_CLASS Matrix + { + public: + typedef Matrix const & Arg; + + Matrix(); + explicit Matrix(float f); + explicit Matrix(identity_t); + Matrix(const Matrix3 & m); + Matrix(const Matrix & m); + Matrix(Vector4::Arg v0, Vector4::Arg v1, Vector4::Arg v2, Vector4::Arg v3); + //explicit Matrix(const float m[]); // m is assumed to contain 16 elements + + float data(uint idx) const; + float & data(uint idx); + float get(uint row, uint col) const; + float operator()(uint row, uint col) const; + float & operator()(uint row, uint col); + const float * ptr() const; + + Vector4 row(uint i) const; + Vector4 column(uint i) const; + + void zero(); + void identity(); + + void scale(float s); + void scale(Vector3::Arg s); + void translate(Vector3::Arg t); + void rotate(float theta, float v0, float v1, float v2); + float determinant() const; + + void operator+=(const Matrix & m); + void operator-=(const Matrix & m); + + void apply(Matrix::Arg m); + + private: + float m_data[16]; + }; + + // Solve equation system using LU decomposition and back-substitution. + extern bool solveLU(const Matrix & A, const Vector4 & b, Vector4 * x); + + // Solve equation system using Cramer's inverse. + extern bool solveCramer(const Matrix & A, const Vector4 & b, Vector4 * x); + + // Compute inverse using LU decomposition. + extern Matrix inverseLU(const Matrix & m); + + // Compute inverse using Gaussian elimination and partial pivoting. + extern Matrix inverse(const Matrix & m); + extern Matrix3 inverse(const Matrix3 & m); } // nv namespace - - - -#if 0 - /** @name Special matrices. */ - //@{ - /** Generate a translation matrix. */ - void TranslationMatrix(const Vec3 & v) { - data[0] = 1; data[1] = 0; data[2] = 0; data[3] = 0; - data[4] = 0; data[5] = 1; data[6] = 0; data[7] = 0; - data[8] = 0; data[9] = 0; data[10] = 1; data[11] = 0; - data[12] = v.x; data[13] = v.y; data[14] = v.z; data[15] = 1; - } - - /** Rotate theta degrees around v. 
*/ - void RotationMatrix( scalar theta, scalar v0, scalar v1, scalar v2 ) { - scalar cost = cos(theta); - scalar sint = sin(theta); - - if( 1 == v0 && 0 == v1 && 0 == v2 ) { - data[0] = 1.0f; data[1] = 0.0f; data[2] = 0.0f; data[3] = 0.0f; - data[4] = 0.0f; data[5] = cost; data[6] = -sint;data[7] = 0.0f; - data[8] = 0.0f; data[9] = sint; data[10] = cost;data[11] = 0.0f; - data[12] = 0.0f;data[13] = 0.0f;data[14] = 0.0f;data[15] = 1.0f; - } - else if( 0 == v0 && 1 == v1 && 0 == v2 ) { - data[0] = cost; data[1] = 0.0f; data[2] = sint; data[3] = 0.0f; - data[4] = 0.0f; data[5] = 1.0f; data[6] = 0.0f; data[7] = 0.0f; - data[8] = -sint;data[9] = 0.0f;data[10] = cost; data[11] = 0.0f; - data[12] = 0.0f;data[13] = 0.0f;data[14] = 0.0f;data[15] = 1.0f; - } - else if( 0 == v0 && 0 == v1 && 1 == v2 ) { - data[0] = cost; data[1] = -sint;data[2] = 0.0f; data[3] = 0.0f; - data[4] = sint; data[5] = cost; data[6] = 0.0f; data[7] = 0.0f; - data[8] = 0.0f; data[9] = 0.0f; data[10] = 1.0f;data[11] = 0.0f; - data[12] = 0.0f;data[13] = 0.0f;data[14] = 0.0f;data[15] = 1.0f; - } - else { - //we need scale a,b,c to unit length. - scalar a2, b2, c2; - a2 = v0 * v0; - b2 = v1 * v1; - c2 = v2 * v2; - - scalar iscale = 1.0f / sqrtf(a2 + b2 + c2); - v0 *= iscale; - v1 *= iscale; - v2 *= iscale; - - scalar abm, acm, bcm; - scalar mcos, asin, bsin, csin; - mcos = 1.0f - cost; - abm = v0 * v1 * mcos; - acm = v0 * v2 * mcos; - bcm = v1 * v2 * mcos; - asin = v0 * sint; - bsin = v1 * sint; - csin = v2 * sint; - data[0] = a2 * mcos + cost; - data[1] = abm - csin; - data[2] = acm + bsin; - data[3] = abm + csin; - data[4] = 0.0f; - data[5] = b2 * mcos + cost; - data[6] = bcm - asin; - data[7] = acm - bsin; - data[8] = 0.0f; - data[9] = bcm + asin; - data[10] = c2 * mcos + cost; - data[11] = 0.0f; - data[12] = 0.0f; - data[13] = 0.0f; - data[14] = 0.0f; - data[15] = 1.0f; - } - } - - /* - void SkewMatrix(scalar angle, const Vec3 & v1, const Vec3 & v2) { - v1.Normalize(); - v2.Normalize(); - - Vec3 v3; - v3.Cross(v1, v2); - v3.Normalize(); - - // Get skew factor. - scalar costheta = Vec3DotProduct(v1, v2); - scalar sintheta = Real.Sqrt(1 - costheta * costheta); - scalar skew = tan(Trig.DegreesToRadians(angle) + acos(sintheta)) * sintheta - costheta; - - // Build orthonormal matrix. - v1 = FXVector3.Cross(v3, v2); - v1.Normalize(); - - Matrix R = Matrix::Identity; - R[0, 0] = v3.X; // Not sure this is in the correct order... - R[1, 0] = v3.Y; - R[2, 0] = v3.Z; - R[0, 1] = v1.X; - R[1, 1] = v1.Y; - R[2, 1] = v1.Z; - R[0, 2] = v2.X; - R[1, 2] = v2.Y; - R[2, 2] = v2.Z; - - // Build skew matrix. - Matrix S = Matrix::Identity; - S[2, 1] = -skew; - - // Return skew transform. - return R * S * R.Transpose; // Not sure this is in the correct order... - } - */ - - /** - * Generate rotation matrix for the euler angles. This is the same as computing - * 3 rotation matrices and multiplying them together in our custom order. - * - * @todo Have to recompute this code for our new convention. 
- **/ - void RotationMatrix( scalar yaw, scalar pitch, scalar roll ) { - scalar sy = sin(yaw+ToRadian(90)); - scalar cy = cos(yaw+ToRadian(90)); - scalar sp = sin(pitch-ToRadian(90)); - scalar cp = cos(pitch-ToRadian(90)); - scalar sr = sin(roll); - scalar cr = cos(roll); - - data[0] = cr*cy + sr*sp*sy; - data[1] = cp*sy; - data[2] = -sr*cy + cr*sp*sy; - data[3] = 0; - - data[4] = -cr*sy + sr*sp*cy; - data[5] = cp*cy; - data[6] = sr*sy + cr*sp*cy; - data[7] = 0; - - data[8] = sr*cp; - data[9] = -sp; - data[10] = cr*cp; - data[11] = 0; - - data[12] = 0; - data[13] = 0; - data[14] = 0; - data[15] = 1; - } - - /** Create a frustum matrix with the far plane at the infinity. */ - void Frustum( scalar xmin, scalar xmax, scalar ymin, scalar ymax, scalar zNear, scalar zFar ) { - scalar one_deltax, one_deltay, one_deltaz, doubleznear; - - doubleznear = 2.0f * zNear; - one_deltax = 1.0f / (xmax - xmin); - one_deltay = 1.0f / (ymax - ymin); - one_deltaz = 1.0f / (zFar - zNear); - - data[0] = (scalar)(doubleznear * one_deltax); - data[1] = 0.0f; - data[2] = 0.0f; - data[3] = 0.0f; - data[4] = 0.0f; - data[5] = (scalar)(doubleznear * one_deltay); - data[6] = 0.f; - data[7] = 0.f; - data[8] = (scalar)((xmax + xmin) * one_deltax); - data[9] = (scalar)((ymax + ymin) * one_deltay); - data[10] = (scalar)(-(zFar + zNear) * one_deltaz); - data[11] = -1.f; - data[12] = 0.f; - data[13] = 0.f; - data[14] = (scalar)(-(zFar * doubleznear) * one_deltaz); - data[15] = 0.f; - } - - /** Create a frustum matrix with the far plane at the infinity. */ - void FrustumInf( scalar xmin, scalar xmax, scalar ymin, scalar ymax, scalar zNear ) { - scalar one_deltax, one_deltay, doubleznear, nudge; - - doubleznear = 2.0f * zNear; - one_deltax = 1.0f / (xmax - xmin); - one_deltay = 1.0f / (ymax - ymin); - nudge = 1.0; // 0.999; - - data[0] = doubleznear * one_deltax; - data[1] = 0.0f; - data[2] = 0.0f; - data[3] = 0.0f; - - data[4] = 0.0f; - data[5] = doubleznear * one_deltay; - data[6] = 0.f; - data[7] = 0.f; - - data[8] = (xmax + xmin) * one_deltax; - data[9] = (ymax + ymin) * one_deltay; - data[10] = -1.0f * nudge; - data[11] = -1.0f; - - data[12] = 0.f; - data[13] = 0.f; - data[14] = -doubleznear * nudge; - data[15] = 0.f; - } - - /** Create an inverse frustum matrix with the far plane at the infinity. */ - void FrustumInfInv( scalar left, scalar right, scalar bottom, scalar top, scalar zNear ) { - // this matrix is wrong (not tested scalarly) I think it should be transposed. - data[0] = (right - left) / (2 * zNear); - data[1] = 0; - data[2] = 0; - data[3] = (right + left) / (2 * zNear); - data[4] = 0; - data[5] = (top - bottom) / (2 * zNear); - data[6] = 0; - data[7] = (top + bottom) / (2 * zNear); - data[8] = 0; - data[9] = 0; - data[10] = 0; - data[11] = -1; - data[12] = 0; - data[13] = 0; - data[14] = -1 / (2 * zNear); - data[15] = 1 / (2 * zNear); - } - - /** Create an homogeneous projection matrix. */ - void Perspective( scalar fov, scalar aspect, scalar zNear, scalar zFar ) { - scalar xmin, xmax, ymin, ymax; - - xmax = zNear * tan( fov/2 ); - xmin = -xmax; - - ymax = xmax / aspect; - ymin = -ymax; - - Frustum(xmin, xmax, ymin, ymax, zNear, zFar); - } - - /** Create a projection matrix with the far plane at the infinity. */ - void PerspectiveInf( scalar fov, scalar aspect, scalar zNear ) { - scalar x = zNear * tan( fov/2 ); - scalar y = x / aspect; - FrustumInf( -x, x, -y, y, zNear ); - } - - /** Create an inverse projection matrix with far plane at the infinity. 
*/ - void PerspectiveInfInv( scalar fov, scalar aspect, scalar zNear ) { - scalar x = zNear * tan( fov/2 ); - scalar y = x / aspect; - FrustumInfInv( -x, x, -y, y, zNear ); - } - - /** Build bone matrix from quatertion and offset. */ - void BoneMatrix(const Quat & q, const Vec3 & offset) { - scalar x2, y2, z2, xx, xy, xz, yy, yz, zz, wx, wy, wz; - - // calculate coefficients - x2 = q.x + q.x; - y2 = q.y + q.y; - z2 = q.z + q.z; - - xx = q.x * x2; xy = q.x * y2; xz = q.x * z2; - yy = q.y * y2; yz = q.y * z2; zz = q.z * z2; - wx = q.w * x2; wy = q.w * y2; wz = q.w * z2; - - data[0] = 1.0f - (yy + zz); - data[1] = xy - wz; - data[2] = xz + wy; - data[3] = 0.0f; - - data[4] = xy + wz; - data[5] = 1.0f - (xx + zz); - data[6] = yz - wx; - data[7] = 0.0f; - - data[8] = xz - wy; - data[9] = yz + wx; - data[10] = 1.0f - (xx + yy); - data[11] = 0.0f; - - data[12] = offset.x; - data[13] = offset.y; - data[14] = offset.z; - data[15] = 1.0f; - } - - //@} - - - /** @name Transformations: */ - //@{ - - /** Apply a general scale. */ - void Scale( scalar x, scalar y, scalar z ) { - data[0] *= x; data[4] *= y; data[8] *= z; - data[1] *= x; data[5] *= y; data[9] *= z; - data[2] *= x; data[6] *= y; data[10] *= z; - data[3] *= x; data[7] *= y; data[11] *= z; - } - - /** Apply a rotation of theta degrees around the axis v*/ - void Rotate( scalar theta, const Vec3 & v ) { - Matrix b; - b.RotationMatrix( theta, v[0], v[1], v[2] ); - Multiply4x3( b ); - } - - /** Apply a rotation of theta degrees around the axis v*/ - void Rotate( scalar theta, scalar v0, scalar v1, scalar v2 ) { - Matrix b; - b.RotationMatrix( theta, v0, v1, v2 ); - Multiply4x3( b ); - } - - /** - * Translate the matrix by t. This is the same as multiplying by a - * translation matrix with the given offset. - * this = T * this - */ - void Translate( const Vec3 &t ) { - data[12] = data[0] * t.x + data[4] * t.y + data[8] * t.z + data[12]; - data[13] = data[1] * t.x + data[5] * t.y + data[9] * t.z + data[13]; - data[14] = data[2] * t.x + data[6] * t.y + data[10] * t.z + data[14]; - data[15] = data[3] * t.x + data[7] * t.y + data[11] * t.z + data[15]; - } - - /** - * Translate the matrix by x, y, z. This is the same as multiplying by a - * translation matrix with the given offsets. - */ - void Translate( scalar x, scalar y, scalar z ) { - data[12] = data[0] * x + data[4] * y + data[8] * z + data[12]; - data[13] = data[1] * x + data[5] * y + data[9] * z + data[13]; - data[14] = data[2] * x + data[6] * y + data[10] * z + data[14]; - data[15] = data[3] * x + data[7] * y + data[11] * z + data[15]; - } - - /** Compute the transposed matrix. */ - void Transpose() { - piSwap(data[1], data[4]); - piSwap(data[2], data[8]); - piSwap(data[6], data[9]); - piSwap(data[3], data[12]); - piSwap(data[7], data[13]); - piSwap(data[11], data[14]); - } - - /** Compute the inverse of a rigid-body/isometry/orthonormal matrix. */ - void IsometryInverse() { - // transposed 3x3 upper left matrix - piSwap(data[1], data[4]); - piSwap(data[2], data[8]); - piSwap(data[6], data[9]); - - // translate by the negative offsets - Vec3 v(-data[12], -data[13], -data[14]); - data[12] = data[13] = data[14] = 0; - Translate(v); - } - - /** Compute the inverse of the affine portion of this matrix. */ - void AffineInverse() { - data[12] = data[13] = data[14] = 0; - Transpose(); - } - //@} - - /** @name Matrix operations: */ - //@{ - - /** Return the determinant of this matrix. 
*/ - scalar Determinant() const { - return data[0] * data[5] * data[10] * data[15] + - data[1] * data[6] * data[11] * data[12] + - data[2] * data[7] * data[ 8] * data[13] + - data[3] * data[4] * data[ 9] * data[14] - - data[3] * data[6] * data[ 9] * data[12] - - data[2] * data[5] * data[ 8] * data[15] - - data[1] * data[4] * data[11] * data[14] - - data[0] * data[7] * data[10] * data[12]; - } - - - /** Standard matrix product: this *= B. */ - void Multiply4x4( const Matrix & restrict B ) { - Multiply4x4(*this, B); - } - - /** Standard matrix product: this = A * B. this != B*/ - void Multiply4x4( const Matrix & A, const Matrix & restrict B ) { - piDebugCheck(this != &B); - - for(int i = 0; i < 4; i++) { - const scalar ai0 = A(i,0), ai1 = A(i,1), ai2 = A(i,2), ai3 = A(i,3); - GetElem(i,0) = ai0 * B(0,0) + ai1 * B(1,0) + ai2 * B(2,0) + ai3 * B(3,0); - GetElem(i,1) = ai0 * B(0,1) + ai1 * B(1,1) + ai2 * B(2,1) + ai3 * B(3,1); - GetElem(i,2) = ai0 * B(0,2) + ai1 * B(1,2) + ai2 * B(2,2) + ai3 * B(3,2); - GetElem(i,3) = ai0 * B(0,3) + ai1 * B(1,3) + ai2 * B(2,3) + ai3 * B(3,3); - } - - /* Unrolled but does not allow this == A - data[0] = A.data[0] * B.data[0] + A.data[4] * B.data[1] + A.data[8] * B.data[2] + A.data[12] * B.data[3]; - data[1] = A.data[1] * B.data[0] + A.data[5] * B.data[1] + A.data[9] * B.data[2] + A.data[13] * B.data[3]; - data[2] = A.data[2] * B.data[0] + A.data[6] * B.data[1] + A.data[10] * B.data[2] + A.data[14] * B.data[3]; - data[3] = A.data[3] * B.data[0] + A.data[7] * B.data[1] + A.data[11] * B.data[2] + A.data[15] * B.data[3]; - data[4] = A.data[0] * B.data[4] + A.data[4] * B.data[5] + A.data[8] * B.data[6] + A.data[12] * B.data[7]; - data[5] = A.data[1] * B.data[4] + A.data[5] * B.data[5] + A.data[9] * B.data[6] + A.data[13] * B.data[7]; - data[6] = A.data[2] * B.data[4] + A.data[6] * B.data[5] + A.data[10] * B.data[6] + A.data[14] * B.data[7]; - data[7] = A.data[3] * B.data[4] + A.data[7] * B.data[5] + A.data[11] * B.data[6] + A.data[15] * B.data[7]; - data[8] = A.data[0] * B.data[8] + A.data[4] * B.data[9] + A.data[8] * B.data[10] + A.data[12] * B.data[11]; - data[9] = A.data[1] * B.data[8] + A.data[5] * B.data[9] + A.data[9] * B.data[10] + A.data[13] * B.data[11]; - data[10]= A.data[2] * B.data[8] + A.data[6] * B.data[9] + A.data[10] * B.data[10] + A.data[14] * B.data[11]; - data[11]= A.data[3] * B.data[8] + A.data[7] * B.data[9] + A.data[11] * B.data[10] + A.data[15] * B.data[11]; - data[12]= A.data[0] * B.data[12] + A.data[4] * B.data[13] + A.data[8] * B.data[14] + A.data[12] * B.data[15]; - data[13]= A.data[1] * B.data[12] + A.data[5] * B.data[13] + A.data[9] * B.data[14] + A.data[13] * B.data[15]; - data[14]= A.data[2] * B.data[12] + A.data[6] * B.data[13] + A.data[10] * B.data[14] + A.data[14] * B.data[15]; - data[15]= A.data[3] * B.data[12] + A.data[7] * B.data[13] + A.data[11] * B.data[14] + A.data[15] * B.data[15]; - */ - } - - /** Standard matrix product: this *= B. */ - void Multiply4x3( const Matrix & restrict B ) { - Multiply4x3(*this, B); - } - - /** Standard product of matrices, where the last row is [0 0 0 1]. 
*/ - void Multiply4x3( const Matrix & A, const Matrix & restrict B ) { - piDebugCheck(this != &B); - - for(int i = 0; i < 3; i++) { - const scalar ai0 = A(i,0), ai1 = A(i,1), ai2 = A(i,2), ai3 = A(i,3); - GetElem(i,0) = ai0 * B(0,0) + ai1 * B(1,0) + ai2 * B(2,0) + ai3 * B(3,0); - GetElem(i,1) = ai0 * B(0,1) + ai1 * B(1,1) + ai2 * B(2,1) + ai3 * B(3,1); - GetElem(i,2) = ai0 * B(0,2) + ai1 * B(1,2) + ai2 * B(2,2) + ai3 * B(3,2); - GetElem(i,3) = ai0 * B(0,3) + ai1 * B(1,3) + ai2 * B(2,3) + ai3 * B(3,3); - } - data[3] = 0.0f; data[7] = 0.0f; data[11] = 0.0f; data[15] = 1.0f; - - /* Unrolled but does not allow this == A - data[0] = a.data[0] * b.data[0] + a.data[4] * b.data[1] + a.data[8] * b.data[2] + a.data[12] * b.data[3]; - data[1] = a.data[1] * b.data[0] + a.data[5] * b.data[1] + a.data[9] * b.data[2] + a.data[13] * b.data[3]; - data[2] = a.data[2] * b.data[0] + a.data[6] * b.data[1] + a.data[10] * b.data[2] + a.data[14] * b.data[3]; - data[3] = 0.0f; - data[4] = a.data[0] * b.data[4] + a.data[4] * b.data[5] + a.data[8] * b.data[6] + a.data[12] * b.data[7]; - data[5] = a.data[1] * b.data[4] + a.data[5] * b.data[5] + a.data[9] * b.data[6] + a.data[13] * b.data[7]; - data[6] = a.data[2] * b.data[4] + a.data[6] * b.data[5] + a.data[10] * b.data[6] + a.data[14] * b.data[7]; - data[7] = 0.0f; - data[8] = a.data[0] * b.data[8] + a.data[4] * b.data[9] + a.data[8] * b.data[10] + a.data[12] * b.data[11]; - data[9] = a.data[1] * b.data[8] + a.data[5] * b.data[9] + a.data[9] * b.data[10] + a.data[13] * b.data[11]; - data[10]= a.data[2] * b.data[8] + a.data[6] * b.data[9] + a.data[10] * b.data[10] + a.data[14] * b.data[11]; - data[11]= 0.0f; - data[12]= a.data[0] * b.data[12] + a.data[4] * b.data[13] + a.data[8] * b.data[14] + a.data[12] * b.data[15]; - data[13]= a.data[1] * b.data[12] + a.data[5] * b.data[13] + a.data[9] * b.data[14] + a.data[13] * b.data[15]; - data[14]= a.data[2] * b.data[12] + a.data[6] * b.data[13] + a.data[10] * b.data[14] + a.data[14] * b.data[15]; - data[15]= 1.0f; - */ - } - //@} - - - /** @name Vector operations: */ - //@{ - - /** Transform 3d vector (w=0). */ - void TransformVec3(const Vec3 & restrict orig, Vec3 * restrict dest) const { - piDebugCheck(&orig != dest); - dest->x = orig.x * data[0] + orig.y * data[4] + orig.z * data[8]; - dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9]; - dest->z = orig.x * data[2] + orig.y * data[6] + orig.z * data[10]; - } - /** Transform 3d vector by the transpose (w=0). */ - void TransformVec3T(const Vec3 & restrict orig, Vec3 * restrict dest) const { - piDebugCheck(&orig != dest); - dest->x = orig.x * data[0] + orig.y * data[1] + orig.z * data[2]; - dest->y = orig.x * data[4] + orig.y * data[5] + orig.z * data[6]; - dest->z = orig.x * data[8] + orig.y * data[9] + orig.z * data[10]; - } - - /** Transform a 3d homogeneous vector, where the fourth coordinate is assumed to be 1. */ - void TransformPoint(const Vec3 & restrict orig, Vec3 * restrict dest) const { - piDebugCheck(&orig != dest); - dest->x = orig.x * data[0] + orig.y * data[4] + orig.z * data[8] + data[12]; - dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9] + data[13]; - dest->z = orig.x * data[2] + orig.y * data[6] + orig.z * data[10] + data[14]; - } - - /** Transform a point, normalize it, and return w. 
*/ - scalar TransformPointAndNormalize(const Vec3 & restrict orig, Vec3 * restrict dest) const { - piDebugCheck(&orig != dest); - scalar w; - dest->x = orig.x * data[0] + orig.y * data[4] + orig.z * data[8] + data[12]; - dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9] + data[13]; - dest->z = orig.x * data[2] + orig.y * data[6] + orig.z * data[10] + data[14]; - w = 1 / (orig.x * data[3] + orig.y * data[7] + orig.z * data[11] + data[15]); - *dest *= w; - return w; - } - - /** Transform a point and return w. */ - scalar TransformPointReturnW(const Vec3 & restrict orig, Vec3 * restrict dest) const { - piDebugCheck(&orig != dest); - dest->x = orig.x * data[0] + orig.y * data[4] + orig.z * data[8] + data[12]; - dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9] + data[13]; - dest->z = orig.x * data[2] + orig.y * data[6] + orig.z * data[10] + data[14]; - return orig.x * data[3] + orig.y * data[7] + orig.z * data[11] + data[15]; - } - - /** Transform a normalized 3d point by a 4d matrix and return the resulting 4d vector. */ - void TransformVec4(const Vec3 & orig, Vec4 * dest) const { - dest->x = orig.x * data[0] + orig.y * data[4] + orig.z * data[8] + data[12]; - dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9] + data[13]; - dest->z = orig.x * data[2] + orig.y * data[6] + orig.z * data[10] + data[14]; - dest->w = orig.x * data[3] + orig.y * data[7] + orig.z * data[11] + data[15]; - } - //@} - - /** @name Matrix analysis. */ - //@{ - - /** Get the ZYZ euler angles from the matrix. Assumes the matrix is orthonormal. */ - void GetEulerAnglesZYZ(scalar * s, scalar * t, scalar * r) const { - if( GetElem(2,2) < 1.0f ) { - if( GetElem(2,2) > -1.0f ) { - // cs*ct*cr-ss*sr -ss*ct*cr-cs*sr st*cr - // cs*ct*sr+ss*cr -ss*ct*sr+cs*cr st*sr - // -cs*st ss*st ct - *s = atan2(GetElem(1,2), -GetElem(0,2)); - *t = acos(GetElem(2,2)); - *r = atan2(GetElem(2,1), GetElem(2,0)); - } - else { - // -c(s-r) s(s-r) 0 - // s(s-r) c(s-r) 0 - // 0 0 -1 - *s = atan2(GetElem(0, 1), -GetElem(0, 0)); // = s-r - *t = PI; - *r = 0; - } - } - else { - // c(s+r) -s(s+r) 0 - // s(s+r) c(s+r) 0 - // 0 0 1 - *s = atan2(GetElem(0, 1), GetElem(0, 0)); // = s+r - *t = 0; - *r = 0; - } - } - - //@} - - MATHLIB_API friend PiStream & operator<< ( PiStream & s, Matrix & m ); - - /** Print to debug output. */ - void Print() const { - piDebug( "[ %5.2f %5.2f %5.2f %5.2f ]\n", data[0], data[4], data[8], data[12] ); - piDebug( "[ %5.2f %5.2f %5.2f %5.2f ]\n", data[1], data[5], data[9], data[13] ); - piDebug( "[ %5.2f %5.2f %5.2f %5.2f ]\n", data[2], data[6], data[10], data[14] ); - piDebug( "[ %5.2f %5.2f %5.2f %5.2f ]\n", data[3], data[7], data[11], data[15] ); - } - - -public: - - scalar data[16]; - -}; -#endif - - - - #endif // NV_MATH_MATRIX_H Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/Matrix.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvmath/Matrix.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvmath/Matrix.cpp @@ -0,0 +1,441 @@ +// This code is in the public domain -- castanyo@yahoo.es + +#include "Matrix.inl" +#include "Vector.inl" + +#include "nvcore/Array.inl" + +#include + +#if !NV_CC_MSVC && !NV_OS_ORBIS +#include +#endif + +using namespace nv; + + +// Given a matrix a[1..n][1..n], this routine replaces it by the LU decomposition of a rowwise +// permutation of itself. a and n are input. 
a is output, arranged as in equation (2.3.14) above; +// indx[1..n] is an output vector that records the row permutation effected by the partial +// pivoting; d is output as -1 depending on whether the number of row interchanges was even +// or odd, respectively. This routine is used in combination with lubksb to solve linear equations +// or invert a matrix. +static bool ludcmp(float **a, int n, int *indx, float *d) +{ + const float TINY = 1.0e-20f; + + float * vv = (float*)alloca(sizeof(float) * n); // vv stores the implicit scaling of each row. + + *d = 1.0; // No row interchanges yet. + for (int i = 0; i < n; i++) { // Loop over rows to get the implicit scaling information. + + float big = 0.0; + for (int j = 0; j < n; j++) { + big = max(big, fabsf(a[i][j])); + } + if (big == 0) { + return false; // Singular matrix + } + + // No nonzero largest element. + vv[i] = 1.0f / big; // Save the scaling. + } + + for (int j = 0; j < n; j++) { // This is the loop over columns of Crout's method. + for (int i = 0; i < j; i++) { // This is equation (2.3.12) except for i = j. + float sum = a[i][j]; + for (int k = 0; k < i; k++) sum -= a[i][k]*a[k][j]; + a[i][j] = sum; + } + + int imax = -1; + float big = 0.0; // Initialize for the search for largest pivot element. + for (int i = j; i < n; i++) { // This is i = j of equation (2.3.12) and i = j+ 1 : : : N + float sum = a[i][j]; // of equation (2.3.13). + for (int k = 0; k < j; k++) { + sum -= a[i][k]*a[k][j]; + } + a[i][j]=sum; + + float dum = vv[i]*fabs(sum); + if (dum >= big) { + // Is the figure of merit for the pivot better than the best so far? + big = dum; + imax = i; + } + } + nvDebugCheck(imax != -1); + + if (j != imax) { // Do we need to interchange rows? + for (int k = 0; k < n; k++) { // Yes, do so... + swap(a[imax][k], a[j][k]); + } + *d = -(*d); // ...and change the parity of d. + vv[imax]=vv[j]; // Also interchange the scale factor. + } + + indx[j]=imax; + if (a[j][j] == 0.0) a[j][j] = TINY; + + // If the pivot element is zero the matrix is singular (at least to the precision of the + // algorithm). For some applications on singular matrices, it is desirable to substitute + // TINY for zero. + if (j != n-1) { // Now, finally, divide by the pivot element. + float dum = 1.0f / a[j][j]; + for (int i = j+1; i < n; i++) a[i][j] *= dum; + } + } // Go back for the next column in the reduction. + + return true; +} + + +// Solves the set of n linear equations Ax = b. Here a[1..n][1..n] is input, not as the matrix +// A but rather as its LU decomposition, determined by the routine ludcmp. indx[1..n] is input +// as the permutation vector returned by ludcmp. b[1..n] is input as the right-hand side vector +// B, and returns with the solution vector X. a, n, and indx are not modified by this routine +// and can be left in place for successive calls with different right-hand sides b. This routine takes +// into account the possibility that b will begin with many zero elements, so it is efficient for use +// in matrix inversion. +static void lubksb(float **a, int n, int *indx, float b[]) +{ + int ii = 0; + for (int i=0; i=0; i--) { // Now we do the backsubstitution, equation (2.3.7). + float sum = b[i]; + for (int j = i+1; j < n; j++) { + sum -= a[i][j]*b[j]; + } + b[i] = sum/a[i][i]; // Store a component of the solution vector X. + } // All done! 
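    // A minimal usage sketch, not taken from the NVTT sources themselves:
    // because ludcmp() above follows the classic Numerical Recipes form, the
    // parity value d it returns can also be combined with the diagonal of the
    // packed LU factors to recover the determinant of A, e.g. for a 4x4
    // system (variable names here are only illustrative):
    //
    //     float det = d;
    //     for (int k = 0; k < 4; k++) det *= a[k][k];
    //
    // solveLU() below simply chains the two routines: factor once with
    // ludcmp(), then back-substitute each right-hand side with lubksb().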
+} + + +bool nv::solveLU(const Matrix & A, const Vector4 & b, Vector4 * x) +{ + nvDebugCheck(x != NULL); + + float m[4][4]; + float *a[4] = {m[0], m[1], m[2], m[3]}; + int idx[4]; + float d; + + for (int y = 0; y < 4; y++) { + for (int x = 0; x < 4; x++) { + a[x][y] = A(x, y); + } + } + + // Create LU decomposition. + if (!ludcmp(a, 4, idx, &d)) { + // Singular matrix. + return false; + } + + // Init solution. + *x = b; + + // Do back substitution. + lubksb(a, 4, idx, x->component); + + return true; +} + +// @@ Not tested. +Matrix nv::inverseLU(const Matrix & A) +{ + Vector4 Ai[4]; + + solveLU(A, Vector4(1, 0, 0, 0), &Ai[0]); + solveLU(A, Vector4(0, 1, 0, 0), &Ai[1]); + solveLU(A, Vector4(0, 0, 1, 0), &Ai[2]); + solveLU(A, Vector4(0, 0, 0, 1), &Ai[3]); + + return Matrix(Ai[0], Ai[1], Ai[2], Ai[3]); +} + + + +bool nv::solveLU(const Matrix3 & A, const Vector3 & b, Vector3 * x) +{ + nvDebugCheck(x != NULL); + + float m[3][3]; + float *a[3] = {m[0], m[1], m[2]}; + int idx[3]; + float d; + + for (int y = 0; y < 3; y++) { + for (int x = 0; x < 3; x++) { + a[x][y] = A(x, y); + } + } + + // Create LU decomposition. + if (!ludcmp(a, 3, idx, &d)) { + // Singular matrix. + return false; + } + + // Init solution. + *x = b; + + // Do back substitution. + lubksb(a, 3, idx, x->component); + + return true; +} + + +bool nv::solveCramer(const Matrix & A, const Vector4 & b, Vector4 * x) +{ + nvDebugCheck(x != NULL); + + *x = transform(inverseCramer(A), b); + + return true; // @@ Return false if determinant(A) == 0 ! +} + +bool nv::solveCramer(const Matrix3 & A, const Vector3 & b, Vector3 * x) +{ + nvDebugCheck(x != NULL); + + const float det = A.determinant(); + if (equal(det, 0.0f)) { // @@ Use input epsilon. + return false; + } + + Matrix3 Ai = inverseCramer(A); + + *x = transform(Ai, b); + + return true; +} + + + +// Inverse using gaussian elimination. From Jon's code. +Matrix nv::inverse(const Matrix & m) { + + Matrix A = m; + Matrix B(identity); + + int i, j, k; + float max, t, det, pivot; + + det = 1.0; + for (i=0; i<4; i++) { /* eliminate in column i, below diag */ + max = -1.; + for (k=i; k<4; k++) /* find pivot for column i */ + if (fabs(A(k, i)) > max) { + max = fabs(A(k, i)); + j = k; + } + if (max<=0.) return B; /* if no nonzero pivot, PUNT */ + if (j!=i) { /* swap rows i and j */ + for (k=i; k<4; k++) + swap(A(i, k), A(j, k)); + for (k=0; k<4; k++) + swap(B(i, k), B(j, k)); + det = -det; + } + pivot = A(i, i); + det *= pivot; + for (k=i+1; k<4; k++) /* only do elems to right of pivot */ + A(i, k) /= pivot; + for (k=0; k<4; k++) + B(i, k) /= pivot; + /* we know that A(i, i) will be set to 1, so don't bother to do it */ + + for (j=i+1; j<4; j++) { /* eliminate in rows below i */ + t = A(j, i); /* we're gonna zero this guy */ + for (k=i+1; k<4; k++) /* subtract scaled row i from row j */ + A(j, k) -= A(i, k)*t; /* (ignore k<=i, we know they're 0) */ + for (k=0; k<4; k++) + B(j, k) -= B(i, k)*t; + } + } + + /*---------- backward elimination ----------*/ + + for (i=4-1; i>0; i--) { /* eliminate in column i, above diag */ + for (j=0; j max) { + max = fabs(A(k, i)); + j = k; + } + if (max<=0.) 
return B; /* if no nonzero pivot, PUNT */ + if (j!=i) { /* swap rows i and j */ + for (k=i; k<3; k++) + swap(A(i, k), A(j, k)); + for (k=0; k<3; k++) + swap(B(i, k), B(j, k)); + det = -det; + } + pivot = A(i, i); + det *= pivot; + for (k=i+1; k<3; k++) /* only do elems to right of pivot */ + A(i, k) /= pivot; + for (k=0; k<3; k++) + B(i, k) /= pivot; + /* we know that A(i, i) will be set to 1, so don't bother to do it */ + + for (j=i+1; j<3; j++) { /* eliminate in rows below i */ + t = A(j, i); /* we're gonna zero this guy */ + for (k=i+1; k<3; k++) /* subtract scaled row i from row j */ + A(j, k) -= A(i, k)*t; /* (ignore k<=i, we know they're 0) */ + for (k=0; k<3; k++) + B(j, k) -= B(i, k)*t; + } + } + + /*---------- backward elimination ----------*/ + + for (i=3-1; i>0; i--) { /* eliminate in column i, above diag */ + for (j=0; j. +// +// Returns determinant of A, and B=inverse(A) +// If matrix A is singular, returns 0 and leaves trash in B. +// +#define SWAP(a, b, t) {t = a; a = b; b = t;} +double invert(Mat4& B, const Mat4& m) +{ + Mat4 A = m; + int i, j, k; + double max, t, det, pivot; + + /*---------- forward elimination ----------*/ + + for (i=0; i<4; i++) /* put identity matrix in B */ + for (j=0; j<4; j++) + B(i, j) = (double)(i==j); + + det = 1.0; + for (i=0; i<4; i++) { /* eliminate in column i, below diag */ + max = -1.; + for (k=i; k<4; k++) /* find pivot for column i */ + if (fabs(A(k, i)) > max) { + max = fabs(A(k, i)); + j = k; + } + if (max<=0.) return 0.; /* if no nonzero pivot, PUNT */ + if (j!=i) { /* swap rows i and j */ + for (k=i; k<4; k++) + SWAP(A(i, k), A(j, k), t); + for (k=0; k<4; k++) + SWAP(B(i, k), B(j, k), t); + det = -det; + } + pivot = A(i, i); + det *= pivot; + for (k=i+1; k<4; k++) /* only do elems to right of pivot */ + A(i, k) /= pivot; + for (k=0; k<4; k++) + B(i, k) /= pivot; + /* we know that A(i, i) will be set to 1, so don't bother to do it */ + + for (j=i+1; j<4; j++) { /* eliminate in rows below i */ + t = A(j, i); /* we're gonna zero this guy */ + for (k=i+1; k<4; k++) /* subtract scaled row i from row j */ + A(j, k) -= A(i, k)*t; /* (ignore k<=i, we know they're 0) */ + for (k=0; k<4; k++) + B(j, k) -= B(i, k)*t; + } + } + + /*---------- backward elimination ----------*/ + + for (i=4-1; i>0; i--) { /* eliminate in column i, above diag */ + for (j=0; jx = orig.x * data[0] + orig.y * data[4] + orig.z * data[8]; + dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9]; + dest->z = orig.x * data[2] + orig.y * data[6] + orig.z * data[10]; +} +/** Transform 3d vector by the transpose (w=0). */ +void TransformVec3T(const Vec3 & restrict orig, Vec3 * restrict dest) const { + piDebugCheck(&orig != dest); + dest->x = orig.x * data[0] + orig.y * data[1] + orig.z * data[2]; + dest->y = orig.x * data[4] + orig.y * data[5] + orig.z * data[6]; + dest->z = orig.x * data[8] + orig.y * data[9] + orig.z * data[10]; +} + +/** Transform a 3d homogeneous vector, where the fourth coordinate is assumed to be 1. */ +void TransformPoint(const Vec3 & restrict orig, Vec3 * restrict dest) const { + piDebugCheck(&orig != dest); + dest->x = orig.x * data[0] + orig.y * data[4] + orig.z * data[8] + data[12]; + dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9] + data[13]; + dest->z = orig.x * data[2] + orig.y * data[6] + orig.z * data[10] + data[14]; +} + +/** Transform a point, normalize it, and return w. 
*/ +float TransformPointAndNormalize(const Vec3 & restrict orig, Vec3 * restrict dest) const { + piDebugCheck(&orig != dest); + float w; + dest->x = orig.x * data[0] + orig.y * data[4] + orig.z * data[8] + data[12]; + dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9] + data[13]; + dest->z = orig.x * data[2] + orig.y * data[6] + orig.z * data[10] + data[14]; + w = 1 / (orig.x * data[3] + orig.y * data[7] + orig.z * data[11] + data[15]); + *dest *= w; + return w; +} + +/** Transform a point and return w. */ +float TransformPointReturnW(const Vec3 & restrict orig, Vec3 * restrict dest) const { + piDebugCheck(&orig != dest); + dest->x = orig.x * data[0] + orig.y * data[4] + orig.z * data[8] + data[12]; + dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9] + data[13]; + dest->z = orig.x * data[2] + orig.y * data[6] + orig.z * data[10] + data[14]; + return orig.x * data[3] + orig.y * data[7] + orig.z * data[11] + data[15]; +} + +/** Transform a normalized 3d point by a 4d matrix and return the resulting 4d vector. */ +void TransformVec4(const Vec3 & orig, Vec4 * dest) const { + dest->x = orig.x * data[0] + orig.y * data[4] + orig.z * data[8] + data[12]; + dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9] + data[13]; + dest->z = orig.x * data[2] + orig.y * data[6] + orig.z * data[10] + data[14]; + dest->w = orig.x * data[3] + orig.y * data[7] + orig.z * data[11] + data[15]; +} +//@} + +/** @name Matrix analysis. */ +//@{ + +/** Get the ZYZ euler angles from the matrix. Assumes the matrix is orthonormal. */ +void GetEulerAnglesZYZ(float * s, float * t, float * r) const { + if( GetElem(2,2) < 1.0f ) { + if( GetElem(2,2) > -1.0f ) { + // cs*ct*cr-ss*sr -ss*ct*cr-cs*sr st*cr + // cs*ct*sr+ss*cr -ss*ct*sr+cs*cr st*sr + // -cs*st ss*st ct + *s = atan2(GetElem(1,2), -GetElem(0,2)); + *t = acos(GetElem(2,2)); + *r = atan2(GetElem(2,1), GetElem(2,0)); + } + else { + // -c(s-r) s(s-r) 0 + // s(s-r) c(s-r) 0 + // 0 0 -1 + *s = atan2(GetElem(0, 1), -GetElem(0, 0)); // = s-r + *t = PI; + *r = 0; + } + } + else { + // c(s+r) -s(s+r) 0 + // s(s+r) c(s+r) 0 + // 0 0 1 + *s = atan2(GetElem(0, 1), GetElem(0, 0)); // = s+r + *t = 0; + *r = 0; + } +} + +//@} + +MATHLIB_API friend PiStream & operator<< ( PiStream & s, Matrix & m ); + +/** Print to debug output. */ +void Print() const { + piDebug( "[ %5.2f %5.2f %5.2f %5.2f ]\n", data[0], data[4], data[8], data[12] ); + piDebug( "[ %5.2f %5.2f %5.2f %5.2f ]\n", data[1], data[5], data[9], data[13] ); + piDebug( "[ %5.2f %5.2f %5.2f %5.2f ]\n", data[2], data[6], data[10], data[14] ); + piDebug( "[ %5.2f %5.2f %5.2f %5.2f ]\n", data[3], data[7], data[11], data[15] ); +} + + +public: + + float data[16]; + +}; +#endif + + +#endif // NV_MATH_MATRIX_INL Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/Montecarlo.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvmath/Montecarlo.h +++ ps/trunk/libraries/source/nvtt/src/src/nvmath/Montecarlo.h @@ -1,84 +0,0 @@ -// This code is in the public domain -- castanyo@yahoo.es - -#ifndef NV_MATH_MONTECARLO_H -#define NV_MATH_MONTECARLO_H - -#include -#include - -namespace nv -{ - -/// A random sample distribution. -class SampleDistribution -{ -public: - - // Sampling method. - enum Method { - Method_Random, - Method_Stratified, - Method_NRook - }; - - // Distribution functions. - enum Distribution { - Distribution_Uniform, - Distribution_Cosine - }; - - /// Constructor. 
- SampleDistribution(int num) - { - m_sampleArray.resize(num); - } - - void redistribute(Method method=Method_NRook, Distribution dist=Distribution_Cosine); - - /// Get parametric coordinates of the sample. - Vector2 sample(int i) { return m_sampleArray[i].uv; } - - /// Get sample direction. - Vector3 sampleDir(int i) { return m_sampleArray[i].dir; } - - /// Get number of samples. - uint sampleCount() const { return m_sampleArray.count(); } - -private: - - void redistributeRandom(const Distribution dist); - void redistributeStratified(const Distribution dist); - void multiStageNRooks(const int size, int* cells); - void redistributeNRook(const Distribution dist); - - - /// A sample of the random distribution. - struct Sample - { - /// Set sample given the 3d coordinates. - void setDir(float x, float y, float z) { - dir.set(x, y, z); - uv.set(acosf(z), atan2f(y, x)); - } - - /// Set sample given the 2d parametric coordinates. - void setUV(float u, float v) { - uv.set(u, v); - dir.set(sinf(u) * cosf(v), sinf(u) * sinf(v), cosf(u)); - } - - Vector2 uv; - Vector3 dir; - }; - - /// Random seed. - MTRand m_rand; - - /// Samples. - Array m_sampleArray; - -}; - -} // nv namespace - -#endif // NV_MATH_MONTECARLO_H Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/Montecarlo.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvmath/Montecarlo.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvmath/Montecarlo.cpp @@ -1,156 +0,0 @@ -// This code is in the public domain -- castanyo@yahoo.es - -#include - -using namespace nv; - - -void SampleDistribution::redistribute(Method method/*=Method_NRook*/, Distribution dist/*=Distribution_Cosine*/) -{ - switch(method) - { - case Method_Random: - redistributeRandom(dist); - break; - case Method_Stratified: - redistributeStratified(dist); - break; - case Method_NRook: - redistributeNRook(dist); - break; - }; -} - -void SampleDistribution::redistributeRandom(const Distribution dist) -{ - const uint sampleCount = m_sampleArray.count(); - - // This is the worst method possible! - for(uint i = 0; i < sampleCount; i++) - { - float x = m_rand.getFloat(); - float y = m_rand.getFloat(); - - // Map uniform distribution in the square to the (hemi)sphere. - if( dist == Distribution_Uniform ) { - m_sampleArray[i].setUV(acosf(1 - 2 * x), 2 * PI * y); - } - else { - nvDebugCheck(dist == Distribution_Cosine); - m_sampleArray[i].setUV(acosf(sqrtf(x)), 2 * PI * y); - } - } -} - - -void SampleDistribution::redistributeStratified(const Distribution dist) -{ - const uint sampleCount = m_sampleArray.count(); - const uint sqrtSampleCount = uint(sqrtf(float(sampleCount))); - - nvDebugCheck(sqrtSampleCount*sqrtSampleCount == sampleCount); // Must use exact powers! - - // Create a uniform distribution of points on the hemisphere with low variance. - for(uint v = 0, i = 0; v < sqrtSampleCount; v++) { - for(uint u = 0; u < sqrtSampleCount; u++, i++) { - float x = (u + m_rand.getFloat()) / float(sqrtSampleCount); - float y = (v + m_rand.getFloat()) / float(sqrtSampleCount); - - // Map uniform distribution in the square to the (hemi)sphere. - if( dist == Distribution_Uniform ) { - m_sampleArray[i].setUV(acosf(1 - 2 * x), 2 * PI * y); - } - else { - nvDebugCheck(dist == Distribution_Cosine); - m_sampleArray[i].setUV(acosf(sqrtf(x)), 2 * PI * y); - } - } - } -} - - -/** Multi-Stage N-rooks Sampling Method. 
- * See: http://www.acm.org/jgt/papers/WangSung9/9 - */ -void SampleDistribution::multiStageNRooks(const int size, int* cells) -{ - if (size == 1) { - return; - } - - int size1 = size >> 1; - int size2 = size >> 1; - - if (size & 1) { - if (m_rand.getFloat() > 0.5) { - size1++; - } - else { - size2++; - } - } - - int* upper_cells = new int[size1]; - int* lower_cells = new int[size2]; - - int i, j; - for(i = 0, j = 0; i < size - 1; i += 2, j++) { - if (m_rand.get() & 1) { - upper_cells[j] = cells[i]; - lower_cells[j] = cells[i + 1]; - } - else { - upper_cells[j] = cells[i + 1]; - lower_cells[j] = cells[i]; - } - } - - if (size1 != size2) { - if (size1 > size2) { - upper_cells[j] = cells[i]; - } - else { - lower_cells[j] = cells[i]; - } - } - - multiStageNRooks(size1, upper_cells); - memcpy(cells, upper_cells, size1 * sizeof(int)); - delete [] upper_cells; - - multiStageNRooks(size2, lower_cells); - memcpy(cells + size1, lower_cells, size2 * sizeof(int)); - delete [] lower_cells; -} - - -void SampleDistribution::redistributeNRook(const Distribution dist) -{ - const uint sampleCount = m_sampleArray.count(); - - // Generate nrook cells - int * cells = new int[sampleCount]; - for(uint32 i = 0; i < sampleCount; i++) - { - cells[i] = i; - } - multiStageNRooks(sampleCount, cells); - - for(uint i = 0; i < sampleCount; i++) - { - float x = (i + m_rand.getFloat()) / sampleCount; - float y = (cells[i] + m_rand.getFloat()) / sampleCount; - - // Map uniform distribution in the square to the (hemi)sphere. - if( dist == Distribution_Uniform ) { - m_sampleArray[i].setUV(acosf(1 - 2 * x), 2 * PI * y); - } - else { - nvDebugCheck(dist == Distribution_Cosine); - m_sampleArray[i].setUV(acosf(sqrtf(x)), 2 * PI * y); - } - } - - delete [] cells; -} - Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/PackedFloat.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvmath/PackedFloat.h +++ ps/trunk/libraries/source/nvtt/src/src/nvmath/PackedFloat.h @@ -0,0 +1,79 @@ +// This code is in the public domain -- castano@gmail.com + +#pragma once +#ifndef NV_MATH_PACKEDFLOAT_H +#define NV_MATH_PACKEDFLOAT_H + +#include "nvmath.h" +#include "Vector.h" + +namespace nv +{ + + union FloatRGB9E5 { + uint32 v; + struct { + #if NV_BIG_ENDIAN + uint32 e : 5; + uint32 zm : 9; + uint32 ym : 9; + uint32 xm : 9; + #else + uint32 xm : 9; + uint32 ym : 9; + uint32 zm : 9; + uint32 e : 5; + #endif + }; + }; + + union FloatR11G11B10 { + uint32 v; + struct { + #if NV_BIG_ENDIAN + uint32 ze : 5; + uint32 zm : 5; + uint32 ye : 5; + uint32 ym : 6; + uint32 xe : 5; + uint32 xm : 6; + #else + uint32 xm : 6; + uint32 xe : 5; + uint32 ym : 6; + uint32 ye : 5; + uint32 zm : 5; + uint32 ze : 5; + #endif + }; + }; + + union FloatRGBE8 { + uint32 v; + struct { + #if NV_LITTLE_ENDIAN + uint8 r, g, b, e; + #else + uint8 e: 8; + uint8 b: 8; + uint8 g: 8; + uint8 r: 8; + #endif + }; + }; + + NVMATH_API Vector3 rgb9e5_to_vector3(FloatRGB9E5 v); + NVMATH_API FloatRGB9E5 vector3_to_rgb9e5(const Vector3 & v); + + NVMATH_API float float11_to_float32(uint v); + NVMATH_API float float10_to_float32(uint v); + + NVMATH_API Vector3 r11g11b10_to_vector3(FloatR11G11B10 v); + NVMATH_API FloatR11G11B10 vector3_to_r11g11b10(const Vector3 & v); + + NVMATH_API Vector3 rgbe8_to_vector3(FloatRGBE8 v); + NVMATH_API FloatRGBE8 vector3_to_rgbe8(const Vector3 & v); + +} // nv + +#endif // NV_MATH_PACKEDFLOAT_H Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/PackedFloat.cpp 
=================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvmath/PackedFloat.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvmath/PackedFloat.cpp @@ -0,0 +1,61 @@ +// This code is in the public domain -- Ignacio Castaño + +#include "PackedFloat.h" +#include "Vector.inl" +#include "ftoi.h" + +using namespace nv; + +Vector3 nv::rgb9e5_to_vector3(FloatRGB9E5 v) { +} + +FloatRGB9E5 nv::vector3_to_rgb9e5(const Vector3 & v) { +} + + +float nv::float11_to_float32(uint v) { +} + +float nv::float10_to_float32(uint v) { +} + +Vector3 nv::r11g11b10_to_vector3(FloatR11G11B10 v) { +} + +FloatR11G11B10 nv::vector3_to_r11g11b10(const Vector3 & v) { +} + +// These are based on: +// http://www.graphics.cornell.edu/~bjw/rgbe/rgbe.c +// While this may not be the best way to encode/decode RGBE8, I'm not making any changes to maintain compatibility. +FloatRGBE8 nv::vector3_to_rgbe8(const Vector3 & v) { + + float m = max3(v.x, v.y, v.z); + + FloatRGBE8 rgbe; + + if (m < 1e-32) { + rgbe.v = 0; + } + else { + int e; + float scale = frexpf(m, &e) * 256.0f / m; + rgbe.r = U8(ftoi_round(v.x * scale)); + rgbe.g = U8(ftoi_round(v.y * scale)); + rgbe.b = U8(ftoi_round(v.z * scale)); + rgbe.e = U8(e + 128); + } + + return rgbe; +} + + +Vector3 nv::rgbe8_to_vector3(FloatRGBE8 v) { + if (v.e != 0) { + float scale = ldexpf(1.0f, v.e-(int)(128+8)); // +8 to divide by 256. @@ Shouldn't we divide by 255 instead? + return scale * Vector3(float(v.r), float(v.g), float(v.b)); + } + + return Vector3(0); +} + Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/Plane.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvmath/Plane.h +++ ps/trunk/libraries/source/nvtt/src/src/nvmath/Plane.h @@ -1,77 +1,45 @@ -// This code is in the public domain -- castanyo@yahoo.es +// This code is in the public domain -- Ignacio Castaño +#pragma once #ifndef NV_MATH_PLANE_H #define NV_MATH_PLANE_H -#include -#include +#include "nvmath.h" +#include "Vector.h" + +#if NV_USE_ALTIVEC +#undef vector +#endif namespace nv { - class Matrix; + class Matrix; + + class NVMATH_CLASS Plane + { + public: + Plane(); + Plane(float x, float y, float z, float w); + Plane(const Vector4 & v); + Plane(const Vector3 & v, float d); + Plane(const Vector3 & normal, const Vector3 & point); + Plane(const Vector3 & v0, const Vector3 & v1, const Vector3 & v2); + + const Plane & operator=(const Plane & v); + + Vector3 vector() const; + float offset() const; + + void operator*=(float s); + + Vector4 v; + }; + + Plane transformPlane(const Matrix &, const Plane &); + Vector3 planeIntersection(const Plane & a, const Plane & b, const Plane & c); - class NVMATH_CLASS Plane - { - public: - typedef Plane const & Arg; - - Plane(); - Plane(float x, float y, float z, float w); - Plane(Vector4::Arg v); - Plane(Vector3::Arg v, float d); - Plane(Vector3::Arg normal, Vector3::Arg point); - - const Plane & operator=(Plane::Arg v); - - Vector3 vector() const; - scalar offset() const; - - const Vector4 & asVector() const; - Vector4 & asVector(); - - void operator*=(scalar s); - - private: - Vector4 p; - }; - - inline Plane::Plane() {} - inline Plane::Plane(float x, float y, float z, float w) : p(x, y, z, w) {} - inline Plane::Plane(Vector4::Arg v) : p(v) {} - inline Plane::Plane(Vector3::Arg v, float d) : p(v, d) {} - inline Plane::Plane(Vector3::Arg normal, Vector3::Arg point) : p(normal, dot(normal, point)) {} - - inline const Plane & Plane::operator=(Plane::Arg v) { p = v.p; 
return *this; } - - inline Vector3 Plane::vector() const { return p.xyz(); } - inline scalar Plane::offset() const { return p.w(); } - - inline const Vector4 & Plane::asVector() const { return p; } - inline Vector4 & Plane::asVector() { return p; } - - // Normalize plane. - inline Plane normalize(Plane::Arg plane, float epsilon = NV_EPSILON) - { - const float len = length(plane.vector()); - nvDebugCheck(!isZero(len, epsilon)); - const float inv = 1.0f / len; - return Plane(plane.asVector() * inv); - } - - // Get the distance from the given point to this plane. - inline float distance(Plane::Arg plane, Vector3::Arg point) - { - return dot(plane.vector(), point) - plane.offset(); - } - - inline void Plane::operator*=(scalar s) - { - scale(p, s); - } - Plane transformPlane(const Matrix&, Plane::Arg); - } // nv namespace #endif // NV_MATH_PLANE_H Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/Plane.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvmath/Plane.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvmath/Plane.cpp @@ -1,17 +1,27 @@ // This code is in the public domain -- castanyo@yahoo.es #include "Plane.h" -#include "Matrix.h" +#include "Plane.inl" +#include "Matrix.inl" namespace nv { - Plane transformPlane(const Matrix& m, Plane::Arg p) - { - Vector3 newVec = transformVector(m, p.vector()); - - Vector3 ptInPlane = p.offset() * p.vector(); - ptInPlane = transformPoint(m, ptInPlane); - - return Plane(newVec, ptInPlane); - } -} + Plane transformPlane(const Matrix & m, const Plane & p) + { + Vector3 newVec = transformVector(m, p.vector()); + + Vector3 ptInPlane = p.offset() * p.vector(); + ptInPlane = transformPoint(m, ptInPlane); + + return Plane(newVec, ptInPlane); + } + + Vector3 planeIntersection(const Plane & a, const Plane & b, const Plane & c) + { + return dot(a.vector(), cross(b.vector(), c.vector())) * ( + a.offset() * cross(b.vector(), c.vector()) + + c.offset() * cross(a.vector(), b.vector()) + + b.offset() * cross(c.vector(), a.vector())); + } + +} // nv namespace Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/Plane.inl =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvmath/Plane.inl +++ ps/trunk/libraries/source/nvtt/src/src/nvmath/Plane.inl @@ -0,0 +1,49 @@ +// This code is in the public domain -- Ignacio Castaño + +#pragma once +#ifndef NV_MATH_PLANE_INL +#define NV_MATH_PLANE_INL + +#include "Plane.h" +#include "Vector.inl" + +namespace nv +{ + inline Plane::Plane() {} + inline Plane::Plane(float x, float y, float z, float w) : v(x, y, z, w) {} + inline Plane::Plane(const Vector4 & v) : v(v) {} + inline Plane::Plane(const Vector3 & v, float d) : v(v, d) {} + inline Plane::Plane(const Vector3 & normal, const Vector3 & point) : v(normal, -dot(normal, point)) {} + inline Plane::Plane(const Vector3 & v0, const Vector3 & v1, const Vector3 & v2) { + Vector3 n = cross(v1-v0, v2-v0); + float d = -dot(n, v0); + v = Vector4(n, d); + } + + inline const Plane & Plane::operator=(const Plane & p) { v = p.v; return *this; } + + inline Vector3 Plane::vector() const { return v.xyz(); } + inline float Plane::offset() const { return v.w; } + + // Normalize plane. + inline Plane normalize(const Plane & plane, float epsilon = NV_EPSILON) + { + const float len = length(plane.vector()); + const float inv = isZero(len, epsilon) ? 0 : 1.0f / len; + return Plane(plane.v * inv); + } + + // Get the signed distance from the given point to this plane. 
+ inline float distance(const Plane & plane, const Vector3 & point) + { + return dot(plane.vector(), point) + plane.offset(); + } + + inline void Plane::operator*=(float s) + { + v *= s; + } + +} // nv namespace + +#endif // NV_MATH_PLANE_H Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/Quaternion.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvmath/Quaternion.h +++ ps/trunk/libraries/source/nvtt/src/src/nvmath/Quaternion.h @@ -1,128 +0,0 @@ -// This code is in the public domain -- castanyo@yahoo.es - -#ifndef NV_MATH_QUATERNION_H -#define NV_MATH_QUATERNION_H - -#include -#include - -namespace nv -{ - - class NVMATH_CLASS Quaternion - { - public: - typedef Quaternion const & Arg; - - Quaternion(); - explicit Quaternion(zero_t); - Quaternion(float x, float y, float z, float w); - Quaternion(Vector4::Arg v); - - const Quaternion & operator=(Quaternion::Arg v); - - scalar x() const; - scalar y() const; - scalar z() const; - scalar w() const; - - const Vector4 & asVector() const; - Vector4 & asVector(); - - private: - Vector4 q; - }; - - inline Quaternion::Quaternion() {} - inline Quaternion::Quaternion(zero_t) : q(zero) {} - inline Quaternion::Quaternion(float x, float y, float z, float w) : q(x, y, z, w) {} - inline Quaternion::Quaternion(Vector4::Arg v) : q(v) {} - - inline const Quaternion & Quaternion::operator=(Quaternion::Arg v) { q = v.q; return *this; } - - inline scalar Quaternion::x() const { return q.x(); } - inline scalar Quaternion::y() const { return q.y(); } - inline scalar Quaternion::z() const { return q.z(); } - inline scalar Quaternion::w() const { return q.w(); } - - inline const Vector4 & Quaternion::asVector() const { return q; } - inline Vector4 & Quaternion::asVector() { return q; } - - - inline Quaternion mul(Quaternion::Arg a, Quaternion::Arg b) - { - // @@ Efficient SIMD implementation? - return Quaternion( - + a.x() * b.w() + a.y()*b.z() - a.z()*b.y() + a.w()*b.x(), - - a.x() * b.z() + a.y()*b.w() + a.z()*b.x() + a.w()*b.y(), - + a.x() * b.y() - a.y()*b.x() + a.z()*b.w() + a.w()*b.z(), - - a.x() * b.x() - a.y()*b.y() - a.z()*b.z() + a.w()*b.w()); - } - - inline Quaternion scale(Quaternion::Arg q, float s) - { - return scale(q.asVector(), s); - } - inline Quaternion operator *(Quaternion::Arg q, float s) - { - return scale(q, s); - } - inline Quaternion operator *(float s, Quaternion::Arg q) - { - return scale(q, s); - } - - inline Quaternion scale(Quaternion::Arg q, Vector4::Arg s) - { - return scale(q.asVector(), s); - } - /*inline Quaternion operator *(Quaternion::Arg q, Vector4::Arg s) - { - return scale(q, s); - } - inline Quaternion operator *(Vector4::Arg s, Quaternion::Arg q) - { - return scale(q, s); - }*/ - - inline Quaternion conjugate(Quaternion::Arg q) - { - return scale(q, Vector4(-1, -1, -1, 1)); - } - - inline float length(Quaternion::Arg q) - { - return length(q.asVector()); - } - - inline bool isNormalized(Quaternion::Arg q, float epsilon = NV_NORMAL_EPSILON) - { - return equal(length(q), 1, epsilon); - } - - inline Quaternion normalize(Quaternion::Arg q, float epsilon = NV_EPSILON) - { - float l = length(q); - nvDebugCheck(!isZero(l, epsilon)); - Quaternion n = scale(q, 1.0f / l); - nvDebugCheck(isNormalized(n)); - return n; - } - - inline Quaternion inverse(Quaternion::Arg q) - { - return conjugate(normalize(q)); - } - - /// Create a rotation quaternion for @a angle alpha around normal vector @a v. 
- inline Quaternion axisAngle(Vector3::Arg v, float alpha) - { - float s = sinf(alpha * 0.5f); - float c = cosf(alpha * 0.5f); - return Quaternion(Vector4(v * s, c)); - } - - -} // nv namespace - -#endif // NV_MATH_QUATERNION_H Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/Random.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvmath/Random.h +++ ps/trunk/libraries/source/nvtt/src/src/nvmath/Random.h @@ -1,368 +0,0 @@ -// This code is in the public domain -- castanyo@yahoo.es - -#ifndef NV_MATH_RANDOM_H -#define NV_MATH_RANDOM_H - -#include // nextPowerOfTwo -#include - -namespace nv -{ - -/// Interface of the random number generators. -class Rand -{ -public: - - virtual ~Rand() {} - - enum time_e { Time }; - - /// Provide a new seed. - virtual void seed( uint s ) { /* empty */ }; - - /// Get an integer random number. - virtual uint get() = 0; - - /// Get a random number on [0, max] interval. - uint getRange( uint max ) - { - uint n; - // uint mask = Bitmask( max ); - // do { n = Get() & mask; } while( n > max ); - uint np2 = nextPowerOfTwo( max ); - do { n = get() & (np2-1); } while( n > max ); - return n; - } - - /// Random number on [0.0, 1.0] interval. - float getFloat() - { - union - { - uint32 i; - float f; - } pun; - - pun.i = 0x3f800000UL | (get() & 0x007fffffUL); - return pun.f - 1.0f; - } - - /* - /// Random number on [0.0, 1.0] interval. - double getReal() - { - return double(get()) * (1.0/4294967295.0); // 2^32-1 - } - - /// Random number on [0.0, 1.0) interval. - double getRealExclusive() - { - return double(get()) * (1.0/4294967296.0); // 2^32 - } - */ - - /// Get the max value of the random number. - uint max() const { return 4294967295U; } - - // Get a random seed. - static uint randomSeed(); - -}; - - -/// Very simple random number generator with low storage requirements. -class SimpleRand : public Rand -{ -public: - - /// Constructor that uses the current time as the seed. - SimpleRand( time_e ) - { - seed(randomSeed()); - } - - /// Constructor that uses the given seed. - SimpleRand( uint s = 0 ) - { - seed(s); - } - - /// Set the given seed. - virtual void seed( uint s ) - { - current = s; - } - - /// Get a random number. - virtual uint get() - { - return current = current * 1103515245 + 12345; - } - -private: - - uint current; - -}; - - -/// Mersenne twister random number generator. -class MTRand : public Rand -{ -public: - - enum { N = 624 }; // length of state vector - enum { M = 397 }; - - /// Constructor that uses the current time as the seed. - MTRand( time_e ) - { - seed(randomSeed()); - } - - /// Constructor that uses the given seed. - MTRand( uint s = 0 ) - { - seed(s); - } - - /// Constructor that uses the given seeds. - NVMATH_API MTRand( const uint * seed_array, uint length ); - - - /// Provide a new seed. - virtual void seed( uint s ) - { - initialize(s); - reload(); - } - - /// Get a random number between 0 - 65536. 
- virtual uint get() - { - // Pull a 32-bit integer from the generator state - // Every other access function simply transforms the numbers extracted here - if( left == 0 ) { - reload(); - } - left--; - - uint s1; - s1 = *next++; - s1 ^= (s1 >> 11); - s1 ^= (s1 << 7) & 0x9d2c5680U; - s1 ^= (s1 << 15) & 0xefc60000U; - return ( s1 ^ (s1 >> 18) ); - }; - - -private: - - NVMATH_API void initialize( uint32 seed ); - NVMATH_API void reload(); - - uint hiBit( uint u ) const { return u & 0x80000000U; } - uint loBit( uint u ) const { return u & 0x00000001U; } - uint loBits( uint u ) const { return u & 0x7fffffffU; } - uint mixBits( uint u, uint v ) const { return hiBit(u) | loBits(v); } - uint twist( uint m, uint s0, uint s1 ) const { return m ^ (mixBits(s0,s1)>>1) ^ ((~loBit(s1)+1) & 0x9908b0dfU); } - -private: - - uint state[N]; // internal state - uint * next; // next value to get from state - int left; // number of values left before reload needed - -}; - - - -/** George Marsaglia's random number generator. - * Code based on Thatcher Ulrich public domain source code: - * http://cvs.sourceforge.net/viewcvs.py/tu-testbed/tu-testbed/base/tu_random.cpp?rev=1.7&view=auto - * - * PRNG code adapted from the complimentary-multiply-with-carry - * code in the article: George Marsaglia, "Seeds for Random Number - * Generators", Communications of the ACM, May 2003, Vol 46 No 5, - * pp90-93. - * - * The article says: - * - * "Any one of the choices for seed table size and multiplier will - * provide a RNG that has passed extensive tests of randomness, - * particularly those in [3], yet is simple and fast -- - * approximately 30 million random 32-bit integers per second on a - * 850MHz PC. The period is a*b^n, where a is the multiplier, n - * the size of the seed table and b=2^32-1. (a is chosen so that - * b is a primitive root of the prime a*b^n + 1.)" - * - * [3] Marsaglia, G., Zaman, A., and Tsang, W. Toward a universal - * random number generator. _Statistics and Probability Letters - * 8_ (1990), 35-39. - */ -class GMRand : public Rand -{ -public: - - enum { SEED_COUNT = 8 }; - -// const uint64 a = 123471786; // for SEED_COUNT=1024 -// const uint64 a = 123554632; // for SEED_COUNT=512 -// const uint64 a = 8001634; // for SEED_COUNT=255 -// const uint64 a = 8007626; // for SEED_COUNT=128 -// const uint64 a = 647535442; // for SEED_COUNT=64 -// const uint64 a = 547416522; // for SEED_COUNT=32 -// const uint64 a = 487198574; // for SEED_COUNT=16 -// const uint64 a = 716514398U; // for SEED_COUNT=8 - enum { a = 716514398U }; - - - GMRand( time_e ) - { - seed(randomSeed()); - } - - GMRand(uint s = 987654321) - { - seed(s); - } - - - /// Provide a new seed. - virtual void seed( uint s ) - { - c = 362436; - i = SEED_COUNT - 1; - - for(int i = 0; i < SEED_COUNT; i++) { - s = s ^ (s << 13); - s = s ^ (s >> 17); - s = s ^ (s << 5); - Q[i] = s; - } - } - - /// Get a random number between 0 - 65536. - virtual uint get() - { - const uint32 r = 0xFFFFFFFE; - - uint64 t; - uint32 x; - - i = (i + 1) & (SEED_COUNT - 1); - t = a * Q[i] + c; - c = uint32(t >> 32); - x = uint32(t + c); - - if( x < c ) { - x++; - c++; - } - - uint32 val = r - x; - Q[i] = val; - return val; - }; - - -private: - - uint32 c; - uint32 i; - uint32 Q[8]; - -}; - - -/** Random number implementation from the GNU Sci. Lib. (GSL). - * Adapted from Nicholas Chapman version: - * - * Copyright (C) 1996, 1997, 1998, 1999, 2000 James Theiler, Brian Gough - * This is the Unix rand48() generator. 
The generator returns the - * upper 32 bits from each term of the sequence, - * - * x_{n+1} = (a x_n + c) mod m - * - * using 48-bit unsigned arithmetic, with a = 0x5DEECE66D , c = 0xB - * and m = 2^48. The seed specifies the upper 32 bits of the initial - * value, x_1, with the lower 16 bits set to 0x330E. - * - * The theoretical value of x_{10001} is 244131582646046. - * - * The period of this generator is ? FIXME (probably around 2^48). - */ -class Rand48 : public Rand -{ -public: - - Rand48( time_e ) - { - seed(randomSeed()); - } - - Rand48( uint s = 0x1234ABCD ) - { - seed(s); - } - - - /** Set the given seed. */ - virtual void seed( uint s ) { - vstate.x0 = 0x330E; - vstate.x1 = uint16(s & 0xFFFF); - vstate.x2 = uint16((s >> 16) & 0xFFFF); - } - - /** Get a random number. */ - virtual uint get() { - - advance(); - - uint x1 = vstate.x1; - uint x2 = vstate.x2; - return (x2 << 16) + x1; - } - - -private: - - void advance() - { - /* work with unsigned long ints throughout to get correct integer - promotions of any unsigned short ints */ - const uint32 x0 = vstate.x0; - const uint32 x1 = vstate.x1; - const uint32 x2 = vstate.x2; - - uint32 a; - a = a0 * x0 + c0; - - vstate.x0 = uint16(a & 0xFFFF); - a >>= 16; - - /* although the next line may overflow we only need the top 16 bits - in the following stage, so it does not matter */ - - a += a0 * x1 + a1 * x0; - vstate.x1 = uint16(a & 0xFFFF); - - a >>= 16; - a += a0 * x2 + a1 * x1 + a2 * x0; - vstate.x2 = uint16(a & 0xFFFF); - } - - -private: - NVMATH_API static const uint16 a0, a1, a2, c0; - - struct rand48_state_t { - uint16 x0, x1, x2; - } vstate; - -}; - -} // nv namespace - -#endif // NV_MATH_RANDOM_H Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/Random.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvmath/Random.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvmath/Random.cpp @@ -1,54 +0,0 @@ -// This code is in the public domain -- castanyo@yahoo.es - -#include -#include - -using namespace nv; - -// Statics -const uint16 Rand48::a0 = 0xE66D; -const uint16 Rand48::a1 = 0xDEEC; -const uint16 Rand48::a2 = 0x0005; -const uint16 Rand48::c0 = 0x000B; - - -/// Get a random seed based on the current time. -uint Rand::randomSeed() -{ - return (uint)time(NULL); -} - - -void MTRand::initialize( uint32 seed ) -{ - // Initialize generator state with seed - // See Knuth TAOCP Vol 2, 3rd Ed, p.106 for multiplier. - // In previous versions, most significant bits (MSBs) of the seed affect - // only MSBs of the state array. Modified 9 Jan 2002 by Makoto Matsumoto. 
- uint32 *s = state; - uint32 *r = state; - int i = 1; - *s++ = seed & 0xffffffffUL; - for( ; i < N; ++i ) - { - *s++ = ( 1812433253UL * ( *r ^ (*r >> 30) ) + i ) & 0xffffffffUL; - r++; - } -} - - -void MTRand::reload() -{ - // Generate N new values in state - // Made clearer and faster by Matthew Bellew (matthew.bellew@home.com) - uint32 *p = state; - int i; - for( i = N - M; i--; ++p ) - *p = twist( p[M], p[0], p[1] ); - for( i = M; --i; ++p ) - *p = twist( p[M-N], p[0], p[1] ); - *p = twist( p[M-N], p[0], state[0] ); - - left = N, next = state; -} - Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/SimdVector.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvmath/SimdVector.h +++ ps/trunk/libraries/source/nvtt/src/src/nvmath/SimdVector.h @@ -0,0 +1,12 @@ +// This code is in the public domain -- Ignacio Castaño + +#include "Vector.h" // Vector3, Vector4 + + +#if NV_USE_ALTIVEC +# include "SimdVector_VE.h" +#endif + +#if NV_USE_SSE +# include "SimdVector_SSE.h" +#endif Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/SimdVector_SSE.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvmath/SimdVector_SSE.h +++ ps/trunk/libraries/source/nvtt/src/src/nvmath/SimdVector_SSE.h @@ -0,0 +1,216 @@ +/* ----------------------------------------------------------------------------- + + Copyright (c) 2006 Simon Brown si@sjbrown.co.uk + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + -------------------------------------------------------------------------- */ + +#ifndef NV_SIMD_VECTOR_SSE_H +#define NV_SIMD_VECTOR_SSE_H + +#include "nvcore/Memory.h" + +#include +#if (NV_USE_SSE > 1) +#include +#endif + +// See this for ideas: +// http://molecularmusings.wordpress.com/2011/10/18/simdifying-multi-platform-math/ + + +namespace nv { + +#define NV_SIMD_NATIVE NV_FORCEINLINE +#define NV_SIMD_INLINE inline + + class SimdVector + { + public: + __m128 vec; + + typedef SimdVector const& Arg; + + NV_SIMD_NATIVE SimdVector() {} + + NV_SIMD_NATIVE explicit SimdVector(__m128 v) : vec(v) {} + + NV_SIMD_NATIVE explicit SimdVector(float f) { + vec = _mm_set1_ps(f); + } + + NV_SIMD_NATIVE explicit SimdVector(const float * v) + { + vec = _mm_load_ps( v ); + } + + NV_SIMD_NATIVE SimdVector(float x, float y, float z, float w) + { + vec = _mm_setr_ps( x, y, z, w ); + } + + NV_SIMD_NATIVE SimdVector(const SimdVector & arg) : vec(arg.vec) {} + + NV_SIMD_NATIVE SimdVector & operator=(const SimdVector & arg) + { + vec = arg.vec; + return *this; + } + + NV_SIMD_INLINE float toFloat() const + { + NV_ALIGN_16 float f; + _mm_store_ss(&f, vec); + return f; + } + + NV_SIMD_INLINE Vector3 toVector3() const + { + NV_ALIGN_16 float c[4]; + _mm_store_ps( c, vec ); + return Vector3( c[0], c[1], c[2] ); + } + + NV_SIMD_INLINE Vector4 toVector4() const + { + NV_ALIGN_16 float c[4]; + _mm_store_ps( c, vec ); + return Vector4( c[0], c[1], c[2], c[3] ); + } + +#define SSE_SPLAT( a ) ((a) | ((a) << 2) | ((a) << 4) | ((a) << 6)) + NV_SIMD_NATIVE SimdVector splatX() const { return SimdVector( _mm_shuffle_ps( vec, vec, SSE_SPLAT( 0 ) ) ); } + NV_SIMD_NATIVE SimdVector splatY() const { return SimdVector( _mm_shuffle_ps( vec, vec, SSE_SPLAT( 1 ) ) ); } + NV_SIMD_NATIVE SimdVector splatZ() const { return SimdVector( _mm_shuffle_ps( vec, vec, SSE_SPLAT( 2 ) ) ); } + NV_SIMD_NATIVE SimdVector splatW() const { return SimdVector( _mm_shuffle_ps( vec, vec, SSE_SPLAT( 3 ) ) ); } +#undef SSE_SPLAT + + NV_SIMD_NATIVE SimdVector& operator+=( Arg v ) + { + vec = _mm_add_ps( vec, v.vec ); + return *this; + } + + NV_SIMD_NATIVE SimdVector& operator-=( Arg v ) + { + vec = _mm_sub_ps( vec, v.vec ); + return *this; + } + + NV_SIMD_NATIVE SimdVector& operator*=( Arg v ) + { + vec = _mm_mul_ps( vec, v.vec ); + return *this; + } + }; + + + NV_SIMD_NATIVE SimdVector operator+( SimdVector::Arg left, SimdVector::Arg right ) + { + return SimdVector( _mm_add_ps( left.vec, right.vec ) ); + } + + NV_SIMD_NATIVE SimdVector operator-( SimdVector::Arg left, SimdVector::Arg right ) + { + return SimdVector( _mm_sub_ps( left.vec, right.vec ) ); + } + + NV_SIMD_NATIVE SimdVector operator*( SimdVector::Arg left, SimdVector::Arg right ) + { + return SimdVector( _mm_mul_ps( left.vec, right.vec ) ); + } + + // Returns a*b + c + NV_SIMD_INLINE SimdVector multiplyAdd( SimdVector::Arg a, SimdVector::Arg b, SimdVector::Arg c ) + { + return SimdVector( _mm_add_ps( _mm_mul_ps( a.vec, b.vec ), c.vec ) ); + } + + // Returns -( a*b - c ) + NV_SIMD_INLINE SimdVector negativeMultiplySubtract( SimdVector::Arg a, SimdVector::Arg b, SimdVector::Arg c ) + { + return SimdVector( _mm_sub_ps( c.vec, _mm_mul_ps( a.vec, b.vec ) ) ); + } + + NV_SIMD_INLINE SimdVector reciprocal( SimdVector::Arg v ) + { + // get the reciprocal estimate + __m128 estimate = _mm_rcp_ps( v.vec ); + + // one round of Newton-Rhaphson refinement + __m128 diff = _mm_sub_ps( _mm_set1_ps( 1.0f ), _mm_mul_ps( estimate, v.vec ) ); + return SimdVector( _mm_add_ps( _mm_mul_ps( diff, estimate ), 
estimate ) ); + } + + NV_SIMD_NATIVE SimdVector min( SimdVector::Arg left, SimdVector::Arg right ) + { + return SimdVector( _mm_min_ps( left.vec, right.vec ) ); + } + + NV_SIMD_NATIVE SimdVector max( SimdVector::Arg left, SimdVector::Arg right ) + { + return SimdVector( _mm_max_ps( left.vec, right.vec ) ); + } + + NV_SIMD_INLINE SimdVector truncate( SimdVector::Arg v ) + { +#if (NV_USE_SSE == 1) + // convert to ints + __m128 input = v.vec; + __m64 lo = _mm_cvttps_pi32( input ); + __m64 hi = _mm_cvttps_pi32( _mm_movehl_ps( input, input ) ); + + // convert to floats + __m128 part = _mm_movelh_ps( input, _mm_cvtpi32_ps( input, hi ) ); + __m128 truncated = _mm_cvtpi32_ps( part, lo ); + + // clear out the MMX multimedia state to allow FP calls later + _mm_empty(); + return SimdVector( truncated ); +#else + // use SSE2 instructions + return SimdVector( _mm_cvtepi32_ps( _mm_cvttps_epi32( v.vec ) ) ); +#endif + } + + NV_SIMD_NATIVE SimdVector compareEqual( SimdVector::Arg left, SimdVector::Arg right ) + { + return SimdVector( _mm_cmpeq_ps( left.vec, right.vec ) ); + } + + NV_SIMD_INLINE SimdVector select( SimdVector::Arg off, SimdVector::Arg on, SimdVector::Arg bits ) + { + __m128 a = _mm_andnot_ps( bits.vec, off.vec ); + __m128 b = _mm_and_ps( bits.vec, on.vec ); + + return SimdVector( _mm_or_ps( a, b ) ); + } + + NV_SIMD_INLINE bool compareAnyLessThan( SimdVector::Arg left, SimdVector::Arg right ) + { + __m128 bits = _mm_cmplt_ps( left.vec, right.vec ); + int value = _mm_movemask_ps( bits ); + return value != 0; + } + +} // namespace nv + +#endif // NV_SIMD_VECTOR_SSE_H Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/SimdVector_VE.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvmath/SimdVector_VE.h +++ ps/trunk/libraries/source/nvtt/src/src/nvmath/SimdVector_VE.h @@ -0,0 +1,189 @@ +/* ----------------------------------------------------------------------------- + + Copyright (c) 2006 Simon Brown si@sjbrown.co.uk + Copyright (c) 2016 Raptor Engineering, LLC + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + -------------------------------------------------------------------------- */ + +#ifndef NV_SIMD_VECTOR_VE_H +#define NV_SIMD_VECTOR_VE_H + +#ifndef __APPLE_ALTIVEC__ +#include +#undef bool +#endif + +namespace nv { + + class SimdVector + { + public: + vector float vec; + + typedef SimdVector Arg; + + SimdVector() {} + explicit SimdVector(float v) : vec(vec_splats(v)) {} + explicit SimdVector(vector float v) : vec(v) {} + SimdVector(const SimdVector & arg) : vec(arg.vec) {} + + SimdVector& operator=(const SimdVector & arg) + { + vec = arg.vec; + return *this; + } + + SimdVector(const float * v) + { + union { vector float v; float c[4]; } u; + u.c[0] = v[0]; + u.c[1] = v[1]; + u.c[2] = v[2]; + u.c[3] = v[3]; + vec = u.v; + } + + SimdVector(float x, float y, float z, float w) + { + union { vector float v; float c[4]; } u; + u.c[0] = x; + u.c[1] = y; + u.c[2] = z; + u.c[3] = w; + vec = u.v; + } + + float toFloat() const + { + union { vector float v; float c[4]; } u; + u.v = vec; + return u.c[0]; + } + + Vector3 toVector3() const + { + union { vector float v; float c[4]; } u; + u.v = vec; + return Vector3( u.c[0], u.c[1], u.c[2] ); + } + + Vector4 toVector4() const + { + union { vector float v; float c[4]; } u; + u.v = vec; + return Vector4( u.c[0], u.c[1], u.c[2], u.c[3] ); + } + + SimdVector splatX() const { return SimdVector( vec_splat( vec, 0 ) ); } + SimdVector splatY() const { return SimdVector( vec_splat( vec, 1 ) ); } + SimdVector splatZ() const { return SimdVector( vec_splat( vec, 2 ) ); } + SimdVector splatW() const { return SimdVector( vec_splat( vec, 3 ) ); } + + SimdVector& operator+=( Arg v ) + { + vec = vec_add( vec, v.vec ); + return *this; + } + + SimdVector& operator-=( Arg v ) + { + vec = vec_sub( vec, v.vec ); + return *this; + } + + SimdVector& operator*=( Arg v ) + { + vec = vec_madd( vec, v.vec, vec_splats( -0.0f ) ); + return *this; + } + }; + + inline SimdVector operator+( SimdVector::Arg left, SimdVector::Arg right ) + { + return SimdVector( vec_add( left.vec, right.vec ) ); + } + + inline SimdVector operator-( SimdVector::Arg left, SimdVector::Arg right ) + { + return SimdVector( vec_sub( left.vec, right.vec ) ); + } + + inline SimdVector operator*( SimdVector::Arg left, SimdVector::Arg right ) + { + return SimdVector( vec_madd( left.vec, right.vec, vec_splats( -0.0f ) ) ); + } + + // Returns a*b + c + inline SimdVector multiplyAdd( SimdVector::Arg a, SimdVector::Arg b, SimdVector::Arg c ) + { + return SimdVector( vec_madd( a.vec, b.vec, c.vec ) ); + } + + // Returns -( a*b - c ) + inline SimdVector negativeMultiplySubtract( SimdVector::Arg a, SimdVector::Arg b, SimdVector::Arg c ) + { + return SimdVector( vec_nmsub( a.vec, b.vec, c.vec ) ); + } + + inline SimdVector reciprocal( SimdVector::Arg v ) + { + // get the reciprocal estimate + vector float estimate = vec_re( v.vec ); + + // one round of Newton-Rhaphson refinement + vector float diff = vec_nmsub( estimate, v.vec, vec_splats( 1.0f ) ); + return SimdVector( vec_madd( diff, estimate, estimate ) ); + } + + inline SimdVector min( SimdVector::Arg left, SimdVector::Arg right ) + { + return SimdVector( vec_min( left.vec, right.vec ) ); + } + + inline SimdVector max( SimdVector::Arg left, SimdVector::Arg right ) + { + return SimdVector( vec_max( left.vec, right.vec ) ); + } + + inline SimdVector truncate( SimdVector::Arg v ) + { + return SimdVector( vec_trunc( v.vec ) ); + } + + inline SimdVector compareEqual( SimdVector::Arg left, SimdVector::Arg right ) + { + return SimdVector( ( vector float )vec_cmpeq( 
left.vec, right.vec ) ); + } + + inline SimdVector select( SimdVector::Arg off, SimdVector::Arg on, SimdVector::Arg bits ) + { + return SimdVector( vec_sel( off.vec, on.vec, ( vector unsigned int )bits.vec ) ); + } + + inline bool compareAnyLessThan( SimdVector::Arg left, SimdVector::Arg right ) + { + return vec_any_lt( left.vec, right.vec ) != 0; + } + +} // namespace nv + +#endif // NV_SIMD_VECTOR_VE_H Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/SphericalHarmonic.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvmath/SphericalHarmonic.h +++ ps/trunk/libraries/source/nvtt/src/src/nvmath/SphericalHarmonic.h @@ -3,415 +3,415 @@ #ifndef NV_MATH_SPHERICALHARMONIC_H #define NV_MATH_SPHERICALHARMONIC_H -#include +#include "nvmath.h" -namespace nv -{ - - NVMATH_API float legendrePolynomial( int l, int m, float x ) NV_CONST; - NVMATH_API float y( int l, int m, float theta, float phi ) NV_CONST; - NVMATH_API float y( int l, int m, Vector3::Arg v ) NV_CONST; - NVMATH_API float hy( int l, int m, float theta, float phi ) NV_CONST; - NVMATH_API float hy( int l, int m, Vector3::Arg v ) NV_CONST; - - class Sh; - float dot(const Sh & a, const Sh & b) NV_CONST; - - - /// Spherical harmonic class. - class Sh - { - friend class Sh2; - friend class ShMatrix; - public: - - /// Construct a spherical harmonic of the given order. - Sh(int o) : m_order(o) - { - m_elemArray = new float[basisNum()]; - } - - /// Copy constructor. - Sh(const Sh & sh) : m_order(sh.order()) - { - m_elemArray = new float[basisNum()]; - memcpy(m_elemArray, sh.m_elemArray, sizeof(float) * basisNum()); - } - - /// Destructor. - ~Sh() - { - delete [] m_elemArray; - m_elemArray = NULL; - } - - /// Get number of bands. - static int bandNum(int order) { - return order + 1; - } - - /// Get number of sh basis. - static int basisNum(int order) { - return (order + 1) * (order + 1); - } - - /// Get the index for the given coefficients. - static int index( int l, int m ) { - return l * l + l + m; - } - - /// Get sh order. - int order() const - { - return m_order; - } - - /// Get sh order. - int bandNum() const - { - return bandNum(m_order); - } - - /// Get sh order. - int basisNum() const - { - return basisNum(m_order); - } - - /// Get sh coefficient indexed by l,m. - float elem( int l, int m ) const - { - return m_elemArray[index(l, m)]; - } - - /// Get sh coefficient indexed by l,m. - float & elem( int l, int m ) - { - return m_elemArray[index(l, m)]; - } - - - /// Get sh coefficient indexed by i. - float elemAt( int i ) const { - return m_elemArray[i]; - } - - /// Get sh coefficient indexed by i. - float & elemAt( int i ) - { - return m_elemArray[i]; - } - - - /// Reset the sh coefficients. - void reset() - { - for( int i = 0; i < basisNum(); i++ ) { - m_elemArray[i] = 0.0f; - } - } - - /// Copy spherical harmonic. - void operator= ( const Sh & sh ) - { - nvDebugCheck(order() <= sh.order()); - - for(int i = 0; i < basisNum(); i++) { - m_elemArray[i] = sh.m_elemArray[i]; - } - } - - /// Add spherical harmonics. - void operator+= ( const Sh & sh ) - { - nvDebugCheck(order() == sh.order()); - - for(int i = 0; i < basisNum(); i++) { - m_elemArray[i] += sh.m_elemArray[i]; - } - } - - /// Substract spherical harmonics. - void operator-= ( const Sh & sh ) - { - nvDebugCheck(order() == sh.order()); - - for(int i = 0; i < basisNum(); i++) { - m_elemArray[i] -= sh.m_elemArray[i]; - } - } - - // Not exactly convolution, nor product. 
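// A minimal sketch of the flat coefficient layout used by the Sh class here: all bands
// share one array indexed by index(l, m) = l*l + l + m, so order 2 yields nine entries
// ordered (0,0) (1,-1) (1,0) (1,1) (2,-2) (2,-1) (2,0) (2,1) (2,2). The helper below is
// hypothetical and only restates that mapping.
#include <cassert>

inline int shFlatIndex(int l, int m) { return l * l + l + m; }

inline void checkShLayout()
{
    assert(shFlatIndex(0,  0) == 0);
    assert(shFlatIndex(1, -1) == 1 && shFlatIndex(1, 1) == 3);
    assert(shFlatIndex(2, -2) == 4 && shFlatIndex(2, 2) == 8);
}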
- void operator*= ( const Sh & sh ) - { - nvDebugCheck(order() == sh.order()); - - for(int i = 0; i < basisNum(); i++) { - m_elemArray[i] *= sh.m_elemArray[i]; - } - } - - /// Scale spherical harmonics. - void operator*= ( float f ) - { - for(int i = 0; i < basisNum(); i++) { - m_elemArray[i] *= f; - } - } - - /// Add scaled spherical harmonics. - void addScaled( const Sh & sh, float f ) - { - nvDebugCheck(order() == sh.order()); - - for(int i = 0; i < basisNum(); i++) { - m_elemArray[i] += sh.m_elemArray[i] * f; - } - } - - - /*/// Add a weighted sample to the sh coefficients. - void AddSample( const Vec3 & dir, const Color3f & color, float w=1.0f ) { - for(int l = 0; l <= order; l++) { - for(int m = -l; m <= l; m++) { - Color3f & elem = GetElem(l, m); - elem.Mad( elem, color, w * y(l, m, dir) ); - } - } - }*/ - - /// Evaluate - void eval(Vector3::Arg dir) - { - for(int l = 0; l <= m_order; l++) { - for(int m = -l; m <= l; m++) { - elem(l, m) = y(l, m, dir); - } - } - } - - - /// Evaluate the spherical harmonic function. - float sample(Vector3::Arg dir) const - { - Sh sh(order()); - sh.eval(dir); - - return dot(sh, *this); - } - - - protected: - - const int m_order; - float * m_elemArray; - - }; - - - /// Compute dot product of the spherical harmonics. - inline float dot(const Sh & a, const Sh & b) - { - nvDebugCheck(a.order() == b.order()); - - float sum = 0; - for( int i = 0; i < Sh::basisNum(a.order()); i++ ) { - sum += a.elemAt(i) * b.elemAt(i); - } - - return sum; - } - - - /// Second order spherical harmonic. - class Sh2 : public Sh - { - public: - - /// Constructor. - Sh2() : Sh(2) {} - - /// Copy constructor. - Sh2(const Sh2 & sh) : Sh(sh) {} - - /// Spherical harmonic resulting from projecting the clamped cosine transfer function to the SH basis. - void cosineTransfer() - { - const float c1 = 0.282095f; // K(0, 0) - const float c2 = 0.488603f; // K(1, 0) - const float c3 = 1.092548f; // sqrt(15.0f / PI) / 2.0f = K(2, -2) - const float c4 = 0.315392f; // sqrt(5.0f / PI) / 4.0f) = K(2, 0) - const float c5 = 0.546274f; // sqrt(15.0f / PI) / 4.0f) = K(2, 2) - - const float normalization = PI * 16.0f / 17.0f; - - const float const1 = c1 * normalization * 1.0f; - const float const2 = c2 * normalization * (2.0f / 3.0f); - const float const3 = c3 * normalization * (1.0f / 4.0f); - const float const4 = c4 * normalization * (1.0f / 4.0f); - const float const5 = c5 * normalization * (1.0f / 4.0f); - - m_elemArray[0] = const1; - - m_elemArray[1] = -const2; - m_elemArray[2] = const2; - m_elemArray[3] = -const2; - - m_elemArray[4] = const3; - m_elemArray[5] = -const3; - m_elemArray[6] = const4; - m_elemArray[7] = -const3; - m_elemArray[8] = const5; - } - }; - - +#include // memcpy -#if 0 -/// Spherical harmonic matrix. -class ShMatrix +namespace nv { -public: + class Vector3; + class Matrix; + + NVMATH_API float legendrePolynomial( int l, int m, float x ) NV_CONST; + NVMATH_API float shBasis( int l, int m, float theta, float phi ) NV_CONST; + NVMATH_API float shBasis( int l, int m, const Vector3 & v ) NV_CONST; + NVMATH_API float hshBasis( int l, int m, float theta, float phi ) NV_CONST; + NVMATH_API float hshBasis( int l, int m, const Vector3 & v ) NV_CONST; + + class Sh; + float dot(const Sh & a, const Sh & b) NV_CONST; + + + /// Spherical harmonic class. + class Sh + { + friend class Sh2; + friend class ShMatrix; + public: + + /// Construct a spherical harmonic of the given order. + Sh(int o) : m_order(o) + { + m_elemArray = new float[basisNum()]; + } + + /// Copy constructor. 
+ Sh(const Sh & sh) : m_order(sh.order()) + { + m_elemArray = new float[basisNum()]; + memcpy(m_elemArray, sh.m_elemArray, sizeof(float) * basisNum()); + } + + /// Destructor. + ~Sh() + { + delete [] m_elemArray; + m_elemArray = NULL; + } + + /// Get number of bands. + static int bandNum(int m_order) { + return m_order + 1; + } + + /// Get number of sh basis. + static int basisNum(int m_order) { + return (m_order + 1) * (m_order + 1); + } + + /// Get the index for the given coefficients. + static int index( int l, int m ) { + return l * l + l + m; + } + + /// Get sh order. + int order() const + { + return m_order; + } + + /// Get sh order. + int bandNum() const + { + return bandNum(m_order); + } + + /// Get sh order. + int basisNum() const + { + return basisNum(m_order); + } + + /// Get sh coefficient indexed by l,m. + float elem( int l, int m ) const + { + return m_elemArray[index(l, m)]; + } + + /// Get sh coefficient indexed by l,m. + float & elem( int l, int m ) + { + return m_elemArray[index(l, m)]; + } + + + /// Get sh coefficient indexed by i. + float elemAt( int i ) const { + return m_elemArray[i]; + } + + /// Get sh coefficient indexed by i. + float & elemAt( int i ) + { + return m_elemArray[i]; + } + + + /// Reset the sh coefficients. + void reset() + { + for( int i = 0; i < basisNum(); i++ ) { + m_elemArray[i] = 0.0f; + } + } + + /// Copy spherical harmonic. + void operator= ( const Sh & sh ) + { + nvDebugCheck(order() <= sh.order()); + + for(int i = 0; i < basisNum(); i++) { + m_elemArray[i] = sh.m_elemArray[i]; + } + } + + /// Add spherical harmonics. + void operator+= ( const Sh & sh ) + { + nvDebugCheck(order() == sh.order()); + + for(int i = 0; i < basisNum(); i++) { + m_elemArray[i] += sh.m_elemArray[i]; + } + } + + /// Substract spherical harmonics. + void operator-= ( const Sh & sh ) + { + nvDebugCheck(order() == sh.order()); + + for(int i = 0; i < basisNum(); i++) { + m_elemArray[i] -= sh.m_elemArray[i]; + } + } + + // Not exactly convolution, nor product. + void operator*= ( const Sh & sh ) + { + nvDebugCheck(order() == sh.order()); + + for(int i = 0; i < basisNum(); i++) { + m_elemArray[i] *= sh.m_elemArray[i]; + } + } + + /// Scale spherical harmonics. + void operator*= ( float f ) + { + for(int i = 0; i < basisNum(); i++) { + m_elemArray[i] *= f; + } + } + + /// Add scaled spherical harmonics. + void addScaled( const Sh & sh, float f ) + { + nvDebugCheck(order() == sh.order()); + + for(int i = 0; i < basisNum(); i++) { + m_elemArray[i] += sh.m_elemArray[i] * f; + } + } + + + /*/// Add a weighted sample to the sh coefficients. + void AddSample( const Vec3 & dir, const Color3f & color, float w=1.0f ) { + for(int l = 0; l <= order; l++) { + for(int m = -l; m <= l; m++) { + Color3f & elem = GetElem(l, m); + elem.Mad( elem, color, w * shBasis(l, m, dir) ); + } + } + }*/ + + /// Evaluate + void eval(const Vector3 & dir) + { + for(int l = 0; l <= m_order; l++) { + for(int m = -l; m <= l; m++) { + elem(l, m) = shBasis(l, m, dir); + } + } + } + + + /// Evaluate the spherical harmonic function. + float sample(const Vector3 & dir) const + { + Sh sh(order()); + sh.eval(dir); + + return dot(sh, *this); + } + + + protected: + + const int m_order; + float * m_elemArray; + + }; + + + /// Compute dot product of the spherical harmonics. 
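// A short sketch of how eval() and dot() combine, which is what Sh::sample() does
// internally: fill a temporary Sh with the basis values at a direction, then dot it
// with the coefficient set. The include path and the name evaluateSh are assumptions
// made for illustration.
#include "nvmath/SphericalHarmonic.h"
#include "nvmath/Vector.h"

inline float evaluateSh(const nv::Sh & coeffs, const nv::Vector3 & dir)
{
    nv::Sh basis(coeffs.order());    // same order as the coefficient set
    basis.eval(dir);                 // basis.elem(l, m) = shBasis(l, m, dir)
    return nv::dot(basis, coeffs);   // sum over all (l, m)
}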
+ inline float dot(const Sh & a, const Sh & b) + { + nvDebugCheck(a.order() == b.order()); + + float sum = 0; + for( int i = 0; i < Sh::basisNum(a.order()); i++ ) { + sum += a.elemAt(i) * b.elemAt(i); + } + + return sum; + } + + + /// Second order spherical harmonic. + class Sh2 : public Sh + { + public: + + /// Constructor. + Sh2() : Sh(2) {} + + /// Copy constructor. + Sh2(const Sh2 & sh) : Sh(sh) {} + + /// Spherical harmonic resulting from projecting the clamped cosine transfer function to the SH basis. + void cosineTransfer() + { + const float c1 = 0.282095f; // K(0, 0) + const float c2 = 0.488603f; // K(1, 0) + const float c3 = 1.092548f; // sqrt(15.0f / PI) / 2.0f = K(2, -2) + const float c4 = 0.315392f; // sqrt(5.0f / PI) / 4.0f) = K(2, 0) + const float c5 = 0.546274f; // sqrt(15.0f / PI) / 4.0f) = K(2, 2) + + const float normalization = PI * 16.0f / 17.0f; + + const float const1 = c1 * normalization * 1.0f; + const float const2 = c2 * normalization * (2.0f / 3.0f); + const float const3 = c3 * normalization * (1.0f / 4.0f); + const float const4 = c4 * normalization * (1.0f / 4.0f); + const float const5 = c5 * normalization * (1.0f / 4.0f); + + m_elemArray[0] = const1; + + m_elemArray[1] = -const2; + m_elemArray[2] = const2; + m_elemArray[3] = -const2; + + m_elemArray[4] = const3; + m_elemArray[5] = -const3; + m_elemArray[6] = const4; + m_elemArray[7] = -const3; + m_elemArray[8] = const5; + } + }; + + + + /// Spherical harmonic matrix. + class ShMatrix + { + public: + + /// Create an identity matrix of the given order. + ShMatrix(int o = 2) : m_order(o), m_identity(true) + { + nvCheck(m_order > 0); + m_e = new float[size()]; + m_band = new float *[bandNum()]; + setupBands(); + } + + /// Destroy and free matrix elements. + ~ShMatrix() + { + delete m_e; + delete m_band; + } + + /// Set identity matrix. + void setIdentity() + { + m_identity = true; + } + + /// Return true if this is an identity matrix, false in other case. + bool isIdentity() const { + return m_identity; + } + + /// Get number of bands of this matrix. + int bandNum() const + { + return m_order+1; + } + + /// Get total number of elements in the matrix. + int size() const + { + int size = 0; + for (int i = 0; i < bandNum(); i++) { + size += square(i * 2 + 1); + } + return size; + } + + /// Get element at the given raw index. + float element(int idx) const + { + return m_e[idx]; + } + + /// Get element at the given with the given indices. + float & element(int b, int x, int y) + { + nvDebugCheck(b >= 0); + nvDebugCheck(b < bandNum()); + return m_band[b][(b + y) * (b * 2 + 1) + (b + x)]; + } + + /// Get element at the given with the given indices. + float element(int b, int x, int y) const + { + nvDebugCheck(b >= 0); + nvDebugCheck(b < bandNum()); + return m_band[b][(b + y) * (b * 2 + 1) + (b + x)]; + } + + /// Copy matrix. + void copy(const ShMatrix & m) + { + nvDebugCheck(m_order == m.m_order); + memcpy(m_e, m.m_e, size() * sizeof(float)); + } + + /// Rotate the given coefficients. + /*void transform( const Sh & restrict source, Sh * restrict dest ) const { + nvCheck( &source != dest ); // Make sure there's no aliasing. + nvCheck( dest->m_order <= m_order ); + nvCheck( m_order <= source.m_order ); + + if (m_identity) { + *dest = source; + return; + } + + // Loop through each band. 
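// A hypothetical use of Sh2::cosineTransfer(): build the clamped-cosine transfer
// function once and scale an SH lighting environment by it with the per-coefficient
// product operator*= ("not exactly convolution, nor product"). Sketch only;
// irradianceFrom is not an NVTT function.
#include "nvmath/SphericalHarmonic.h"

inline nv::Sh2 irradianceFrom(const nv::Sh2 & lighting)
{
    nv::Sh2 transfer;
    transfer.cosineTransfer();   // fills the nine constants listed above
    nv::Sh2 result(lighting);
    result *= transfer;          // coefficient-wise scaling of the lighting SH
    return result;
}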
+ for (int l = 0; l <= dest->m_order; l++) { + + for (int mo = -l; mo <= l; mo++) { + + Color3f rgb = Color3f::Black; + + for( int mi = -l; mi <= l; mi++ ) { + rgb.Mad( rgb, source.elem(l, mi), elem(l, mo, mi) ); + } + + dest->elem(l, mo) = rgb; + } + } + }*/ + + + NVMATH_API void multiply( const ShMatrix &A, const ShMatrix &B ); + NVMATH_API void rotation( const Matrix & m ); + NVMATH_API void rotation( int axis, float angles ); + NVMATH_API void print(); + + + private: + + // @@ These could be static indices precomputed only once. + /// Setup the band pointers. + void setupBands() + { + int size = 0; + for( int i = 0; i < bandNum(); i++ ) { + m_band[i] = &m_e[size]; + size += square(i * 2 + 1); + } + } + + + private: + + // Matrix order. + const int m_order; + + // Identity flag for quick transform. + bool m_identity; - /// Create an identity matrix of the given order. - ShMatrix(int o = 2) : order(o), identity(true) - { - nvCheck(order > 0); - e = new float[Size()]; - band = new float *[GetBandNum()]; - setupBands(); - } - - /// Destroy and free matrix elements. - ~ShMatrix() - { - delete e; - delete band; - } - - /// Set identity matrix. - void setIdentity() - { - identity = true; - } - - /// Return true if this is an identity matrix, false in other case. - bool isIdentity() const { - return identity; - } - - /// Get number of bands of this matrix. - int bandNum() const - { - return order+1; - } - - /// Get total number of elements in the matrix. - int size() const - { - int size = 0; - for( int i = 0; i < bandNum(); i++ ) { - size += SQ(i * 2 + 1); - } - return size; - } - - /// Get element at the given raw index. - float elem(const int idx) const - { - return e[idx]; - } - - /// Get element at the given with the given indices. - float & elem( const int b, const int x, const int y ) - { - nvDebugCheck(b >= 0); - nvDebugCheck(b < bandNum()); - return band[b][(b + y) * (b * 2 + 1) + (b + x)]; - } - - /// Get element at the given with the given indices. - float elem( const int b, const int x, const int y ) const - { - nvDebugCheck(b >= 0); - nvDebugCheck(b < bandNum()); - return band[b][(b + y) * (b * 2 + 1) + (b + x)]; - } - - /** Copy matrix. */ - void Copy( const ShMatrix & m ) - { - nvDebugCheck(order == m.order); - memcpy(e, m.e, Size() * sizeof(float)); - } - - /** Rotate the given coefficients. */ - void transform( const Sh & restrict source, Sh * restrict dest ) const { - piCheck( &source != dest ); // Make sure there's no aliasing. - piCheck( dest->order <= order ); - piCheck( order <= source.order ); - - if( identity ) { - *dest = source; - return; - } - - // Loop through each band. - for( int l = 0; l <= dest->order; l++ ) { - - for( int mo = -l; mo <= l; mo++ ) { - - Color3f rgb = Color3f::Black; - - for( int mi = -l; mi <= l; mi++ ) { - rgb.Mad( rgb, source.elem(l, mi), elem(l, mo, mi) ); - } - - dest->elem(l, mo) = rgb; - } - } - } - - - MATHLIB_API void multiply( const ShMatrix &A, const ShMatrix &B ); - MATHLIB_API void rotation( const Matrix & m ); - MATHLIB_API void rotation( int axis, float angles ); - MATHLIB_API void print(); - - -private: - - // @@ These could be static indices precomputed only once. - /// Setup the band pointers. - void setupBands() - { - int size = 0; - for( int i = 0; i < bandNum(); i++ ) { - band[i] = &e[size]; - size += SQ(i * 2 + 1); - } - } - - -private: - - // Matrix order. - const int m_order; - - // Identity flag for quick transform. - bool m_identity; - - // Array of elements. - float * m_e; - - // Band pointers. 
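// ShMatrix above is block-diagonal: band b is a dense (2b+1) x (2b+1) block, so the
// total element count is the sum of (2b+1)^2 over the bands (1 + 9 + 25 = 35 for
// order 2), and element(b, x, y) with x, y in [-b, b] indexes into band b's block.
// The two helpers below are a standalone restatement of that layout, illustration
// only, not NVTT API.
inline int shMatrixSize(int order)
{
    int size = 0;
    for (int b = 0; b <= order; ++b)
        size += (2 * b + 1) * (2 * b + 1);
    return size;                              // 35 when order == 2
}

inline int shMatrixBandOffset(int b, int x, int y)
{
    return (b + y) * (2 * b + 1) + (b + x);   // matches m_band[b][...] above
}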
- float ** m_band; - -}; + // Array of elements. + float * m_e; -#endif // 0 + // Band pointers. + float ** m_band; + }; } // nv namespace Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/SphericalHarmonic.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvmath/SphericalHarmonic.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvmath/SphericalHarmonic.cpp @@ -1,6 +1,8 @@ // This code is in the public domain -- castanyo@yahoo.es -#include +#include "SphericalHarmonic.h" + +#include "Vector.h" using namespace nv; @@ -11,8 +13,10 @@ // Basic integer factorial. inline static int factorial( int v ) { - if (v == 0) { - return 1; + const static int fac_table[] = { 1, 1, 2, 6, 24, 120, 720, 5040, 40320, 362880, 3628800, 39916800 }; + + if(v <= 11){ + return fac_table[v]; } int result = v; @@ -80,7 +84,7 @@ template float legendre(float x); - template <> float legendre<0, 0>(float x) { + template <> float legendre<0, 0>(float ) { return 1; } @@ -171,7 +175,7 @@ * @param theta is the altitude, in the range [0, PI] * @param phi is the azimuth, in the range [0, 2*PI] */ -float nv::y( int l, int m, float theta, float phi ) +float nv::shBasis( int l, int m, float theta, float phi ) { if( m == 0 ) { // K(l, 0) = sqrt((2*l+1)/(4*PI)) @@ -193,11 +197,11 @@ * y = sin(theta)*sin(phi) * z = cos(theta) */ -float nv::y( int l, int m, Vector3::Arg v ) +float nv::shBasis( int l, int m, Vector3::Arg v ) { - float theta = acosf(v.z()); - float phi = atan2f(v.y(), v.x()); - return y( l, m, theta, phi ); + float theta = acosf(v.z); + float phi = atan2f(v.y, v.x); + return shBasis( l, m, theta, phi ); } @@ -208,7 +212,7 @@ * @param theta is the altitude, in the range [0, PI/2] * @param phi is the azimuth, in the range [0, 2*PI] */ -float nv::hy( int l, int m, float theta, float phi ) +float nv::hshBasis( int l, int m, float theta, float phi ) { if( m == 0 ) { // HK(l, 0) = sqrt((2*l+1)/(2*PI)) @@ -230,11 +234,11 @@ * y = sin(theta)*sin(phi) * z = cos(theta) */ -float nv::hy( int l, int m, Vector3::Arg v ) +float nv::hshBasis( int l, int m, Vector3::Arg v ) { - float theta = acosf(v.z()); - float phi = atan2f(v.y(), v.x()); - return y( l, m, theta, phi ); + float theta = acosf(v.z); + float phi = atan2f(v.y, v.x); + return hshBasis( l, m, theta, phi ); } Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/TriBox.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvmath/TriBox.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvmath/TriBox.cpp @@ -1,226 +0,0 @@ -/********************************************************/ -/* AABB-triangle overlap test code */ -/* by Tomas Akenine-Möller */ -/* Function: int triBoxOverlap(float boxcenter[3], */ -/* float boxhalfsize[3],float triverts[3][3]); */ -/* History: */ -/* 2001-03-05: released the code in its first version */ -/* 2001-06-18: changed the order of the tests, faster */ -/* */ -/* Acknowledgement: Many thanks to Pierre Terdiman for */ -/* suggestions and discussions on how to optimize code. */ -/* Thanks to David Hunt for finding a ">="-bug! 
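// A small sketch of the direction-to-angles conversion used by the Vector3 overloads
// of shBasis()/hshBasis() in SphericalHarmonic.cpp above: theta = acos(z) is the
// altitude from +z in [0, PI], phi = atan2(y, x) is the azimuth in the xy plane.
// toSpherical is a hypothetical helper; it assumes the input direction is normalized.
#include <cmath>

inline void toSpherical(float x, float y, float z, float & theta, float & phi)
{
    theta = std::acos(z);
    phi   = std::atan2(y, x);
}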
*/ -/********************************************************/ - -#include -#include - -using namespace nv; - -#define X 0 -#define Y 1 -#define Z 2 - -#define FINDMINMAX(x0,x1,x2,min,max) \ - min = max = x0; \ - if(x1max) max=x1;\ - if(x2max) max=x2; - - -static bool planeBoxOverlap(Vector3::Arg normal, Vector3::Arg vert, Vector3::Arg maxbox) // -NJMP- -{ - Vector3 vmin, vmax; - - float signs[3] = {1, 1, 1}; - if (normal.x() <= 0.0f) signs[0] = -1; - if (normal.y() <= 0.0f) signs[1] = -1; - if (normal.z() <= 0.0f) signs[2] = -1; - - Vector3 sign(signs[0], signs[1], signs[2]); - vmin = -scale(sign, maxbox) - vert; - vmax = scale(sign, maxbox) - vert; - - if (dot(normal, vmin) > 0.0f) return false; - if (dot(normal, vmax) >= 0.0f) return true; - - return false; -} - - -/*======================== X-tests ========================*/ -#define AXISTEST_X01(a, b, fa, fb) \ - p0 = a*v0.y() - b*v0.z(); \ - p2 = a*v2.y() - b*v2.z(); \ - if(p0rad || max<-rad) return false; - -#define AXISTEST_X2(a, b, fa, fb) \ - p0 = a*v0.y() - b*v0.z(); \ - p1 = a*v1.y() - b*v1.z(); \ - if(p0rad || max<-rad) return false; - -/*======================== Y-tests ========================*/ -#define AXISTEST_Y02(a, b, fa, fb) \ - p0 = -a*v0.x() + b*v0.z(); \ - p2 = -a*v2.x() + b*v2.z(); \ - if(p0rad || max<-rad) return false; - -#define AXISTEST_Y1(a, b, fa, fb) \ - p0 = -a*v0.x() + b*v0.z(); \ - p1 = -a*v1.x() + b*v1.z(); \ - if(p0rad || max<-rad) return false; - -/*======================== Z-tests ========================*/ - -#define AXISTEST_Z12(a, b, fa, fb) \ - p1 = a*v1.x() - b*v1.y(); \ - p2 = a*v2.x() - b*v2.y(); \ - if(p2rad || max<-rad) return false; - -#define AXISTEST_Z0(a, b, fa, fb) \ - p0 = a*v0.x() - b*v0.y(); \ - p1 = a*v1.x() - b*v1.y(); \ - if(p0rad || max<-rad) return false; - - -bool triBoxOverlap(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const Triangle & tri) -{ - // use separating axis theorem to test overlap between triangle and box - // need to test for overlap in these directions: - // 1) the {x,y,z}-directions (actually, since we use the AABB of the triangle - // we do not even need to test these) - // 2) normal of the triangle - // 3) crossproduct(edge from tri, {x,y,z}-directin) - // this gives 3x3=9 more tests - Vector3 v0, v1, v2; - float min, max, p0, p1, p2, rad, fex, fey, fez; - Vector3 normal, e0, e1, e2; - - // This is the fastest branch on Sun. - // move everything so that the boxcenter is in (0,0,0) - v0 = tri.v[0] - boxcenter; - v1 = tri.v[1] - boxcenter; - v2 = tri.v[2] - boxcenter; - - // Compute triangle edges. 
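// A scalar restatement of the planeBoxOverlap() helper above (from the removed
// TriBox.cpp): a box centered at the origin with half-sizes h straddles the plane
// n.p = s iff |s| does not exceed the box's projection radius onto n.
// planeOverlapsBox is illustrative only, not part of the library.
#include <cmath>

inline bool planeOverlapsBox(float nx, float ny, float nz,   // plane normal
                             float s,                        // n . v for a point v on the plane
                             float hx, float hy, float hz)   // box half-sizes
{
    const float r = std::fabs(nx) * hx + std::fabs(ny) * hy + std::fabs(nz) * hz;
    return s >= -r && s <= r;
}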
- e0 = v1 - v0; // tri edge 0 - e1 = v2 - v1; // tri edge 1 - e2 = v0 - v2; // tri edge 2 - - // Bullet 3: - // test the 9 tests first (this was faster) - fex = fabsf(e0.x()); - fey = fabsf(e0.y()); - fez = fabsf(e0.z()); - AXISTEST_X01(e0.z(), e0.y(), fez, fey); - AXISTEST_Y02(e0.z(), e0.x(), fez, fex); - AXISTEST_Z12(e0.y(), e0.x(), fey, fex); - - fex = fabsf(e1.x()); - fey = fabsf(e1.y()); - fez = fabsf(e1.z()); - AXISTEST_X01(e1.z(), e1.y(), fez, fey); - AXISTEST_Y02(e1.z(), e1.x(), fez, fex); - AXISTEST_Z0(e1.y(), e1.x(), fey, fex); - - fex = fabsf(e2.x()); - fey = fabsf(e2.y()); - fez = fabsf(e2.z()); - AXISTEST_X2(e2.z(), e2.y(), fez, fey); - AXISTEST_Y1(e2.z(), e2.x(), fez, fex); - AXISTEST_Z12(e2.y(), e2.x(), fey, fex); - - // Bullet 1: - // first test overlap in the {x,y,z}-directions - // find min, max of the triangle each direction, and test for overlap in - // that direction -- this is equivalent to testing a minimal AABB around - // the triangle against the AABB - - // test in X-direction - FINDMINMAX(v0.x(), v1.x(), v2.x(), min, max); - if(min > boxhalfsize.x() || max < -boxhalfsize.x()) return false; - - // test in Y-direction - FINDMINMAX(v0.y(), v1.y(), v2.y(), min, max); - if(min > boxhalfsize.y() || max < -boxhalfsize.y()) return false; - - // test in Z-direction - FINDMINMAX(v0.z(), v1.z(), v2.z(), min, max); - if(min > boxhalfsize.z() || max < -boxhalfsize.z()) return false; - - // Bullet 2: - // test if the box intersects the plane of the triangle - // compute plane equation of triangle: normal*x+d=0 - normal = cross(e0, e1); - - return planeBoxOverlap(normal, v0, boxhalfsize); -} - - -bool triBoxOverlapNoBounds(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const Triangle & tri) -{ - // use separating axis theorem to test overlap between triangle and box - // need to test for overlap in these directions: - // 1) the {x,y,z}-directions (actually, since we use the AABB of the triangle - // we do not even need to test these) - // 2) normal of the triangle - // 3) crossproduct(edge from tri, {x,y,z}-directin) - // this gives 3x3=9 more tests - Vector3 v0, v1, v2; - float min, max, p0, p1, p2, rad, fex, fey, fez; - Vector3 normal, e0, e1, e2; - - // This is the fastest branch on Sun. - // move everything so that the boxcenter is in (0,0,0) - v0 = tri.v[0] - boxcenter; - v1 = tri.v[1] - boxcenter; - v2 = tri.v[2] - boxcenter; - - // Compute triangle edges. 
- e0 = v1 - v0; // tri edge 0 - e1 = v2 - v1; // tri edge 1 - e2 = v0 - v2; // tri edge 2 - - // Bullet 3: - // test the 9 tests first (this was faster) - fex = fabsf(e0.x()); - fey = fabsf(e0.y()); - fez = fabsf(e0.z()); - AXISTEST_X01(e0.z(), e0.y(), fez, fey); - AXISTEST_Y02(e0.z(), e0.x(), fez, fex); - AXISTEST_Z12(e0.y(), e0.x(), fey, fex); - - fex = fabsf(e1.x()); - fey = fabsf(e1.y()); - fez = fabsf(e1.z()); - AXISTEST_X01(e1.z(), e1.y(), fez, fey); - AXISTEST_Y02(e1.z(), e1.x(), fez, fex); - AXISTEST_Z0(e1.y(), e1.x(), fey, fex); - - fex = fabsf(e2.x()); - fey = fabsf(e2.y()); - fez = fabsf(e2.z()); - AXISTEST_X2(e2.z(), e2.y(), fez, fey); - AXISTEST_Y1(e2.z(), e2.x(), fez, fex); - AXISTEST_Z12(e2.y(), e2.x(), fey, fex); - - // Bullet 2: - // test if the box intersects the plane of the triangle - // compute plane equation of triangle: normal*x+d=0 - normal = cross(e0, e1); - - return planeBoxOverlap(normal, v0, boxhalfsize); -} Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/Triangle.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvmath/Triangle.h +++ ps/trunk/libraries/source/nvtt/src/src/nvmath/Triangle.h @@ -1,81 +0,0 @@ -// This code is in the public domain -- Ignacio Castaño - -#ifndef NV_MATH_TRIANGLE_H -#define NV_MATH_TRIANGLE_H - -#include -#include -#include - -namespace nv -{ - - /// Triangle class with three vertices. - class Triangle - { - public: - Triangle() {}; - - Triangle(Vector3::Arg v0, Vector3::Arg v1, Vector3::Arg v2) - { - v[0] = v0; - v[1] = v1; - v[2] = v2; - } - - /// Get the bounds of the triangle. - Box bounds() const - { - Box bounds; - bounds.clearBounds(); - bounds.addPointToBounds(v[0]); - bounds.addPointToBounds(v[1]); - bounds.addPointToBounds(v[2]); - return bounds; - } - - Vector4 plane() const - { - Vector3 n = cross(v[1]-v[0], v[2]-v[0]); - return Vector4(n, dot(n, v[0])); - } - - Vector3 v[3]; - }; - - - // Tomas Akenine-Möller box-triangle test. - NVMATH_API bool triBoxOverlap(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const Triangle & triangle); - NVMATH_API bool triBoxOverlapNoBounds(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const Triangle & triangle); - - - // Moller ray triangle test. - NVMATH_API bool rayTest_Moller(const Triangle & t, Vector3::Arg orig, Vector3::Arg dir, float * out_t, float * out_u, float * out_v); - - inline bool rayTest(const Triangle & t, Vector3::Arg orig, Vector3::Arg dir, float * out_t, float * out_u, float * out_v) - { - return rayTest_Moller(t, orig, dir, out_t, out_u, out_v); - } - - inline bool overlap(const Triangle & t, const Box & b) - { - Vector3 center = b.center(); - Vector3 extents = b.extents(); - return triBoxOverlap(center, extents, t); - } - - inline bool overlap(const Box & b, const Triangle & t) - { - return overlap(t, b); - } - - inline bool overlapNoBounds(const Triangle & t, const Box & b) - { - Vector3 center = b.center(); - Vector3 extents = b.extents(); - return triBoxOverlapNoBounds(center, extents, t); - } - -} // nv namespace - -#endif // NV_MATH_TRIANGLE_H Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/Triangle.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvmath/Triangle.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvmath/Triangle.cpp @@ -1,168 +0,0 @@ -// This code is in the public domain -- Ignacio Castaño - -#include - -using namespace nv; - - -/// Tomas Möller, barycentric ray-triangle test. 
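// The removed Triangle::plane() above packs an (unnormalized) plane as Vector4(n, d)
// with n = cross(v1 - v0, v2 - v0) and d = dot(n, v0), so points p on the plane
// satisfy dot(n, p) == d. A standalone sketch of testing a point against that
// representation (planeSide is illustrative, not library code):
inline float planeSide(const float n[3], float d, const float p[3])
{
    return n[0] * p[0] + n[1] * p[1] + n[2] * p[2] - d;   // > 0 in front, < 0 behind
}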
-bool rayTest_Moller(const Triangle & t, Vector3::Arg orig, Vector3::Arg dir, float * out_t, float * out_u, float * out_v) -{ - // find vectors for two edges sharing vert0 - Vector3 e1 = t.v[1] - t.v[0]; - Vector3 e2 = t.v[2] - t.v[0]; - - // begin calculating determinant - also used to calculate U parameter - Vector3 pvec = cross(dir, e2); - - // if determinant is near zero, ray lies in plane of triangle - float det = dot(e1, pvec); - if (det < -NV_EPSILON) { - return false; - } - - // calculate distance from vert0 to ray origin - Vector3 tvec = orig - t.v[0]; - - // calculate U parameter and test bounds - float u = dot(tvec, pvec); - if( u < 0.0f || u > det ) { - return false; - } - - // prepare to test V parameter - Vector3 qvec = cross(tvec, e1); - - // calculate V parameter and test bounds - float v = dot(dir, qvec); - if (v < 0.0f || u + v > det) { - return false; - } - - // calculate t, scale parameters, ray intersects triangle - float inv_det = 1.0f / det; - *out_t = dot(e2, qvec) * inv_det; - *out_u = u * inv_det; // v - *out_v = v * inv_det; // 1-(u+v) - - return true; -} - - - - - -#if 0 - - -// IC: This code is adapted from my Pi.MathLib code, based on Moller-Trumbore triangle test. -FXVector3 edge1, edge2, pvec, tvec, qvec; - -edge1 = tri.V1 - tri.V0; -edge2 = tri.V2 - tri.V0; - -pvec.Cross(ray.Direction, edge2); - -float det = FXVector3.Dot(edge1, pvec); - -// calculate distance from vert0 to ray origin. -FXVector3 tvec = ray.Origin - vert0; - -if( det < 0 ) -{ - // calculate U parameter and test bounds. - float u = FXVector3.Dot(tvec, pvec); - if (u > 0.0 || u < det) - { - return false; - } - - // prepare to test V parameter. - qvec.Cross(tvec, edge1); - - // calculate V parameter and test bounds. - float v = FXVector3.Dot(dir, qvec); - - return v <= 0.0 && u + v >= det; -} -else -{ - // calculate U parameter and test bounds. - float u = FXVector3.Dot(tvec, pvec); - if (u < 0.0 || u > det) - { - return false; - } - - // prepare to test V parameter. - qvec.Cross(tvec, edge1); - - // calculate V parameter and test bounds. - float v = FXVector3.Dot(dir, qvec); - - return v >= 0.0 && u + v <= det; -} - - - -/** - * Dan Sunday, parametric ray-triangle test. - */ -// Output: *I = intersection point (when it exists) -// Return: -1 = triangle is degenerate (a segment or point) -// 0 = disjoint (no intersect) -// 1 = intersect in unique point I1 -// 2 = are in the same plane -bool RayTriangleTest( const Vec3 &p0, const Vec3 &p1, - const Vec3 &v0, const Vec3 &v1, const Vec3 &v2, const Vec3 &n, - Vec3 &I ) { - Vec3 u, v; // triangle vectors - Vec3 dir, w0, w; // ray vectors - float r, a, b; // params to calc ray-plane intersect - - // get triangle edge vectors and plane normal - u.Sub( v1, v0 ); - v.Sub( v2, v0 ); - - dir.Sub( p1, p0 ); // ray direction vector - w0.Sub( p0, v0 ); - a = Vec3DotProduct( n, w0 ); - b = Vec3DotProduct( n, dir ); - - if( fabs(b) < TI_EPSILON ) // ray is parallel to triangle plane - return false; - - - // get intersect point of ray with triangle plane - r = -a / b; - if( r < 0.0f ) // ray goes away from triangle - return false; // => no intersect - - // for a segment, also test if (r > 1.0) => no intersect - - I.Mad( p0, dir, r ); // intersect point of ray and plane - - // is I inside T? 
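// In the Möller test above (removed together with Triangle.h), a hit reports the ray
// parameter t and barycentric weights, so in the usual Möller-Trumbore convention the
// hit point is orig + t*dir and any per-vertex attribute is blended with the same
// weights. A tiny sketch of that blend (interpolate is illustrative only):
inline float interpolate(float a0, float a1, float a2, float u, float v)
{
    return (1.0f - u - v) * a0 + u * a1 + v * a2;
}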
- float uu, uv, vv, wu, wv, D; - uu = Vec3DotProduct( u, u ); - uv = Vec3DotProduct( u, v ); - vv = Vec3DotProduct( v, v ); - w = I - v0; - wu = Vec3DotProduct( w, u ); - wv = Vec3DotProduct( w, v ); - D = uv * uv - uu * vv; - - // get and test parametric coords - float s, t; - s = (uv * wv - vv * wu) / D; - if( s<0.0 || s > 1.0) // I is outside T - return false; - t = (uv * wu - uu * wv) / D; - if( t<0.0 || (s + t) > 1.0) // I is outside T - return false; - - return true; // I is in T -} - - -#endif // 0 Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/Vector.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvmath/Vector.h +++ ps/trunk/libraries/source/nvtt/src/src/nvmath/Vector.h @@ -1,805 +1,149 @@ // This code is in the public domain -- castanyo@yahoo.es +#pragma once #ifndef NV_MATH_VECTOR_H #define NV_MATH_VECTOR_H -#include -#include // min, max +#include "nvmath.h" namespace nv { - -enum zero_t { zero }; -enum identity_t { identity }; - -// I should probably use templates. -typedef float scalar; - -class NVMATH_CLASS Vector2 -{ -public: - typedef Vector2 const & Arg; - - Vector2(); - explicit Vector2(zero_t); - explicit Vector2(scalar f); - Vector2(scalar x, scalar y); - Vector2(Vector2::Arg v); - - const Vector2 & operator=(Vector2::Arg v); - - scalar x() const; - scalar y() const; - - scalar component(uint idx) const; - - const scalar * ptr() const; - - void set(scalar x, scalar y); - - Vector2 operator-() const; - void operator+=(Vector2::Arg v); - void operator-=(Vector2::Arg v); - void operator*=(scalar s); - void operator*=(Vector2::Arg v); - - friend bool operator==(Vector2::Arg a, Vector2::Arg b); - friend bool operator!=(Vector2::Arg a, Vector2::Arg b); - -private: - scalar m_x, m_y; -}; - - -class NVMATH_CLASS Vector3 -{ -public: - typedef Vector3 const & Arg; - - Vector3(); - explicit Vector3(zero_t); - Vector3(scalar x, scalar y, scalar z); - Vector3(Vector2::Arg v, scalar z); - Vector3(Vector3::Arg v); - - const Vector3 & operator=(Vector3::Arg v); - - scalar x() const; - scalar y() const; - scalar z() const; - - Vector2 xy() const; - - scalar component(uint idx) const; - - const scalar * ptr() const; - - void set(scalar x, scalar y, scalar z); - - Vector3 operator-() const; - void operator+=(Vector3::Arg v); - void operator-=(Vector3::Arg v); - void operator*=(scalar s); - void operator/=(scalar s); - void operator*=(Vector3::Arg v); - - friend bool operator==(Vector3::Arg a, Vector3::Arg b); - friend bool operator!=(Vector3::Arg a, Vector3::Arg b); - -private: - scalar m_x, m_y, m_z; -}; - - -class NVMATH_CLASS Vector4 -{ -public: - typedef Vector4 const & Arg; - - Vector4(); - explicit Vector4(zero_t); - Vector4(scalar x, scalar y, scalar z, scalar w); - Vector4(Vector2::Arg v, scalar z, scalar w); - Vector4(Vector3::Arg v, scalar w); - Vector4(Vector4::Arg v); -// Vector4(const Quaternion & v); - - const Vector4 & operator=(Vector4::Arg v); - - scalar x() const; - scalar y() const; - scalar z() const; - scalar w() const; - - Vector2 xy() const; - Vector3 xyz() const; - - scalar component(uint idx) const; - - const scalar * ptr() const; - - void set(scalar x, scalar y, scalar z, scalar w); - - Vector4 operator-() const; - void operator+=(Vector4::Arg v); - void operator-=(Vector4::Arg v); - void operator*=(scalar s); - void operator*=(Vector4::Arg v); - - friend bool operator==(Vector4::Arg a, Vector4::Arg b); - friend bool operator!=(Vector4::Arg a, Vector4::Arg b); - -private: - scalar m_x, m_y, 
m_z, m_w; -}; - - -// Vector2 - -inline Vector2::Vector2() {} -inline Vector2::Vector2(zero_t) : m_x(0.0f), m_y(0.0f) {} -inline Vector2::Vector2(scalar f) : m_x(f), m_y(f) {} -inline Vector2::Vector2(scalar x, scalar y) : m_x(x), m_y(y) {} -inline Vector2::Vector2(Vector2::Arg v) : m_x(v.x()), m_y(v.y()) {} - -inline const Vector2 & Vector2::operator=(Vector2::Arg v) -{ - m_x = v.x(); - m_y = v.y(); - return *this; -} - -inline scalar Vector2::x() const { return m_x; } -inline scalar Vector2::y() const { return m_y; } - -inline scalar Vector2::component(uint idx) const -{ - nvDebugCheck(idx < 2); - if (idx == 0) return x(); - if (idx == 1) return y(); - nvAssume(false); - return 0.0f; -} - -inline const scalar * Vector2::ptr() const -{ - return &m_x; -} - -inline void Vector2::set(scalar x, scalar y) -{ - m_x = x; - m_y = y; -} - -inline Vector2 Vector2::operator-() const -{ - return Vector2(-m_x, -m_y); -} - -inline void Vector2::operator+=(Vector2::Arg v) -{ - m_x += v.m_x; - m_y += v.m_y; -} - -inline void Vector2::operator-=(Vector2::Arg v) -{ - m_x -= v.m_x; - m_y -= v.m_y; -} - -inline void Vector2::operator*=(scalar s) -{ - m_x *= s; - m_y *= s; -} - -inline void Vector2::operator*=(Vector2::Arg v) -{ - m_x *= v.m_x; - m_y *= v.m_y; -} - -inline bool operator==(Vector2::Arg a, Vector2::Arg b) -{ - return a.m_x == b.m_x && a.m_y == b.m_y; -} -inline bool operator!=(Vector2::Arg a, Vector2::Arg b) -{ - return a.m_x != b.m_x || a.m_y != b.m_y; -} - - -// Vector3 - -inline Vector3::Vector3() {} -inline Vector3::Vector3(zero_t) : m_x(0.0f), m_y(0.0f), m_z(0.0f) {} -inline Vector3::Vector3(scalar x, scalar y, scalar z) : m_x(x), m_y(y), m_z(z) {} -inline Vector3::Vector3(Vector2::Arg v, scalar z) : m_x(v.x()), m_y(v.y()), m_z(z) {} -inline Vector3::Vector3(Vector3::Arg v) : m_x(v.x()), m_y(v.y()), m_z(v.z()) {} - -inline const Vector3 & Vector3::operator=(Vector3::Arg v) -{ - m_x = v.m_x; - m_y = v.m_y; - m_z = v.m_z; - return *this; -} - -inline scalar Vector3::x() const { return m_x; } -inline scalar Vector3::y() const { return m_y; } -inline scalar Vector3::z() const { return m_z; } - -inline Vector2 Vector3::xy() const -{ - return Vector2(m_x, m_y); -} - -inline scalar Vector3::component(uint idx) const -{ - nvDebugCheck(idx < 3); - if (idx == 0) return x(); - if (idx == 1) return y(); - if (idx == 2) return z(); - nvAssume(false); - return 0.0f; -} - -inline const scalar * Vector3::ptr() const -{ - return &m_x; -} - -inline void Vector3::set(scalar x, scalar y, scalar z) -{ - m_x = x; - m_y = y; - m_z = z; -} - -inline Vector3 Vector3::operator-() const -{ - return Vector3(-m_x, -m_y, -m_z); -} - -inline void Vector3::operator+=(Vector3::Arg v) -{ - m_x += v.m_x; - m_y += v.m_y; - m_z += v.m_z; -} - -inline void Vector3::operator-=(Vector3::Arg v) -{ - m_x -= v.m_x; - m_y -= v.m_y; - m_z -= v.m_z; -} - -inline void Vector3::operator*=(scalar s) -{ - m_x *= s; - m_y *= s; - m_z *= s; -} - -inline void Vector3::operator/=(scalar s) -{ - float is = 1.0f / s; - m_x *= is; - m_y *= is; - m_z *= is; -} - -inline void Vector3::operator*=(Vector3::Arg v) -{ - m_x *= v.m_x; - m_y *= v.m_y; - m_z *= v.m_z; -} - -inline bool operator==(Vector3::Arg a, Vector3::Arg b) -{ - return a.m_x == b.m_x && a.m_y == b.m_y && a.m_z == b.m_z; -} -inline bool operator!=(Vector3::Arg a, Vector3::Arg b) -{ - return a.m_x != b.m_x || a.m_y != b.m_y || a.m_z != b.m_z; -} - - -// Vector4 - -inline Vector4::Vector4() {} -inline Vector4::Vector4(zero_t) : m_x(0.0f), m_y(0.0f), m_z(0.0f), m_w(0.0f) {} -inline 
Vector4::Vector4(scalar x, scalar y, scalar z, scalar w) : m_x(x), m_y(y), m_z(z), m_w(w) {} -inline Vector4::Vector4(Vector2::Arg v, scalar z, scalar w) : m_x(v.x()), m_y(v.y()), m_z(z), m_w(w) {} -inline Vector4::Vector4(Vector3::Arg v, scalar w) : m_x(v.x()), m_y(v.y()), m_z(v.z()), m_w(w) {} -inline Vector4::Vector4(Vector4::Arg v) : m_x(v.x()), m_y(v.y()), m_z(v.z()), m_w(v.w()) {} - -inline const Vector4 & Vector4::operator=(const Vector4 & v) -{ - m_x = v.m_x; - m_y = v.m_y; - m_z = v.m_z; - m_w = v.m_w; - return *this; -} - -inline scalar Vector4::x() const { return m_x; } -inline scalar Vector4::y() const { return m_y; } -inline scalar Vector4::z() const { return m_z; } -inline scalar Vector4::w() const { return m_w; } - -inline Vector2 Vector4::xy() const -{ - return Vector2(m_x, m_y); -} - -inline Vector3 Vector4::xyz() const -{ - return Vector3(m_x, m_y, m_z); -} - -inline scalar Vector4::component(uint idx) const -{ - nvDebugCheck(idx < 4); - if (idx == 0) return x(); - if (idx == 1) return y(); - if (idx == 2) return z(); - if (idx == 3) return w(); - nvAssume(false); - return 0.0f; -} - -inline const scalar * Vector4::ptr() const -{ - return &m_x; -} - -inline void Vector4::set(scalar x, scalar y, scalar z, scalar w) -{ - m_x = x; - m_y = y; - m_z = z; - m_w = w; -} - -inline Vector4 Vector4::operator-() const -{ - return Vector4(-m_x, -m_y, -m_z, -m_w); -} - -inline void Vector4::operator+=(Vector4::Arg v) -{ - m_x += v.m_x; - m_y += v.m_y; - m_z += v.m_z; - m_w += v.m_w; -} - -inline void Vector4::operator-=(Vector4::Arg v) -{ - m_x -= v.m_x; - m_y -= v.m_y; - m_z -= v.m_z; - m_w -= v.m_w; -} - -inline void Vector4::operator*=(scalar s) -{ - m_x *= s; - m_y *= s; - m_z *= s; - m_w *= s; -} - -inline void Vector4::operator*=(Vector4::Arg v) -{ - m_x *= v.m_x; - m_y *= v.m_y; - m_z *= v.m_z; - m_w *= v.m_w; -} - -inline bool operator==(Vector4::Arg a, Vector4::Arg b) -{ - return a.m_x == b.m_x && a.m_y == b.m_y && a.m_z == b.m_z && a.m_w == b.m_w; -} -inline bool operator!=(Vector4::Arg a, Vector4::Arg b) -{ - return a.m_x != b.m_x || a.m_y != b.m_y || a.m_z != b.m_z || a.m_w != b.m_w; -} - - - -// Functions - - -// Vector2 - -inline Vector2 add(Vector2::Arg a, Vector2::Arg b) -{ - return Vector2(a.x() + b.x(), a.y() + b.y()); -} -inline Vector2 operator+(Vector2::Arg a, Vector2::Arg b) -{ - return add(a, b); -} - -inline Vector2 sub(Vector2::Arg a, Vector2::Arg b) -{ - return Vector2(a.x() - b.x(), a.y() - b.y()); -} -inline Vector2 operator-(Vector2::Arg a, Vector2::Arg b) -{ - return sub(a, b); -} - -inline Vector2 scale(Vector2::Arg v, scalar s) -{ - return Vector2(v.x() * s, v.y() * s); -} - -inline Vector2 scale(Vector2::Arg v, Vector2::Arg s) -{ - return Vector2(v.x() * s.x(), v.y() * s.y()); -} - -inline Vector2 operator*(Vector2::Arg v, scalar s) -{ - return scale(v, s); -} - -inline Vector2 operator*(Vector2::Arg v1, Vector2::Arg v2) -{ - return Vector2(v1.x()*v2.x(), v1.y()*v2.y()); -} - -inline Vector2 operator*(scalar s, Vector2::Arg v) -{ - return scale(v, s); -} - -inline scalar dot(Vector2::Arg a, Vector2::Arg b) -{ - return a.x() * b.x() + a.y() * b.y(); -} - -inline scalar length_squared(Vector2::Arg v) -{ - return v.x() * v.x() + v.y() * v.y(); -} - -inline scalar length(Vector2::Arg v) -{ - return sqrtf(length_squared(v)); -} - -inline bool equal(Vector2::Arg v1, Vector2::Arg v2, float epsilon = NV_EPSILON) -{ - return equal(v1.x(), v2.x(), epsilon) && equal(v1.y(), v2.y(), epsilon); -} - -inline Vector2 min(Vector2::Arg a, Vector2::Arg b) -{ - return 
Vector2(min(a.x(), b.x()), min(a.y(), b.y())); -} - -inline Vector2 max(Vector2::Arg a, Vector2::Arg b) -{ - return Vector2(max(a.x(), b.x()), max(a.y(), b.y())); -} - -inline bool isValid(Vector2::Arg v) -{ - return isFinite(v.x()) && isFinite(v.y()); -} - - -// Vector3 - -inline Vector3 add(Vector3::Arg a, Vector3::Arg b) -{ - return Vector3(a.x() + b.x(), a.y() + b.y(), a.z() + b.z()); -} -inline Vector3 add(Vector3::Arg a, float b) -{ - return Vector3(a.x() + b, a.y() + b, a.z() + b); -} -inline Vector3 operator+(Vector3::Arg a, Vector3::Arg b) -{ - return add(a, b); -} -inline Vector3 operator+(Vector3::Arg a, float b) -{ - return add(a, b); -} - -inline Vector3 sub(Vector3::Arg a, Vector3::Arg b) -{ - return Vector3(a.x() - b.x(), a.y() - b.y(), a.z() - b.z()); -} -inline Vector3 sub(Vector3::Arg a, float b) -{ - return Vector3(a.x() - b, a.y() - b, a.z() - b); -} -inline Vector3 operator-(Vector3::Arg a, Vector3::Arg b) -{ - return sub(a, b); -} -inline Vector3 operator-(Vector3::Arg a, float b) -{ - return sub(a, b); -} - -inline Vector3 cross(Vector3::Arg a, Vector3::Arg b) -{ - return Vector3(a.y() * b.z() - a.z() * b.y(), a.z() * b.x() - a.x() * b.z(), a.x() * b.y() - a.y() * b.x()); -} - -inline Vector3 scale(Vector3::Arg v, scalar s) -{ - return Vector3(v.x() * s, v.y() * s, v.z() * s); -} - -inline Vector3 scale(Vector3::Arg v, Vector3::Arg s) -{ - return Vector3(v.x() * s.x(), v.y() * s.y(), v.z() * s.z()); -} - -inline Vector3 operator*(Vector3::Arg v, scalar s) -{ - return scale(v, s); -} - -inline Vector3 operator*(scalar s, Vector3::Arg v) -{ - return scale(v, s); -} - -inline Vector3 operator*(Vector3::Arg v, Vector3::Arg s) -{ - return scale(v, s); -} - -inline Vector3 operator/(Vector3::Arg v, scalar s) -{ - return scale(v, 1.0f/s); -} - -inline Vector3 add_scaled(Vector3::Arg a, Vector3::Arg b, scalar s) -{ - return Vector3(a.x() + b.x() * s, a.y() + b.y() * s, a.z() + b.z() * s); -} - -inline Vector3 lerp(Vector3::Arg v1, Vector3::Arg v2, scalar t) -{ - const scalar s = 1.0f - t; - return Vector3(v1.x() * s + t * v2.x(), v1.y() * s + t * v2.y(), v1.z() * s + t * v2.z()); -} - -inline scalar dot(Vector3::Arg a, Vector3::Arg b) -{ - return a.x() * b.x() + a.y() * b.y() + a.z() * b.z(); -} - -inline scalar length_squared(Vector3::Arg v) -{ - return v.x() * v.x() + v.y() * v.y() + v.z() * v.z(); -} - -inline scalar length(Vector3::Arg v) -{ - return sqrtf(length_squared(v)); -} - -inline bool isNormalized(Vector3::Arg v, float epsilon = NV_NORMAL_EPSILON) -{ - return equal(length(v), 1, epsilon); -} - -inline Vector3 normalize(Vector3::Arg v, float epsilon = NV_EPSILON) -{ - float l = length(v); - nvDebugCheck(!isZero(l, epsilon)); - Vector3 n = scale(v, 1.0f / l); - nvDebugCheck(isNormalized(n)); - return n; -} - -inline Vector3 normalizeSafe(Vector3::Arg v, Vector3::Arg fallback, float epsilon = NV_EPSILON) -{ - float l = length(v); - if (isZero(l, epsilon)) { - return fallback; - } - return scale(v, 1.0f / l); -} - -inline bool equal(Vector3::Arg v1, Vector3::Arg v2, float epsilon = NV_EPSILON) -{ - return equal(v1.x(), v2.x(), epsilon) && equal(v1.y(), v2.y(), epsilon) && equal(v1.z(), v2.z(), epsilon); -} - -inline Vector3 min(Vector3::Arg a, Vector3::Arg b) -{ - return Vector3(min(a.x(), b.x()), min(a.y(), b.y()), min(a.z(), b.z())); -} - -inline Vector3 max(Vector3::Arg a, Vector3::Arg b) -{ - return Vector3(max(a.x(), b.x()), max(a.y(), b.y()), max(a.z(), b.z())); -} - -inline Vector3 clamp(Vector3::Arg v, float min, float max) -{ - return Vector3(clamp(v.x(), 
min, max), clamp(v.y(), min, max), clamp(v.z(), min, max)); -} - -inline bool isValid(Vector3::Arg v) -{ - return isFinite(v.x()) && isFinite(v.y()) && isFinite(v.z()); -} - -/* -Vector3 transform(Quaternion, vector3); -Vector3 transform_point(matrix34, vector3); -Vector3 transform_vector(matrix34, vector3); -Vector3 transform_point(matrix44, vector3); -Vector3 transform_vector(matrix44, vector3); -*/ - -// Vector4 - -inline Vector4 add(Vector4::Arg a, Vector4::Arg b) -{ - return Vector4(a.x() + b.x(), a.y() + b.y(), a.z() + b.z(), a.w() + b.w()); -} -inline Vector4 operator+(Vector4::Arg a, Vector4::Arg b) -{ - return add(a, b); -} - -inline Vector4 sub(Vector4::Arg a, Vector4::Arg b) -{ - return Vector4(a.x() - b.x(), a.y() - b.y(), a.z() - b.z(), a.w() - b.w()); -} -inline Vector4 operator-(Vector4::Arg a, Vector4::Arg b) -{ - return sub(a, b); -} - -inline Vector4 scale(Vector4::Arg v, scalar s) -{ - return Vector4(v.x() * s, v.y() * s, v.z() * s, v.w() * s); -} - -inline Vector4 scale(Vector4::Arg v, Vector4::Arg s) -{ - return Vector4(v.x() * s.x(), v.y() * s.y(), v.z() * s.z(), v.w() * s.w()); -} - -inline Vector4 operator*(Vector4::Arg v, scalar s) -{ - return scale(v, s); -} - -inline Vector4 operator*(scalar s, Vector4::Arg v) -{ - return scale(v, s); -} - -inline Vector4 operator/(Vector4::Arg v, scalar s) -{ - return scale(v, 1.0f/s); -} - -inline Vector4 add_scaled(Vector4::Arg a, Vector4::Arg b, scalar s) -{ - return Vector4(a.x() + b.x() * s, a.y() + b.y() * s, a.z() + b.z() * s, a.w() + b.w() * s); -} - -inline scalar dot(Vector4::Arg a, Vector4::Arg b) -{ - return a.x() * b.x() + a.y() * b.y() + a.z() * b.z() + a.w() * b.w(); -} - -inline scalar length_squared(Vector4::Arg v) -{ - return v.x() * v.x() + v.y() * v.y() + v.z() * v.z() + v.w() * v.w(); -} - -inline scalar length(Vector4::Arg v) -{ - return sqrtf(length_squared(v)); -} - -inline bool isNormalized(Vector4::Arg v, float epsilon = NV_NORMAL_EPSILON) -{ - return equal(length(v), 1, epsilon); -} - -inline Vector4 normalize(Vector4::Arg v, float epsilon = NV_EPSILON) -{ - float l = length(v); - nvDebugCheck(!isZero(l, epsilon)); - Vector4 n = scale(v, 1.0f / l); - nvDebugCheck(isNormalized(n)); - return n; -} - -inline Vector4 normalizeSafe(Vector4::Arg v, Vector4::Arg fallback, float epsilon = NV_EPSILON) -{ - float l = length(v); - if (isZero(l, epsilon)) { - return fallback; - } - return scale(v, 1.0f / l); -} - -inline bool equal(Vector4::Arg v1, Vector4::Arg v2, float epsilon = NV_EPSILON) -{ - return equal(v1.x(), v2.x(), epsilon) && equal(v1.y(), v2.y(), epsilon) && equal(v1.z(), v2.z(), epsilon) && equal(v1.w(), v2.w(), epsilon); -} - -inline Vector4 min(Vector4::Arg a, Vector4::Arg b) -{ - return Vector4(min(a.x(), b.x()), min(a.y(), b.y()), min(a.z(), b.z()), min(a.w(), b.w())); -} - -inline Vector4 max(Vector4::Arg a, Vector4::Arg b) -{ - return Vector4(max(a.x(), b.x()), max(a.y(), b.y()), max(a.z(), b.z()), max(a.w(), b.w())); -} - -inline bool isValid(Vector4::Arg v) -{ - return isFinite(v.x()) && isFinite(v.y()) && isFinite(v.z()) && isFinite(v.w()); -} - - - -/* -vector4 transform(matrix34, vector4); -vector4 transform(matrix44, vector4); -*/ - -/* -Quaternion mul(Quaternion, Quaternion); // rotational composition -Quaternion conjugate(Quaternion); -Quaternion inverse(Quaternion); -Quaternion axis_angle(const Vector3 & v, scalar s); -*/ - -/* -matrix34 add(matrix34, matrix34); // note: implicit '1' stays as '1' -matrix34 operator+(matrix34, matrix34); -matrix34 sub(matrix34, matrix34); // note: 
implicit '1' stays as '1' -matrix34 operator-(matrix34, matrix34); -matrix34 mul(matrix34, matrix34); -matrix34 operator*(matrix34, matrix34); -matrix34 mul(matrix34, quaternion4); // rotation multiplication -matrix34 operator*(matrix34, quaternion4); // rotation multiplication -matrix34 translation(vector3); -matrix34 rotation(quaternion4); -matrix34 rotation(vector3, scalar); // axis/angle - -matrix44 add(matrix44, matrix44); -matrix44 operator+(matrix44, matrix44); -matrix44 sub(matrix44, matrix44); -matrix44 operator-(matrix44, matrix44); -matrix44 mul(matrix44, matrix44); -matrix44 operator*(matrix44, matrix44); -matrix44 mul(matrix44, quaternion4); // rotation multiplication -matrix44 operator*(matrix44, quaternion4); // rotation multiplication -matrix44 invert(matrix34); -matrix44 invert(matrix44); -matrix44 transpose(matrix34); -matrix44 transpose(matrix44); -*/ + class NVMATH_CLASS Vector2 + { + public: + typedef Vector2 const & Arg; + + Vector2(); + explicit Vector2(float f); + Vector2(float x, float y); + Vector2(Vector2::Arg v); + + //template explicit Vector2(const T & v) : x(v.x), y(v.y) {} + //template operator T() const { return T(x, y); } + + const Vector2 & operator=(Vector2::Arg v); + + const float * ptr() const; + + void set(float x, float y); + + Vector2 operator-() const; + void operator+=(Vector2::Arg v); + void operator-=(Vector2::Arg v); + void operator*=(float s); + void operator*=(Vector2::Arg v); + + friend bool operator==(Vector2::Arg a, Vector2::Arg b); + friend bool operator!=(Vector2::Arg a, Vector2::Arg b); + + union { + struct { + float x, y; + }; + float component[2]; + }; + }; + + class NVMATH_CLASS Vector3 + { + public: + typedef Vector3 const & Arg; + + Vector3(); + explicit Vector3(float x); + //explicit Vector3(int x) : x(float(x)), y(float(x)), z(float(x)) {} + Vector3(float x, float y, float z); + Vector3(Vector2::Arg v, float z); + Vector3(Vector3::Arg v); + + //template explicit Vector3(const T & v) : x(v.x), y(v.y), z(v.z) {} + //template operator T() const { return T(x, y, z); } + + const Vector3 & operator=(Vector3::Arg v); + + Vector2 xy() const; + + const float * ptr() const; + + void set(float x, float y, float z); + + Vector3 operator-() const; + void operator+=(Vector3::Arg v); + void operator-=(Vector3::Arg v); + void operator*=(float s); + void operator/=(float s); + void operator*=(Vector3::Arg v); + void operator/=(Vector3::Arg v); + + friend bool operator==(Vector3::Arg a, Vector3::Arg b); + friend bool operator!=(Vector3::Arg a, Vector3::Arg b); + + union { + struct { + float x, y, z; + }; + float component[3]; + }; + }; + + class NVMATH_CLASS Vector4 + { + public: + typedef Vector4 const & Arg; + + Vector4(); + explicit Vector4(float x); + Vector4(float x, float y, float z, float w); + Vector4(Vector2::Arg v, float z, float w); + Vector4(Vector2::Arg v, Vector2::Arg u); + Vector4(Vector3::Arg v, float w); + Vector4(Vector4::Arg v); + // Vector4(const Quaternion & v); + + //template explicit Vector4(const T & v) : x(v.x), y(v.y), z(v.z), w(v.w) {} + //template operator T() const { return T(x, y, z, w); } + + const Vector4 & operator=(Vector4::Arg v); + + Vector2 xy() const; + Vector2 zw() const; + Vector3 xyz() const; + + const float * ptr() const; + + void set(float x, float y, float z, float w); + + Vector4 operator-() const; + void operator+=(Vector4::Arg v); + void operator-=(Vector4::Arg v); + void operator*=(float s); + void operator/=(float s); + void operator*=(Vector4::Arg v); + void operator/=(Vector4::Arg v); + + friend 
bool operator==(Vector4::Arg a, Vector4::Arg b); + friend bool operator!=(Vector4::Arg a, Vector4::Arg b); + + union { + struct { + float x, y, z, w; + }; + float component[4]; + }; + }; } // nv namespace +// If we had these functions, they would be ambiguous, the compiler would not know which one to pick: +//template Vector2 to(const T & v) { return Vector2(v.x, v.y); } +//template Vector3 to(const T & v) { return Vector3(v.x, v.y, v.z); } +//template Vector4 to(const T & v) { return Vector4(v.x, v.y, v.z, v.z); } + +// We could use a cast operator so that we could infer the expected type, but that doesn't work the same way in all compilers and produces horrible error messages. + +// Instead we simply have explicit casts: +template T to(const nv::Vector2 & v) { NV_COMPILER_CHECK(sizeof(T) == sizeof(nv::Vector2)); return T(v.x, v.y); } +template T to(const nv::Vector3 & v) { NV_COMPILER_CHECK(sizeof(T) == sizeof(nv::Vector3)); return T(v.x, v.y, v.z); } +template T to(const nv::Vector4 & v) { NV_COMPILER_CHECK(sizeof(T) == sizeof(nv::Vector4)); return T(v.x, v.y, v.z, v.w); } + #endif // NV_MATH_VECTOR_H Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/Vector.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvmath/Vector.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvmath/Vector.cpp @@ -0,0 +1,4 @@ +// This code is in the public domain -- castanyo@yahoo.es + +#include "Vector.h" +#include "Vector.inl" Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/Vector.inl =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvmath/Vector.inl +++ ps/trunk/libraries/source/nvtt/src/src/nvmath/Vector.inl @@ -0,0 +1,919 @@ +// This code is in the public domain -- castanyo@yahoo.es + +#pragma once +#ifndef NV_MATH_VECTOR_INL +#define NV_MATH_VECTOR_INL + +#include "Vector.h" +#include "nvcore/Utils.h" // min, max +#include "nvcore/Hash.h" // hash + +namespace nv +{ + + // Helpers to convert vector types. Assume T has x,y members and 2 argument constructor. + //template T to(Vector2::Arg v) { return T(v.x, v.y); } + + // Helpers to convert vector types. Assume T has x,y,z members and 3 argument constructor. + //template T to(Vector3::Arg v) { return T(v.x, v.y, v.z); } + + // Helpers to convert vector types. Assume T has x,y,z members and 3 argument constructor. 
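The explicit to<T>() casts added at the end of Vector.h above trade implicit conversion operators for an explicit call guarded by a compile-time size check. Purely as an illustration (not part of the patch), here is a standalone sketch of the same pattern; Vec3 and Float3 are hypothetical stand-ins for nv::Vector3 and an engine-side type with identical layout, and static_assert plays the role of NV_COMPILER_CHECK:

    #include <cstdio>

    struct Vec3   { float x, y, z; };   // stands in for nv::Vector3
    struct Float3 { float x, y, z; };   // hypothetical target type with the same layout

    template <typename T>
    T to(const Vec3 & v)
    {
        // Guard against converting to a type with a different size/layout.
        static_assert(sizeof(T) == sizeof(Vec3), "layout mismatch");
        return T{ v.x, v.y, v.z };
    }

    int main()
    {
        Vec3 v = { 1.0f, 2.0f, 3.0f };
        Float3 f = to<Float3>(v);       // the target type is spelled out, so there is no ambiguity
        std::printf("%g %g %g\n", f.x, f.y, f.z);
        return 0;
    }

Because the caller names the destination type at the call site, the deduction ambiguity described in the header's comment never arises.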
+ //template T to(Vector4::Arg v) { return T(v.x, v.y, v.z, v.w); } + + + // Vector2 + inline Vector2::Vector2() {} + inline Vector2::Vector2(float f) : x(f), y(f) {} + inline Vector2::Vector2(float x, float y) : x(x), y(y) {} + inline Vector2::Vector2(Vector2::Arg v) : x(v.x), y(v.y) {} + + inline const Vector2 & Vector2::operator=(Vector2::Arg v) + { + x = v.x; + y = v.y; + return *this; + } + + inline const float * Vector2::ptr() const + { + return &x; + } + + inline void Vector2::set(float x, float y) + { + this->x = x; + this->y = y; + } + + inline Vector2 Vector2::operator-() const + { + return Vector2(-x, -y); + } + + inline void Vector2::operator+=(Vector2::Arg v) + { + x += v.x; + y += v.y; + } + + inline void Vector2::operator-=(Vector2::Arg v) + { + x -= v.x; + y -= v.y; + } + + inline void Vector2::operator*=(float s) + { + x *= s; + y *= s; + } + + inline void Vector2::operator*=(Vector2::Arg v) + { + x *= v.x; + y *= v.y; + } + + inline bool operator==(Vector2::Arg a, Vector2::Arg b) + { + return a.x == b.x && a.y == b.y; + } + inline bool operator!=(Vector2::Arg a, Vector2::Arg b) + { + return a.x != b.x || a.y != b.y; + } + + + // Vector3 + inline Vector3::Vector3() {} + inline Vector3::Vector3(float f) : x(f), y(f), z(f) {} + inline Vector3::Vector3(float x, float y, float z) : x(x), y(y), z(z) {} + inline Vector3::Vector3(Vector2::Arg v, float z) : x(v.x), y(v.y), z(z) {} + inline Vector3::Vector3(Vector3::Arg v) : x(v.x), y(v.y), z(v.z) {} + + inline const Vector3 & Vector3::operator=(Vector3::Arg v) + { + x = v.x; + y = v.y; + z = v.z; + return *this; + } + + + inline Vector2 Vector3::xy() const + { + return Vector2(x, y); + } + + inline const float * Vector3::ptr() const + { + return &x; + } + + inline void Vector3::set(float x, float y, float z) + { + this->x = x; + this->y = y; + this->z = z; + } + + inline Vector3 Vector3::operator-() const + { + return Vector3(-x, -y, -z); + } + + inline void Vector3::operator+=(Vector3::Arg v) + { + x += v.x; + y += v.y; + z += v.z; + } + + inline void Vector3::operator-=(Vector3::Arg v) + { + x -= v.x; + y -= v.y; + z -= v.z; + } + + inline void Vector3::operator*=(float s) + { + x *= s; + y *= s; + z *= s; + } + + inline void Vector3::operator/=(float s) + { + float is = 1.0f / s; + x *= is; + y *= is; + z *= is; + } + + inline void Vector3::operator*=(Vector3::Arg v) + { + x *= v.x; + y *= v.y; + z *= v.z; + } + + inline void Vector3::operator/=(Vector3::Arg v) + { + x /= v.x; + y /= v.y; + z /= v.z; + } + + inline bool operator==(Vector3::Arg a, Vector3::Arg b) + { + return a.x == b.x && a.y == b.y && a.z == b.z; + } + inline bool operator!=(Vector3::Arg a, Vector3::Arg b) + { + return a.x != b.x || a.y != b.y || a.z != b.z; + } + + + // Vector4 + inline Vector4::Vector4() {} + inline Vector4::Vector4(float f) : x(f), y(f), z(f), w(f) {} + inline Vector4::Vector4(float x, float y, float z, float w) : x(x), y(y), z(z), w(w) {} + inline Vector4::Vector4(Vector2::Arg v, float z, float w) : x(v.x), y(v.y), z(z), w(w) {} + inline Vector4::Vector4(Vector2::Arg v, Vector2::Arg u) : x(v.x), y(v.y), z(u.x), w(u.y) {} + inline Vector4::Vector4(Vector3::Arg v, float w) : x(v.x), y(v.y), z(v.z), w(w) {} + inline Vector4::Vector4(Vector4::Arg v) : x(v.x), y(v.y), z(v.z), w(v.w) {} + + inline const Vector4 & Vector4::operator=(const Vector4 & v) + { + x = v.x; + y = v.y; + z = v.z; + w = v.w; + return *this; + } + + inline Vector2 Vector4::xy() const + { + return Vector2(x, y); + } + + inline Vector2 Vector4::zw() const + { + return 
Vector2(z, w); + } + + inline Vector3 Vector4::xyz() const + { + return Vector3(x, y, z); + } + + inline const float * Vector4::ptr() const + { + return &x; + } + + inline void Vector4::set(float x, float y, float z, float w) + { + this->x = x; + this->y = y; + this->z = z; + this->w = w; + } + + inline Vector4 Vector4::operator-() const + { + return Vector4(-x, -y, -z, -w); + } + + inline void Vector4::operator+=(Vector4::Arg v) + { + x += v.x; + y += v.y; + z += v.z; + w += v.w; + } + + inline void Vector4::operator-=(Vector4::Arg v) + { + x -= v.x; + y -= v.y; + z -= v.z; + w -= v.w; + } + + inline void Vector4::operator*=(float s) + { + x *= s; + y *= s; + z *= s; + w *= s; + } + + inline void Vector4::operator/=(float s) + { + x /= s; + y /= s; + z /= s; + w /= s; + } + + inline void Vector4::operator*=(Vector4::Arg v) + { + x *= v.x; + y *= v.y; + z *= v.z; + w *= v.w; + } + + inline void Vector4::operator/=(Vector4::Arg v) + { + x /= v.x; + y /= v.y; + z /= v.z; + w /= v.w; + } + + inline bool operator==(Vector4::Arg a, Vector4::Arg b) + { + return a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w; + } + inline bool operator!=(Vector4::Arg a, Vector4::Arg b) + { + return a.x != b.x || a.y != b.y || a.z != b.z || a.w != b.w; + } + + + + // Functions + + + // Vector2 + + inline Vector2 add(Vector2::Arg a, Vector2::Arg b) + { + return Vector2(a.x + b.x, a.y + b.y); + } + inline Vector2 operator+(Vector2::Arg a, Vector2::Arg b) + { + return add(a, b); + } + + inline Vector2 sub(Vector2::Arg a, Vector2::Arg b) + { + return Vector2(a.x - b.x, a.y - b.y); + } + inline Vector2 operator-(Vector2::Arg a, Vector2::Arg b) + { + return sub(a, b); + } + + inline Vector2 scale(Vector2::Arg v, float s) + { + return Vector2(v.x * s, v.y * s); + } + + inline Vector2 scale(Vector2::Arg v, Vector2::Arg s) + { + return Vector2(v.x * s.x, v.y * s.y); + } + + inline Vector2 operator*(Vector2::Arg v, float s) + { + return scale(v, s); + } + + inline Vector2 operator*(Vector2::Arg v1, Vector2::Arg v2) + { + return Vector2(v1.x*v2.x, v1.y*v2.y); + } + + inline Vector2 operator*(float s, Vector2::Arg v) + { + return scale(v, s); + } + + inline Vector2 operator/(Vector2::Arg v, float s) + { + return scale(v, 1.0f/s); + } + + inline Vector2 lerp(Vector2::Arg v1, Vector2::Arg v2, float t) + { + const float s = 1.0f - t; + return Vector2(v1.x * s + t * v2.x, v1.y * s + t * v2.y); + } + + inline float dot(Vector2::Arg a, Vector2::Arg b) + { + return a.x * b.x + a.y * b.y; + } + + inline float lengthSquared(Vector2::Arg v) + { + return v.x * v.x + v.y * v.y; + } + + inline float length(Vector2::Arg v) + { + return sqrtf(lengthSquared(v)); + } + + inline float distance(Vector2::Arg a, Vector2::Arg b) + { + return length(a - b); + } + + inline float inverseLength(Vector2::Arg v) + { + return 1.0f / sqrtf(lengthSquared(v)); + } + + inline bool isNormalized(Vector2::Arg v, float epsilon = NV_NORMAL_EPSILON) + { + return equal(length(v), 1, epsilon); + } + + inline Vector2 normalize(Vector2::Arg v, float epsilon = NV_EPSILON) + { + float l = length(v); + nvDebugCheck(!isZero(l, epsilon)); + Vector2 n = scale(v, 1.0f / l); + nvDebugCheck(isNormalized(n)); + return n; + } + + inline Vector2 normalizeSafe(Vector2::Arg v, Vector2::Arg fallback, float epsilon = NV_EPSILON) + { + float l = length(v); + if (isZero(l, epsilon)) { + return fallback; + } + return scale(v, 1.0f / l); + } + + // Safe, branchless normalization from Andy Firth. All error checking ommitted. 
+ // http://altdevblogaday.com/2011/08/21/practical-flt-point-tricks/ + inline Vector2 normalizeFast(Vector2::Arg v) + { + const float very_small_float = 1.0e-037f; + float l = very_small_float + length(v); + return scale(v, 1.0f / l); + } + + inline bool equal(Vector2::Arg v1, Vector2::Arg v2, float epsilon = NV_EPSILON) + { + return equal(v1.x, v2.x, epsilon) && equal(v1.y, v2.y, epsilon); + } + + inline Vector2 min(Vector2::Arg a, Vector2::Arg b) + { + return Vector2(min(a.x, b.x), min(a.y, b.y)); + } + + inline Vector2 max(Vector2::Arg a, Vector2::Arg b) + { + return Vector2(max(a.x, b.x), max(a.y, b.y)); + } + + inline Vector2 clamp(Vector2::Arg v, float min, float max) + { + return Vector2(clamp(v.x, min, max), clamp(v.y, min, max)); + } + + inline Vector2 saturate(Vector2::Arg v) + { + return Vector2(saturate(v.x), saturate(v.y)); + } + + inline bool isFinite(Vector2::Arg v) + { + return isFinite(v.x) && isFinite(v.y); + } + + inline Vector2 validate(Vector2::Arg v, Vector2::Arg fallback = Vector2(0.0f)) + { + if (!isFinite(v)) return fallback; + Vector2 vf = v; + nv::floatCleanup(vf.component, 2); + return vf; + } + + // Note, this is the area scaled by 2! + inline float triangleArea(Vector2::Arg v0, Vector2::Arg v1) + { + return (v0.x * v1.y - v0.y * v1.x); // * 0.5f; + } + inline float triangleArea(Vector2::Arg a, Vector2::Arg b, Vector2::Arg c) + { + // IC: While it may be appealing to use the following expression: + //return (c.x * a.y + a.x * b.y + b.x * c.y - b.x * a.y - c.x * b.y - a.x * c.y); // * 0.5f; + + // That's actually a terrible idea. Small triangles far from the origin can end up producing fairly large floating point + // numbers and the results becomes very unstable and dependent on the order of the factors. + + // Instead, it's preferable to substract the vertices first, and multiply the resulting small values together. The result + // in this case is always much more accurate (as long as the triangle is small) and less dependent of the location of + // the triangle. 
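The comment above, and the subtract-first form returned right after it, are about catastrophic cancellation: the expanded cross product mixes very large intermediate terms whose tiny difference is the answer. A standalone snippet (not taken from the NVTT sources) that makes the effect visible for a small triangle translated far from the origin:

    #include <cstdio>

    // Twice the signed area of triangle (a, b, c), computed two ways in 32-bit floats.
    static float areaExpanded(float ax, float ay, float bx, float by, float cx, float cy)
    {
        return cx * ay + ax * by + bx * cy - bx * ay - cx * by - ax * cy;
    }
    static float areaShifted(float ax, float ay, float bx, float by, float cx, float cy)
    {
        return (ax - cx) * (by - cy) - (ay - cy) * (bx - cx);   // subtract the vertices first
    }

    int main()
    {
        const float d = 16384.0f;   // translate a tiny triangle far from the origin
        float e = areaExpanded(d, d, d + 0.5f, d, d, d + 0.5f);
        float s = areaShifted (d, d, d + 0.5f, d, d, d + 0.5f);
        std::printf("expanded: %g  subtract-first: %g  (exact: 0.25)\n", e, s);
        return 0;
    }

With plain float evaluation the expanded form typically prints 0 while the subtract-first form prints the exact 0.25, which is the behaviour the comment warns about.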
+ + //return ((a.x - c.x) * (b.y - c.y) - (a.y - c.y) * (b.x - c.x)); // * 0.5f; + return triangleArea(a-c, b-c); + } + + + template <> + inline uint hash(const Vector2 & v, uint h) + { + return sdbmFloatHash(v.component, 2, h); + } + + + + // Vector3 + + inline Vector3 add(Vector3::Arg a, Vector3::Arg b) + { + return Vector3(a.x + b.x, a.y + b.y, a.z + b.z); + } + inline Vector3 add(Vector3::Arg a, float b) + { + return Vector3(a.x + b, a.y + b, a.z + b); + } + inline Vector3 operator+(Vector3::Arg a, Vector3::Arg b) + { + return add(a, b); + } + inline Vector3 operator+(Vector3::Arg a, float b) + { + return add(a, b); + } + + inline Vector3 sub(Vector3::Arg a, Vector3::Arg b) + { + return Vector3(a.x - b.x, a.y - b.y, a.z - b.z); + } + inline Vector3 sub(Vector3::Arg a, float b) + { + return Vector3(a.x - b, a.y - b, a.z - b); + } + inline Vector3 operator-(Vector3::Arg a, Vector3::Arg b) + { + return sub(a, b); + } + inline Vector3 operator-(Vector3::Arg a, float b) + { + return sub(a, b); + } + + inline Vector3 cross(Vector3::Arg a, Vector3::Arg b) + { + return Vector3(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x); + } + + inline Vector3 scale(Vector3::Arg v, float s) + { + return Vector3(v.x * s, v.y * s, v.z * s); + } + + inline Vector3 scale(Vector3::Arg v, Vector3::Arg s) + { + return Vector3(v.x * s.x, v.y * s.y, v.z * s.z); + } + + inline Vector3 operator*(Vector3::Arg v, float s) + { + return scale(v, s); + } + + inline Vector3 operator*(float s, Vector3::Arg v) + { + return scale(v, s); + } + + inline Vector3 operator*(Vector3::Arg v, Vector3::Arg s) + { + return scale(v, s); + } + + inline Vector3 operator/(Vector3::Arg v, float s) + { + return scale(v, 1.0f/s); + } + + /*inline Vector3 add_scaled(Vector3::Arg a, Vector3::Arg b, float s) + { + return Vector3(a.x + b.x * s, a.y + b.y * s, a.z + b.z * s); + }*/ + + inline Vector3 lerp(Vector3::Arg v1, Vector3::Arg v2, float t) + { + const float s = 1.0f - t; + return Vector3(v1.x * s + t * v2.x, v1.y * s + t * v2.y, v1.z * s + t * v2.z); + } + + inline float dot(Vector3::Arg a, Vector3::Arg b) + { + return a.x * b.x + a.y * b.y + a.z * b.z; + } + + inline float lengthSquared(Vector3::Arg v) + { + return v.x * v.x + v.y * v.y + v.z * v.z; + } + + inline float length(Vector3::Arg v) + { + return sqrtf(lengthSquared(v)); + } + + inline float distance(Vector3::Arg a, Vector3::Arg b) + { + return length(a - b); + } + + inline float distanceSquared(Vector3::Arg a, Vector3::Arg b) + { + return lengthSquared(a - b); + } + + inline float inverseLength(Vector3::Arg v) + { + return 1.0f / sqrtf(lengthSquared(v)); + } + + inline bool isNormalized(Vector3::Arg v, float epsilon = NV_NORMAL_EPSILON) + { + return equal(length(v), 1, epsilon); + } + + inline Vector3 normalize(Vector3::Arg v, float epsilon = NV_EPSILON) + { + float l = length(v); + nvDebugCheck(!isZero(l, epsilon)); + Vector3 n = scale(v, 1.0f / l); + nvDebugCheck(isNormalized(n)); + return n; + } + + inline Vector3 normalizeSafe(Vector3::Arg v, Vector3::Arg fallback, float epsilon = NV_EPSILON) + { + float l = length(v); + if (isZero(l, epsilon)) { + return fallback; + } + return scale(v, 1.0f / l); + } + + // Safe, branchless normalization from Andy Firth. All error checking ommitted. 
+ // http://altdevblogaday.com/2011/08/21/practical-flt-point-tricks/ + inline Vector3 normalizeFast(Vector3::Arg v) + { + const float very_small_float = 1.0e-037f; + float l = very_small_float + length(v); + return scale(v, 1.0f / l); + } + + inline bool equal(Vector3::Arg v1, Vector3::Arg v2, float epsilon = NV_EPSILON) + { + return equal(v1.x, v2.x, epsilon) && equal(v1.y, v2.y, epsilon) && equal(v1.z, v2.z, epsilon); + } + + inline Vector3 min(Vector3::Arg a, Vector3::Arg b) + { + return Vector3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z)); + } + + inline Vector3 max(Vector3::Arg a, Vector3::Arg b) + { + return Vector3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z)); + } + + inline Vector3 clamp(Vector3::Arg v, float min, float max) + { + return Vector3(clamp(v.x, min, max), clamp(v.y, min, max), clamp(v.z, min, max)); + } + + inline Vector3 saturate(Vector3::Arg v) + { + return Vector3(saturate(v.x), saturate(v.y), saturate(v.z)); + } + + inline Vector3 floor(Vector3::Arg v) + { + return Vector3(floorf(v.x), floorf(v.y), floorf(v.z)); + } + + inline Vector3 ceil(Vector3::Arg v) + { + return Vector3(ceilf(v.x), ceilf(v.y), ceilf(v.z)); + } + + inline bool isFinite(Vector3::Arg v) + { + return isFinite(v.x) && isFinite(v.y) && isFinite(v.z); + } + + inline Vector3 validate(Vector3::Arg v, Vector3::Arg fallback = Vector3(0.0f)) + { + if (!isFinite(v)) return fallback; + Vector3 vf = v; + nv::floatCleanup(vf.component, 3); + return vf; + } + + inline Vector3 reflect(Vector3::Arg v, Vector3::Arg n) + { + return v - (2 * dot(v, n)) * n; + } + + template <> + inline uint hash(const Vector3 & v, uint h) + { + return sdbmFloatHash(v.component, 3, h); + } + + + // Vector4 + + inline Vector4 add(Vector4::Arg a, Vector4::Arg b) + { + return Vector4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); + } + inline Vector4 operator+(Vector4::Arg a, Vector4::Arg b) + { + return add(a, b); + } + + inline Vector4 sub(Vector4::Arg a, Vector4::Arg b) + { + return Vector4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); + } + inline Vector4 operator-(Vector4::Arg a, Vector4::Arg b) + { + return sub(a, b); + } + + inline Vector4 scale(Vector4::Arg v, float s) + { + return Vector4(v.x * s, v.y * s, v.z * s, v.w * s); + } + + inline Vector4 scale(Vector4::Arg v, Vector4::Arg s) + { + return Vector4(v.x * s.x, v.y * s.y, v.z * s.z, v.w * s.w); + } + + inline Vector4 operator*(Vector4::Arg v, float s) + { + return scale(v, s); + } + + inline Vector4 operator*(float s, Vector4::Arg v) + { + return scale(v, s); + } + + inline Vector4 operator*(Vector4::Arg v, Vector4::Arg s) + { + return scale(v, s); + } + + inline Vector4 operator/(Vector4::Arg v, float s) + { + return scale(v, 1.0f/s); + } + + /*inline Vector4 add_scaled(Vector4::Arg a, Vector4::Arg b, float s) + { + return Vector4(a.x + b.x * s, a.y + b.y * s, a.z + b.z * s, a.w + b.w * s); + }*/ + + inline Vector4 lerp(Vector4::Arg v1, Vector4::Arg v2, float t) + { + const float s = 1.0f - t; + return Vector4(v1.x * s + t * v2.x, v1.y * s + t * v2.y, v1.z * s + t * v2.z, v1.w * s + t * v2.w); + } + + inline float dot(Vector4::Arg a, Vector4::Arg b) + { + return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w; + } + + inline float lengthSquared(Vector4::Arg v) + { + return v.x * v.x + v.y * v.y + v.z * v.z + v.w * v.w; + } + + inline float length(Vector4::Arg v) + { + return sqrtf(lengthSquared(v)); + } + + inline float inverseLength(Vector4::Arg v) + { + return 1.0f / sqrtf(lengthSquared(v)); + } + + inline bool isNormalized(Vector4::Arg v, float epsilon = 
NV_NORMAL_EPSILON) + { + return equal(length(v), 1, epsilon); + } + + inline Vector4 normalize(Vector4::Arg v, float epsilon = NV_EPSILON) + { + float l = length(v); + nvDebugCheck(!isZero(l, epsilon)); + Vector4 n = scale(v, 1.0f / l); + nvDebugCheck(isNormalized(n)); + return n; + } + + inline Vector4 normalizeSafe(Vector4::Arg v, Vector4::Arg fallback, float epsilon = NV_EPSILON) + { + float l = length(v); + if (isZero(l, epsilon)) { + return fallback; + } + return scale(v, 1.0f / l); + } + + // Safe, branchless normalization from Andy Firth. All error checking ommitted. + // http://altdevblogaday.com/2011/08/21/practical-flt-point-tricks/ + inline Vector4 normalizeFast(Vector4::Arg v) + { + const float very_small_float = 1.0e-037f; + float l = very_small_float + length(v); + return scale(v, 1.0f / l); + } + + inline bool equal(Vector4::Arg v1, Vector4::Arg v2, float epsilon = NV_EPSILON) + { + return equal(v1.x, v2.x, epsilon) && equal(v1.y, v2.y, epsilon) && equal(v1.z, v2.z, epsilon) && equal(v1.w, v2.w, epsilon); + } + + inline Vector4 min(Vector4::Arg a, Vector4::Arg b) + { + return Vector4(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w)); + } + + inline Vector4 max(Vector4::Arg a, Vector4::Arg b) + { + return Vector4(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w)); + } + + inline Vector4 clamp(Vector4::Arg v, float min, float max) + { + return Vector4(clamp(v.x, min, max), clamp(v.y, min, max), clamp(v.z, min, max), clamp(v.w, min, max)); + } + + inline Vector4 saturate(Vector4::Arg v) + { + return Vector4(saturate(v.x), saturate(v.y), saturate(v.z), saturate(v.w)); + } + + inline bool isFinite(Vector4::Arg v) + { + return isFinite(v.x) && isFinite(v.y) && isFinite(v.z) && isFinite(v.w); + } + + inline Vector4 validate(Vector4::Arg v, Vector4::Arg fallback = Vector4(0.0f)) + { + if (!isFinite(v)) return fallback; + Vector4 vf = v; + nv::floatCleanup(vf.component, 4); + return vf; + } + + template <> + inline uint hash(const Vector4 & v, uint h) + { + return sdbmFloatHash(v.component, 4, h); + } + + +#if NV_OS_IOS // LLVM is not happy with implicit conversion of immediate constants to float + + //int: + + inline Vector2 scale(Vector2::Arg v, int s) + { + return Vector2(v.x * s, v.y * s); + } + + inline Vector2 operator*(Vector2::Arg v, int s) + { + return scale(v, s); + } + + inline Vector2 operator*(int s, Vector2::Arg v) + { + return scale(v, s); + } + + inline Vector2 operator/(Vector2::Arg v, int s) + { + return scale(v, 1.0f/s); + } + + inline Vector3 scale(Vector3::Arg v, int s) + { + return Vector3(v.x * s, v.y * s, v.z * s); + } + + inline Vector3 operator*(Vector3::Arg v, int s) + { + return scale(v, s); + } + + inline Vector3 operator*(int s, Vector3::Arg v) + { + return scale(v, s); + } + + inline Vector3 operator/(Vector3::Arg v, int s) + { + return scale(v, 1.0f/s); + } + + inline Vector4 scale(Vector4::Arg v, int s) + { + return Vector4(v.x * s, v.y * s, v.z * s, v.w * s); + } + + inline Vector4 operator*(Vector4::Arg v, int s) + { + return scale(v, s); + } + + inline Vector4 operator*(int s, Vector4::Arg v) + { + return scale(v, s); + } + + inline Vector4 operator/(Vector4::Arg v, int s) + { + return scale(v, 1.0f/s); + } + + //double: + + inline Vector3 operator*(Vector3::Arg v, double s) + { + return scale(v, (float)s); + } + + inline Vector3 operator*(double s, Vector3::Arg v) + { + return scale(v, (float)s); + } + + inline Vector3 operator/(Vector3::Arg v, double s) + { + return scale(v, 1.f/((float)s)); + } + +#endif //NV_OS_IOS + +} // nv 
namespace + +#endif // NV_MATH_VECTOR_INL Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/ftoi.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvmath/ftoi.h +++ ps/trunk/libraries/source/nvtt/src/src/nvmath/ftoi.h @@ -0,0 +1,256 @@ +// This code is in the public domain -- castano@gmail.com + +#pragma once +#ifndef NV_MATH_FTOI_H +#define NV_MATH_FTOI_H + +#include "nvmath/nvmath.h" + +#include + +namespace nv +{ + // Optimized float to int conversions. See: + // http://cbloomrants.blogspot.com/2009/01/01-17-09-float-to-int.html + // http://www.stereopsis.com/sree/fpu2006.html + // http://assemblyrequired.crashworks.org/2009/01/12/why-you-should-never-cast-floats-to-ints/ + // http://chrishecker.com/Miscellaneous_Technical_Articles#Floating_Point + + + union DoubleAnd64 { + uint64 i; + double d; + }; + + static const double floatutil_xs_doublemagic = (6755399441055744.0); // 2^52 * 1.5 + static const double floatutil_xs_doublemagicdelta = (1.5e-8); // almost .5f = .5f + 1e^(number of exp bit) + static const double floatutil_xs_doublemagicroundeps = (0.5f - floatutil_xs_doublemagicdelta); // almost .5f = .5f - 1e^(number of exp bit) + + NV_FORCEINLINE int ftoi_round_xs(double val, double magic) { +#if 1 + DoubleAnd64 dunion; + dunion.d = val + magic; + return (int32) dunion.i; // just cast to grab the bottom bits +#else + val += magic; + return ((int*)&val)[0]; // @@ Assumes little endian. +#endif + } + + NV_FORCEINLINE int ftoi_round_xs(float val) { + return ftoi_round_xs(val, floatutil_xs_doublemagic); + } + + NV_FORCEINLINE int ftoi_floor_xs(float val) { + return ftoi_round_xs(val - floatutil_xs_doublemagicroundeps, floatutil_xs_doublemagic); + } + + NV_FORCEINLINE int ftoi_ceil_xs(float val) { + return ftoi_round_xs(val + floatutil_xs_doublemagicroundeps, floatutil_xs_doublemagic); + } + + NV_FORCEINLINE int ftoi_trunc_xs(float val) { + return (val<0) ? ftoi_ceil_xs(val) : ftoi_floor_xs(val); + } + +#if NV_CPU_X86 || NV_CPU_X86_64 + + NV_FORCEINLINE int ftoi_round_sse(float f) { + return _mm_cvt_ss2si(_mm_set_ss(f)); + } + + NV_FORCEINLINE int ftoi_trunc_sse(float f) { + return _mm_cvtt_ss2si(_mm_set_ss(f)); + } + +#endif + + + +#if NV_USE_SSE + + NV_FORCEINLINE int ftoi_round(float val) { + return ftoi_round_sse(val); + } + + NV_FORCEINLINE int ftoi_trunc(float f) { + return ftoi_trunc_sse(f); + } + + // We can probably do better than this. See for example: + // http://dss.stephanierct.com/DevBlog/?p=8 + NV_FORCEINLINE int ftoi_floor(float val) { + return ftoi_round(floorf(val)); + } + + NV_FORCEINLINE int ftoi_ceil(float val) { + return ftoi_round(ceilf(val)); + } + +#else + + // In theory this should work with any double floating point math implementation, but it appears that MSVC produces incorrect code + // when SSE2 is targeted and fast math is enabled (/arch:SSE2 & /fp:fast). These problems go away with /fp:precise, which is the default mode. + + NV_FORCEINLINE int ftoi_round(float val) { + return ftoi_round_xs(val); + } + + NV_FORCEINLINE int ftoi_floor(float val) { + return ftoi_floor_xs(val); + } + + NV_FORCEINLINE int ftoi_ceil(float val) { + return ftoi_ceil_xs(val); + } + + NV_FORCEINLINE int ftoi_trunc(float f) { + return ftoi_trunc_xs(f); + } + +#endif + + + inline void test_ftoi() { + + // Round to nearest integer. 
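The ftoi_*_xs() helpers above use the classic "double magic" trick: adding 2^52 * 1.5 to a value in int32 range forces the rounded integer part into the low 32 bits of the double's mantissa, where a plain cast can read it back, and the rounding follows the FPU's default round-to-nearest-even mode (which is what the checks that follow expect, e.g. 1.5 -> 2 and 2.5 -> 2). A standalone sketch of the trick (not part of the patch), assuming the default rounding mode and no fast-math reassociation:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // Round-to-nearest-even float -> int via the 2^52 * 1.5 magic constant.
    static int ftoi_round_magic(double val)
    {
        const double magic = 6755399441055744.0;   // 2^52 * 1.5
        val += magic;                              // rounded integer now sits in the low mantissa bits
        std::uint64_t bits;
        std::memcpy(&bits, &val, sizeof bits);     // well-defined way to grab the bit pattern
        return static_cast<std::int32_t>(bits);    // low 32 bits hold the result (negatives included)
    }

    int main()
    {
        std::printf("%d %d %d %d\n",
                    ftoi_round_magic(0.6),    // 1
                    ftoi_round_magic(-0.7),   // -1
                    ftoi_round_magic(1.5),    // 2, ties round to even
                    ftoi_round_magic(2.5));   // 2
        return 0;
    }

As the comment in ftoi.h notes for MSVC with /arch:SSE2 and /fp:fast, aggressive floating-point optimization can break the magic addition, so the trick only holds under standard floating-point semantics.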
+ nvCheck(ftoi_round(0.1f) == 0); + nvCheck(ftoi_round(0.6f) == 1); + nvCheck(ftoi_round(-0.2f) == 0); + nvCheck(ftoi_round(-0.7f) == -1); + nvCheck(ftoi_round(10.1f) == 10); + nvCheck(ftoi_round(10.6f) == 11); + nvCheck(ftoi_round(-90.1f) == -90); + nvCheck(ftoi_round(-90.6f) == -91); + + nvCheck(ftoi_round(0) == 0); + nvCheck(ftoi_round(1) == 1); + nvCheck(ftoi_round(-1) == -1); + + nvCheck(ftoi_round(0.5f) == 0); // How are midpoints rounded? Bankers rounding. + nvCheck(ftoi_round(1.5f) == 2); + nvCheck(ftoi_round(2.5f) == 2); + nvCheck(ftoi_round(3.5f) == 4); + nvCheck(ftoi_round(4.5f) == 4); + nvCheck(ftoi_round(-0.5f) == 0); + nvCheck(ftoi_round(-1.5f) == -2); + + + // Truncation (round down if > 0, round up if < 0). + nvCheck(ftoi_trunc(0.1f) == 0); + nvCheck(ftoi_trunc(0.6f) == 0); + nvCheck(ftoi_trunc(-0.2f) == 0); + nvCheck(ftoi_trunc(-0.7f) == 0); // @@ When using /arch:SSE2 in Win32, msvc produce wrong code for this one. It is skipping the addition. + nvCheck(ftoi_trunc(1.99f) == 1); + nvCheck(ftoi_trunc(-1.2f) == -1); + + // Floor (round down). + nvCheck(ftoi_floor(0.1f) == 0); + nvCheck(ftoi_floor(0.6f) == 0); + nvCheck(ftoi_floor(-0.2f) == -1); + nvCheck(ftoi_floor(-0.7f) == -1); + nvCheck(ftoi_floor(1.99f) == 1); + nvCheck(ftoi_floor(-1.2f) == -2); + + nvCheck(ftoi_floor(0) == 0); + nvCheck(ftoi_floor(1) == 1); + nvCheck(ftoi_floor(-1) == -1); + nvCheck(ftoi_floor(2) == 2); + nvCheck(ftoi_floor(-2) == -2); + + // Ceil (round up). + nvCheck(ftoi_ceil(0.1f) == 1); + nvCheck(ftoi_ceil(0.6f) == 1); + nvCheck(ftoi_ceil(-0.2f) == 0); + nvCheck(ftoi_ceil(-0.7f) == 0); + nvCheck(ftoi_ceil(1.99f) == 2); + nvCheck(ftoi_ceil(-1.2f) == -1); + + nvCheck(ftoi_ceil(0) == 0); + nvCheck(ftoi_ceil(1) == 1); + nvCheck(ftoi_ceil(-1) == -1); + nvCheck(ftoi_ceil(2) == 2); + nvCheck(ftoi_ceil(-2) == -2); + } + + + + + + // Safe versions using standard casts. + + inline int iround(float f) + { + return int(floorf(f + 0.5f)); + } + + inline int iround(double f) + { + return int(::floor(f + 0.5)); + } + + inline int ifloor(float f) + { + return int(floorf(f)); + } + + inline int iceil(float f) + { + return int(ceilf(f)); + } + + + + // I'm always confused about which quantizer to use. I think we should choose a quantizer based on how the values are expanded later and this is generally using the 'exact endpoints' rule. + // Some notes from cbloom: http://cbloomrants.blogspot.com/2011/07/07-26-11-pixel-int-to-float-options.html + + // Quantize a float in the [0,1] range, using exact end points or uniform bins. + inline float quantizeFloat(float x, uint bits, bool exactEndPoints = true) { + nvDebugCheck(bits <= 16); + + float range = float(1 << bits); + if (exactEndPoints) { + return floorf(x * (range-1) + 0.5f) / (range-1); + } + else { + return (floorf(x * range) + 0.5f) / range; + } + } + + + // This is the most common rounding mode: + // + // 0 1 2 3 + // |___|_______|_______|___| + // 0 1 + // + // You get that if you take the unit floating point number multiply by 'N-1' and round to nearest. That is, `i = round(f * (N-1))`. 
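quantizeFloat() just above selects between the two usual [0, 1] quantizers, and the surrounding comment describes the round(f * (N-1)) / divide-by-(N-1) convention that goes with exact end points. A small worked comparison (not part of the patch) for bits = 2, i.e. four codes:

    #include <cmath>
    #include <cstdio>

    // The same arithmetic as quantizeFloat() above, spelled out for bits = 2.
    int main()
    {
        const float x = 0.7f;
        const float range = 4.0f;                                                // 1 << 2

        float exact   = std::floor(x * (range - 1.0f) + 0.5f) / (range - 1.0f);  // codes at 0, 1/3, 2/3, 1
        float uniform = (std::floor(x * range) + 0.5f) / range;                  // codes at the bin centres

        std::printf("exact endpoints: %f\nuniform bins:    %f\n", exact, uniform);
        return 0;
    }

This prints roughly 0.666667 and 0.625000: with exact end points the representable values are i / (N - 1), so 0.0 and 1.0 are themselves codes and the round/reconstruct pair applies directly, whereas with uniform bins every input maps to the centre of one of N equal bins and the end points are never hit exactly.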
+ // You reconstruct the original float dividing by 'N-1': `f = i / (N-1)` + + + // 0 1 2 3 + // |_____|_____|_____|_____| + // 0 1 + + /*enum BinningMode { + RoundMode_ExactEndPoints, + RoundMode_UniformBins, + };*/ + + template + inline uint unitFloatToFixed(float f) { + return ftoi_round(f * ((1<(f); + } + + inline uint16 unitFloatToFixed16(float f) { + return (uint16)unitFloatToFixed<16>(f); + } + + +} // nv + +#endif // NV_MATH_FTOI_H Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/nvmath.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvmath/nvmath.h +++ ps/trunk/libraries/source/nvtt/src/src/nvmath/nvmath.h @@ -1,13 +1,26 @@ // This code is in the public domain -- castanyo@yahoo.es +#pragma once #ifndef NV_MATH_H #define NV_MATH_H -#include -#include +#include "nvcore/nvcore.h" +#include "nvcore/Debug.h" // nvDebugCheck +#include "nvcore/Utils.h" // max, clamp #include +#if NV_OS_WIN32 || NV_OS_XBOX +#include // finite, isnan +#endif + +#if NV_CPU_X86 || NV_CPU_X86_64 + //#include + #include +#endif + + + // Function linkage #if NVMATH_SHARED #ifdef NVMATH_EXPORTS @@ -22,142 +35,295 @@ #define NVMATH_CLASS #endif // NVMATH_SHARED +// Set some reasonable defaults. +#ifndef NV_USE_ALTIVEC +# define NV_USE_ALTIVEC NV_CPU_PPC +//# define NV_USE_ALTIVEC defined(__VEC__) +#endif + +#ifndef NV_USE_SSE +# if NV_CPU_X86_64 + // x64 always supports at least SSE2 +# define NV_USE_SSE 2 +# elif NV_CC_MSVC && defined(_M_IX86_FP) + // Also on x86 with the /arch:SSE flag in MSVC. +# define NV_USE_SSE _M_IX86_FP // 1=SSE, 2=SS2 +# elif defined(__SSE__) +# define NV_USE_SSE 1 +# elif defined(__SSE2__) +# define NV_USE_SSE 2 +# else + // Otherwise we assume no SSE. +# define NV_USE_SSE 0 +# endif +#endif + + +// Internally set NV_USE_SIMD when either altivec or sse is available. +#if NV_USE_ALTIVEC && NV_USE_SSE +# error "Cannot enable both altivec and sse!" +#endif + + + #ifndef PI -#define PI float(3.1415926535897932384626433833) +#define PI float(3.1415926535897932384626433833) #endif -#define NV_EPSILON (0.0001f) -#define NV_NORMAL_EPSILON (0.001f) +#define NV_EPSILON (0.0001f) +#define NV_NORMAL_EPSILON (0.001f) /* -#define SQ(r) ((r)*(r)) +#define SQ(r) ((r)*(r)) -#define SIGN_BITMASK 0x80000000 +#define SIGN_BITMASK 0x80000000 /// Integer representation of a floating-point value. -#define IR(x) ((uint32 &)(x)) +#define IR(x) ((uint32 &)(x)) /// Absolute integer representation of a floating-point value -#define AIR(x) (IR(x) & 0x7fffffff) +#define AIR(x) (IR(x) & 0x7fffffff) /// Floating-point representation of an integer value. -#define FR(x) ((float&)(x)) +#define FR(x) ((float&)(x)) /// Integer-based comparison of a floating point value. /// Don't use it blindly, it can be faster or slower than the FPU comparison, depends on the context. 
-#define IS_NEGATIVE_FLOAT(x) (IR(x)&SIGN_BITMASK) +#define IS_NEGATIVE_FLOAT(x) (IR(x)&SIGN_BITMASK) */ -inline double sqrt_assert(const double f) +extern "C" inline double sqrt_assert(const double f) { - nvDebugCheck(f >= 0.0f); - return sqrt(f); + nvDebugCheck(f >= 0.0f); + return sqrt(f); } inline float sqrtf_assert(const float f) { - nvDebugCheck(f >= 0.0f); - return sqrtf(f); + nvDebugCheck(f >= 0.0f); + return sqrtf(f); } -inline double acos_assert(const double f) +extern "C" inline double acos_assert(const double f) { - nvDebugCheck(f >= -1.0f && f <= 1.0f); - return acos(f); + nvDebugCheck(f >= -1.0f && f <= 1.0f); + return acos(f); } inline float acosf_assert(const float f) { - nvDebugCheck(f >= -1.0f && f <= 1.0f); - return acosf(f); + nvDebugCheck(f >= -1.0f && f <= 1.0f); + return acosf(f); } -inline double asin_assert(const double f) +extern "C" inline double asin_assert(const double f) { - nvDebugCheck(f >= -1.0f && f <= 1.0f); - return asin(f); + nvDebugCheck(f >= -1.0f && f <= 1.0f); + return asin(f); } inline float asinf_assert(const float f) { - nvDebugCheck(f >= -1.0f && f <= 1.0f); - return asinf(f); + nvDebugCheck(f >= -1.0f && f <= 1.0f); + return asinf(f); } // Replace default functions with asserting ones. +#if !NV_CC_MSVC || (NV_CC_MSVC && (_MSC_VER < 1700)) // IC: Apparently this was causing problems in Visual Studio 2012. See Issue 194: https://code.google.com/p/nvidia-texture-tools/issues/detail?id=194 #define sqrt sqrt_assert #define sqrtf sqrtf_assert #define acos acos_assert #define acosf acosf_assert #define asin asin_assert #define asinf asinf_assert - -#if NV_OS_WIN32 -#include #endif -namespace nv +#if NV_CC_MSVC +NV_FORCEINLINE float log2f(float x) { -inline float toRadian(float degree) { return degree * (PI / 180.0f); } -inline float toDegree(float radian) { return radian * (180.0f / PI); } - -inline bool equal(const float f0, const float f1, const float epsilon = NV_EPSILON) -{ - return fabs(f0-f1) <= epsilon; + nvCheck(x >= 0); + return logf(x) / logf(2.0f); } - -inline bool isZero(const float f, const float epsilon = NV_EPSILON) +NV_FORCEINLINE float exp2f(float x) { - return fabs(f) <= epsilon; + return powf(2.0f, x); } +#endif -inline bool isFinite(const float f) +namespace nv { -#if NV_OS_WIN32 - return _finite(f) != 0; -#elif NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_OPENBSD - return isfinite(f); + inline float toRadian(float degree) { return degree * (PI / 180.0f); } + inline float toDegree(float radian) { return radian * (180.0f / PI); } + + // Robust floating point comparisons: + // http://realtimecollisiondetection.net/blog/?p=89 + inline bool equal(const float f0, const float f1, const float epsilon = NV_EPSILON) + { + //return fabs(f0-f1) <= epsilon; + return fabs(f0-f1) <= epsilon * max3(1.0f, fabsf(f0), fabsf(f1)); + } + + inline bool isZero(const float f, const float epsilon = NV_EPSILON) + { + return fabs(f) <= epsilon; + } + + inline bool isFinite(const float f) + { +#if NV_OS_WIN32 || NV_OS_XBOX + return _finite(f) != 0; +#elif NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_NETBSD || NV_OS_OPENBSD || NV_OS_ORBIS + return isfinite(f); #elif NV_OS_LINUX - return finitef(f); + return finitef(f); #else -# error "isFinite not supported" +# error "isFinite not supported" #endif -//return std::isfinite (f); -//return finite (f); -} - -inline bool isNan(const float f) -{ -#if NV_OS_WIN32 - return _isnan(f) != 0; -#elif NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_OPENBSD - return isnan(f); -#elif NV_OS_LINUX - return isnanf(f); + //return std::isfinite (f); 
+ //return finite (f); + } + + inline bool isNan(const float f) + { +#if NV_OS_WIN32 || NV_OS_XBOX + return _isnan(f) != 0; +#elif NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_NETBSD || NV_OS_OPENBSD || NV_OS_ORBIS || NV_OS_LINUX + return isnan(f); #else -# error "isNan not supported" +# error "isNan not supported" #endif -} - -inline uint log2(uint i) -{ - uint value = 0; - while( i >>= 1 ) { - value++; - } - return value; -} + } -inline float lerp(float f0, float f1, float t) -{ - const float s = 1.0f - t; - return f0 * s + f1 * t; -} + inline uint log2(uint i) + { + uint value = 0; + while( i >>= 1 ) { + value++; + } + return value; + } + + inline float lerp(float f0, float f1, float t) + { + const float s = 1.0f - t; + return f0 * s + f1 * t; + } + + inline float square(float f) { return f * f; } + inline int square(int i) { return i * i; } + + inline float cube(float f) { return f * f * f; } + inline int cube(int i) { return i * i * i; } + + inline float frac(float f) + { + return f - floor(f); + } + + inline float floatRound(float f) + { + return floorf(f + 0.5f); + } + + // Eliminates negative zeros from a float array. + inline void floatCleanup(float * fp, int n) + { + for (int i = 0; i < n; i++) { + //nvDebugCheck(isFinite(fp[i])); + union { float f; uint32 i; } x = { fp[i] }; + if (x.i == 0x80000000) fp[i] = 0.0f; + } + } + + inline float saturate(float f) { + return clamp(f, 0.0f, 1.0f); + } + + inline float linearstep(float edge0, float edge1, float x) { + // Scale, bias and saturate x to 0..1 range + return saturate((x - edge0) / (edge1 - edge0)); + } + + inline float smoothstep(float edge0, float edge1, float x) { + x = linearstep(edge0, edge1, x); + + // Evaluate polynomial + return x*x*(3 - 2*x); + } + + inline int sign(float a) + { + return (a > 0) - (a < 0); + //if (a > 0.0f) return 1; + //if (a < 0.0f) return -1; + //return 0; + } + + union Float754 { + unsigned int raw; + float value; + struct { + #if NV_BIG_ENDIAN + unsigned int negative:1; + unsigned int biasedexponent:8; + unsigned int mantissa:23; + #else + unsigned int mantissa:23; + unsigned int biasedexponent:8; + unsigned int negative:1; + #endif + } field; + }; + + // Return the exponent of x ~ Floor(Log2(x)) + inline int floatExponent(float x) + { + Float754 f; + f.value = x; + return (f.field.biasedexponent - 127); + } + + + // FloatRGB9E5 + union Float3SE { + uint32 v; + struct { + #if NV_BIG_ENDIAN + uint32 e : 5; + uint32 zm : 9; + uint32 ym : 9; + uint32 xm : 9; + #else + uint32 xm : 9; + uint32 ym : 9; + uint32 zm : 9; + uint32 e : 5; + #endif + }; + }; + + // FloatR11G11B10 + union Float3PK { + uint32 v; + struct { + #if NV_BIG_ENDIAN + uint32 ze : 5; + uint32 zm : 5; + uint32 ye : 5; + uint32 ym : 6; + uint32 xe : 5; + uint32 xm : 6; + #else + uint32 xm : 6; + uint32 xe : 5; + uint32 ym : 6; + uint32 ye : 5; + uint32 zm : 5; + uint32 ze : 5; + #endif + }; + }; -inline float square(float f) -{ - return f * f; -} } // nv Index: ps/trunk/libraries/source/nvtt/src/src/nvthread/Atomic.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvthread/Atomic.h +++ ps/trunk/libraries/source/nvtt/src/src/nvthread/Atomic.h @@ -0,0 +1,408 @@ +// This code is in the public domain -- castanyo@yahoo.es + +#ifndef NV_THREAD_ATOMIC_H +#define NV_THREAD_ATOMIC_H + +#include "nvthread.h" + +#include "nvcore/Debug.h" + + +#if NV_CC_MSVC + +#include // Already included by nvthread.h + +#pragma intrinsic(_InterlockedIncrement, _InterlockedDecrement) +#pragma 
intrinsic(_InterlockedCompareExchange, _InterlockedExchange) +//#pragma intrinsic(_InterlockedExchangeAdd64) + +/* +extern "C" +{ + #pragma intrinsic(_InterlockedIncrement, _InterlockedDecrement) + LONG __cdecl _InterlockedIncrement(long volatile *Addend); + LONG __cdecl _InterlockedDecrement(long volatile *Addend); + + #pragma intrinsic(_InterlockedCompareExchange, _InterlockedExchange) + LONG __cdecl _InterlockedCompareExchange(long volatile * Destination, long Exchange, long Compared); + LONG __cdecl _InterlockedExchange(long volatile * Target, LONG Value); +} +*/ + +#endif // NV_CC_MSVC + +#if NV_CC_CLANG && POSH_CPU_STRONGARM +// LLVM/Clang do not yet have functioning atomics as of 2.1 +// #include +#endif + +//ACS: need this if we want to use Apple's atomics. +/* +#if NV_OS_IOS || NV_OS_DARWIN +// for iOS & OSX we use apple's atomics +#include "libkern/OSAtomic.h" +#endif +*/ + +namespace nv { + + // Load and stores. + inline uint32 loadRelaxed(const uint32 * ptr) { return *ptr; } + inline void storeRelaxed(uint32 * ptr, uint32 value) { *ptr = value; } + + inline uint32 loadAcquire(const volatile uint32 * ptr) + { + nvDebugCheck((intptr_t(ptr) & 3) == 0); + +#if POSH_CPU_X86 || POSH_CPU_X86_64 + uint32 ret = *ptr; // on x86, loads are Acquire + nvCompilerReadBarrier(); + return ret; +#elif POSH_CPU_STRONGARM || POSH_CPU_AARCH64 + // need more specific cpu type for armv7? + // also utilizes a full barrier + // currently treating laod like x86 - this could be wrong + + // this is the easiest but slowest way to do this + nvCompilerReadWriteBarrier(); + uint32 ret = *ptr; // replace with ldrex? + nvCompilerReadWriteBarrier(); + return ret; +#elif POSH_CPU_PPC64 + // need more specific cpu type for ppc64? + // also utilizes a full barrier + // currently treating load like x86 - this could be wrong + + // this is the easiest but slowest way to do this + nvCompilerReadWriteBarrier(); + uint32 ret = *ptr; // replace with ldrex? + nvCompilerReadWriteBarrier(); + return ret; +#else +#error "Not implemented" +#endif + } + + inline void storeRelease(volatile uint32 * ptr, uint32 value) + { + nvDebugCheck((intptr_t(ptr) & 3) == 0); + nvDebugCheck((intptr_t(&value) & 3) == 0); + +#if POSH_CPU_X86 || POSH_CPU_X86_64 + nvCompilerWriteBarrier(); + *ptr = value; // on x86, stores are Release + //nvCompilerWriteBarrier(); // @@ IC: Where does this barrier go? In nvtt it was after, in Witness before. Not sure which one is right. +#elif POSH_CPU_STRONGARM || POSH_CPU_AARCH64 + // this is the easiest but slowest way to do this + nvCompilerReadWriteBarrier(); + *ptr = value; //strex? + nvCompilerReadWriteBarrier(); +#elif POSH_CPU_PPC64 + // this is the easiest but slowest way to do this + nvCompilerReadWriteBarrier(); + *ptr = value; //strex? + nvCompilerReadWriteBarrier(); +#else +#error "Atomics not implemented." +#endif + } + + + template + inline void storeReleasePointer(volatile T * pTo, T from) + { + NV_COMPILER_CHECK(sizeof(T) == sizeof(intptr_t)); + nvDebugCheck((((intptr_t)pTo) % sizeof(intptr_t)) == 0); + nvDebugCheck((((intptr_t)&from) % sizeof(intptr_t)) == 0); + nvCompilerWriteBarrier(); + *pTo = from; // on x86, stores are Release + } + + template + inline T loadAcquirePointer(volatile T * ptr) + { + NV_COMPILER_CHECK(sizeof(T) == sizeof(intptr_t)); + nvDebugCheck((((intptr_t)ptr) % sizeof(intptr_t)) == 0); + T ret = *ptr; // on x86, loads are Acquire + nvCompilerReadBarrier(); + return ret; + } + + + // Atomics. @@ Assuming sequential memory order? 
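The loadAcquire/storeRelease helpers and their pointer-sized variants above exist to publish data safely from one thread to another. For illustration only (this sketch is not part of the patch), the same publication pattern written with std::atomic, which is the C++11 equivalent these pre-C++11 helpers emulate:

    #include <atomic>
    #include <cstdio>
    #include <thread>

    struct Payload { int a; int b; };

    static Payload g_data;
    static std::atomic<Payload *> g_published{ nullptr };

    static void producer()
    {
        g_data.a = 1;                                            // plain writes...
        g_data.b = 2;
        g_published.store(&g_data, std::memory_order_release);   // ...made visible by the release store
    }

    static void consumer()
    {
        Payload * p = nullptr;
        while ((p = g_published.load(std::memory_order_acquire)) == nullptr) {
            // spin until the producer publishes; the acquire load pairs with the release store
        }
        std::printf("%d %d\n", p->a, p->b);                      // guaranteed to observe 1 and 2
    }

    int main()
    {
        std::thread t1(producer);
        std::thread t2(consumer);
        t1.join();
        t2.join();
        return 0;
    }

storeReleasePointer()/loadAcquirePointer() above implement the store and load halves of this pattern by hand, relying, as their comments note, on x86's strong ordering plus compiler barriers instead of std::atomic.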
+ +#if NV_CC_MSVC + NV_COMPILER_CHECK(sizeof(uint32) == sizeof(long)); + + // Returns incremented value. + inline uint32 atomicIncrement(uint32 * value) + { + nvDebugCheck((intptr_t(value) & 3) == 0); + return uint32(_InterlockedIncrement((long *)value)); + } + + // Returns decremented value. + inline uint32 atomicDecrement(uint32 * value) + { + nvDebugCheck((intptr_t(value) & 3) == 0); + return uint32(_InterlockedDecrement((long *)value)); + } + + // Returns added value. + inline uint32 atomicAdd(uint32 * value, uint32 value_to_add) { + nvDebugCheck((intptr_t(value) & 3) == 0); + return uint32(_InterlockedExchangeAdd((long*)value, (long)value_to_add)) + value_to_add; + } + + // Returns original value before addition. + inline uint32 atomicFetchAndAdd(uint32 * value, uint32 value_to_add) { + nvDebugCheck((intptr_t(value) & 3) == 0); + return uint32(_InterlockedExchangeAdd((long*)value, (long)value_to_add)); + } + + + + + // Compare '*value' against 'expected', if equal, then stores 'desired' in '*value'. + // @@ C++0x style CAS? Unlike the C++0x version, 'expected' is not passed by reference and not mutated. + // @@ Is this strong or weak? Does InterlockedCompareExchange have spurious failures? + inline bool atomicCompareAndSwap(uint32 * value, uint32 expected, uint32 desired) + { + nvDebugCheck((intptr_t(value) & 3) == 0); + long result = _InterlockedCompareExchange((long *)value, (long)desired, (long)expected); + return result == (long)expected; + } + + + inline uint32 atomicSwap(uint32 * value, uint32 desired) + { + nvDebugCheck((intptr_t(value) & 3) == 0); + return (uint32)_InterlockedExchange((long *)value, (long)desired); + } + + + +#elif NV_CC_CLANG && (NV_OS_IOS || NV_OS_DARWIN) + + //ACS: Use Apple's atomics instead? I don't know if these are better in any way; there are non-barrier versions too. There's no OSAtomicSwap32 tho' + /* + inline uint32 atomicIncrement(uint32 * value) + { + nvDebugCheck((intptr_t(value) & 3) == 0); + return (uint32)OSAtomicIncrement32Barrier((int32_t *)value); + } + + inline uint32 atomicDecrement(uint32 * value) + { + nvDebugCheck((intptr_t(value) & 3) == 0); + return (uint32)OSAtomicDecrement32Barrier((int32_t *)value); + } + + // Compare '*value' against 'expected', if equal, then stores 'desired' in '*value'. + // @@ C++0x style CAS? Unlike the C++0x version, 'expected' is not passed by reference and not mutated. + // @@ Is this strong or weak? + inline bool atomicCompareAndSwap(uint32 * value, uint32 expected, uint32 desired) + { + nvDebugCheck((intptr_t(value) & 3) == 0); + return OSAtomicCompareAndSwap32Barrier((int32_t)expected, (int32_t)desired, (int32_t *)value); + } + */ + + // Returns incremented value. + inline uint32 atomicIncrement(uint32 * value) { + nvDebugCheck((intptr_t(value) & 3) == 0); + return __sync_add_and_fetch(value, 1); + } + + // Returns decremented value. + inline uint32 atomicDecrement(uint32 * value) { + nvDebugCheck((intptr_t(value) & 3) == 0); + return __sync_sub_and_fetch(value, 1); + } + + // Returns added value. + inline uint32 atomicAdd(uint32 * value, uint32 value_to_add) { + nvDebugCheck((intptr_t(value) & 3) == 0); + return __sync_add_and_fetch(value, value_to_add); + } + + // Returns original value before addition. + inline uint32 atomicFetchAndAdd(uint32 * value, uint32 value_to_add) { + nvDebugCheck((intptr_t(value) & 3) == 0); + return __sync_fetch_and_add(value, value_to_add); + } + + + // Compare '*value' against 'expected', if equal, then stores 'desired' in '*value'. + // @@ C++0x style CAS? 
Unlike the C++0x version, 'expected' is not passed by reference and not mutated. + // @@ Is this strong or weak? + inline bool atomicCompareAndSwap(uint32 * value, uint32 expected, uint32 desired) + { + nvDebugCheck((intptr_t(value) & 3) == 0); + return __sync_bool_compare_and_swap(value, expected, desired); + } + + inline uint32 atomicSwap(uint32 * value, uint32 desired) + { + nvDebugCheck((intptr_t(value) & 3) == 0); + // this is confusingly named, it doesn't actually do a test but always sets + return __sync_lock_test_and_set(value, desired); + } + + + + +#elif NV_CC_CLANG && POSH_CPU_STRONGARM + + inline uint32 atomicIncrement(uint32 * value) + { + nvDebugCheck((intptr_t(value) & 3) == 0); + + // this should work in LLVM eventually, but not as of 2.1 + // return (uint32)AtomicIncrement((long *)value); + + // in the mean time, + register uint32 result; + asm volatile ( + "1: ldrexb %0, [%1] \n\t" + "add %0, %0, #1 \n\t" + "strexb r1, %0, [%1] \n\t" + "cmp r1, #0 \n\t" + "bne 1b" + : "=&r" (result) + : "r"(value) + : "r1" + ); + return result; + + } + + inline uint32 atomicDecrement(uint32 * value) + { + nvDebugCheck((intptr_t(value) & 3) == 0); + + // this should work in LLVM eventually, but not as of 2.1: + // return (uint32)sys::AtomicDecrement((long *)value); + + // in the mean time, + + register uint32 result; + asm volatile ( + "1: ldrexb %0, [%1] \n\t" + "sub %0, %0, #1 \n\t" + "strexb r1, %0, [%1] \n\t" + "cmp r1, #0 \n\t" + "bne 1b" + : "=&r" (result) + : "r"(value) + : "r1" + ); + return result; + + } + +#elif NV_CC_GNUC + // Many alternative implementations at: + // http://www.memoryhole.net/kyle/2007/05/atomic_incrementing.html + + // Returns incremented value. + inline uint32 atomicIncrement(uint32 * value) { + nvDebugCheck((intptr_t(value) & 3) == 0); + return __sync_add_and_fetch(value, 1); + } + + // Returns decremented value. + inline uint32 atomicDecrement(uint32 * value) { + nvDebugCheck((intptr_t(value) & 3) == 0); + return __sync_sub_and_fetch(value, 1); + } + + // Returns added value. + inline uint32 atomicAdd(uint32 * value, uint32 value_to_add) { + nvDebugCheck((intptr_t(value) & 3) == 0); + return __sync_add_and_fetch(value, value_to_add); + } + + // Returns original value before addition. + inline uint32 atomicFetchAndAdd(uint32 * value, uint32 value_to_add) { + nvDebugCheck((intptr_t(value) & 3) == 0); + return __sync_fetch_and_add(value, value_to_add); + } + + // Compare '*value' against 'expected', if equal, then stores 'desired' in '*value'. + // @@ C++0x style CAS? Unlike the C++0x version, 'expected' is not passed by reference and not mutated. + // @@ Is this strong or weak? + inline bool atomicCompareAndSwap(uint32 * value, uint32 expected, uint32 desired) + { + nvDebugCheck((intptr_t(value) & 3) == 0); + return __sync_bool_compare_and_swap(value, expected, desired); + } + + inline uint32 atomicSwap(uint32 * value, uint32 desired) + { + nvDebugCheck((intptr_t(value) & 3) == 0); + // this is confusingly named, it doesn't actually do a test but always sets + return __sync_lock_test_and_set(value, desired); + } + +#else +#error "Atomics not implemented." + +#endif + + + + + // It would be nice to have C++0x-style atomic types, but I'm not in the mood right now. Only uint32 supported so far. 
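atomicCompareAndSwap() has the same shape on every path above (compare *value with expected, store desired on success, and return whether the swap happened), which is exactly what a retry loop needs for read-modify-write operations that have no dedicated primitive. A sketch of a hypothetical atomicMax() helper built on it; this is not part of the patch, and it assumes the nvtt src/ directory is on the include path along with nvcore's uint32 typedef:

    #include "nvthread/Atomic.h"   // nv::atomicCompareAndSwap, nv::loadRelaxed (declared above)

    // Atomically raise *value to at least 'candidate' and return the value left in memory.
    inline uint32 atomicMax(uint32 * value, uint32 candidate)
    {
        while (true) {
            uint32 current = nv::loadRelaxed(value);                  // snapshot
            if (current >= candidate) {
                return current;                                       // already large enough, nothing to do
            }
            if (nv::atomicCompareAndSwap(value, current, candidate)) {
                return candidate;                                     // the swap won the race
            }
            // Another thread modified *value between the snapshot and the CAS; retry.
        }
    }

The loop terminates because every failed CAS means some other thread made progress, which is the usual lock-free argument for this construction.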
+#if 0 + template + void increment(T * value); + + template + void decrement(T * value); + + template <> + void increment(uint32 * value) { + } + + template <> + void increment(uint64 * value) { + } + + + + template + class Atomic + { + public: + explicit Atomic() : m_value() { } + explicit Atomic( T val ) : m_value(val) { } + ~Atomic() { } + + T loadRelaxed() const { return m_value; } + void storeRelaxed(T val) { m_value = val; } + + //T loadAcquire() const volatile { return nv::loadAcquire(&m_value); } + //void storeRelease(T val) volatile { nv::storeRelease(&m_value, val); } + + void increment() /*volatile*/ { nv::atomicIncrement(m_value); } + void decrement() /*volatile*/ { nv::atomicDecrement(m_value); } + + void compareAndStore(T oldVal, T newVal) { nv::atomicCompareAndStore(&m_value, oldVal, newVal); } + T compareAndExchange(T oldVal, T newVal) { nv::atomicCompareAndStore(&m_value, oldVal, newVal); } + T exchange(T newVal) { nv::atomicExchange(&m_value, newVal); } + + private: + // don't provide operator = or == ; make the client write Store( Load() ) + NV_FORBID_COPY(Atomic); + + NV_COMPILER_CHECK(sizeof(T) == sizeof(uint32) || sizeof(T) == sizeof(uint64)); + + T m_value; + }; +#endif + +} // nv namespace + + +#endif // NV_THREADS_ATOMICS_H Index: ps/trunk/libraries/source/nvtt/src/src/nvthread/CMakeLists.txt =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvthread/CMakeLists.txt +++ ps/trunk/libraries/source/nvtt/src/src/nvthread/CMakeLists.txt @@ -0,0 +1,28 @@ +PROJECT(nvthread) + +SET(THREAD_SRCS + nvthread.h nvthread.cpp + Atomic.h + Event.h Event.cpp + Mutex.h Mutex.cpp + ParallelFor.h ParallelFor.cpp + Thread.h Thread.cpp + ThreadPool.h ThreadPool.cpp) + +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# targets +ADD_DEFINITIONS(-DNVTHREAD_EXPORTS) + +IF(NVTHREAD_SHARED) + ADD_LIBRARY(nvthread SHARED ${THREAD_SRCS}) +ELSE(NVTHREAD_SHARED) + ADD_LIBRARY(nvthread ${THREAD_SRCS}) +ENDIF(NVTHREAD_SHARED) + +TARGET_LINK_LIBRARIES(nvthread ${LIBS} nvcore) + +INSTALL(TARGETS nvthread + RUNTIME DESTINATION ${BINDIR} + LIBRARY DESTINATION ${LIBDIR} + ARCHIVE DESTINATION ${LIBDIR}) Index: ps/trunk/libraries/source/nvtt/src/src/nvthread/Event.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvthread/Event.h +++ ps/trunk/libraries/source/nvtt/src/src/nvthread/Event.h @@ -0,0 +1,34 @@ +// This code is in the public domain -- castano@gmail.com + +#pragma once +#ifndef NV_THREAD_EVENT_H +#define NV_THREAD_EVENT_H + +#include "nvthread.h" + +#include "nvcore/Ptr.h" + +namespace nv +{ + // This is intended to be used by a single waiter thread. + class NVTHREAD_CLASS Event + { + NV_FORBID_COPY(Event); + public: + Event(); + ~Event(); + + void post(); + void wait(); // Wait resets the event. 
+ + static void post(Event * events, uint count); + static void wait(Event * events, uint count); + + private: + struct Private; + AutoPtr m; + }; + +} // nv namespace + +#endif // NV_THREAD_EVENT_H Index: ps/trunk/libraries/source/nvtt/src/src/nvthread/Event.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvthread/Event.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvthread/Event.cpp @@ -0,0 +1,97 @@ +// This code is in the public domain -- castano@gmail.com + +#include "Event.h" + +#if NV_OS_WIN32 +#include "Win32.h" +#elif NV_OS_USE_PTHREAD +#include +#endif + +using namespace nv; + +#if NV_OS_WIN32 + +struct Event::Private { + HANDLE handle; +}; + +Event::Event() : m(new Private) { + m->handle = CreateEvent(NULL, FALSE, FALSE, NULL); +} + +Event::~Event() { + CloseHandle(m->handle); +} + +void Event::post() { + SetEvent(m->handle); +} + +void Event::wait() { + WaitForSingleObject(m->handle, INFINITE); +} + +#elif NV_OS_USE_PTHREAD + +struct Event::Private { + pthread_cond_t pt_cond; + pthread_mutex_t pt_mutex; + int count; + int wait_count; +}; + +Event::Event() : m(new Private) { + m->count=0; + m->wait_count=0; + pthread_mutex_init(&m->pt_mutex, NULL); + pthread_cond_init(&m->pt_cond, NULL); +} + +Event::~Event() { + pthread_cond_destroy(&m->pt_cond); + pthread_mutex_destroy(&m->pt_mutex); +} + +void Event::post() { + pthread_mutex_lock(&m->pt_mutex); + + m->count++; + + //ACS: move this after the unlock? + if(m->wait_count>0) { + pthread_cond_signal(&m->pt_cond); + } + + pthread_mutex_unlock(&m->pt_mutex); +} + +void Event::wait() { + pthread_mutex_lock(&m->pt_mutex); + + while(m->count==0) { + m->wait_count++; + pthread_cond_wait(&m->pt_cond, &m->pt_mutex); + m->wait_count--; + } + m->count--; + + pthread_mutex_unlock(&m->pt_mutex); +} + +#endif // NV_OS_UNIX + + +/*static*/ void Event::post(Event * events, uint count) { + for (uint i = 0; i < count; i++) { + events[i].post(); + } +} + +/*static*/ void Event::wait(Event * events, uint count) { + // @@ Use wait for multiple objects in win32? + + for (uint i = 0; i < count; i++) { + events[i].wait(); + } +} Index: ps/trunk/libraries/source/nvtt/src/src/nvthread/Mutex.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvthread/Mutex.h +++ ps/trunk/libraries/source/nvtt/src/src/nvthread/Mutex.h @@ -0,0 +1,47 @@ +// This code is in the public domain -- castano@gmail.com + +#pragma once +#ifndef NV_THREAD_MUTEX_H +#define NV_THREAD_MUTEX_H + +#include "nvthread.h" + +#include "nvcore/Ptr.h" + +namespace nv +{ + + class NVTHREAD_CLASS Mutex + { + NV_FORBID_COPY(Mutex); + public: + Mutex (const char * name); + ~Mutex (); + + void lock(); + bool tryLock(); + void unlock(); + + private: + struct Private; + AutoPtr m; + }; + + + // Templated lock that can be used with any mutex. 
+ template + class Lock + { + NV_FORBID_COPY(Lock); + public: + + Lock (M & m) : m_mutex (m) { m_mutex.lock(); } + ~Lock () { m_mutex.unlock(); } + + private: + M & m_mutex; + }; + +} // nv namespace + +#endif // NV_THREAD_MUTEX_H Index: ps/trunk/libraries/source/nvtt/src/src/nvthread/Mutex.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvthread/Mutex.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvthread/Mutex.cpp @@ -0,0 +1,129 @@ +// This code is in the public domain -- castano@gmail.com + +#include "Mutex.h" + +#if NV_OS_WIN32 + +#include "Win32.h" + +#elif NV_OS_USE_PTHREAD + +#include +#include // EBUSY + +#endif // NV_OS + +#if NV_USE_TELEMETRY +#include +extern HTELEMETRY tmContext; +#endif + +using namespace nv; + + +#if NV_OS_WIN32 + +struct Mutex::Private { + CRITICAL_SECTION mutex; + const char * name; +}; + + +Mutex::Mutex (const char * name) : m(new Private) +{ + InitializeCriticalSection(&m->mutex); + m->name = name; +#if NV_USE_TELEMETRY + tmLockName(tmContext, this, name); +#endif +} + +Mutex::~Mutex () +{ + DeleteCriticalSection(&m->mutex); +} + +void Mutex::lock() +{ +#if NV_USE_TELEMETRY + TmU64 matcher; + tmTryLockEx(tmContext, &matcher, 100/*0.1 ms*/, __FILE__, __LINE__, this, "blocked"); +#endif + + EnterCriticalSection(&m->mutex); + +#if NV_USE_TELEMETRY + tmEndTryLockEx(tmContext, matcher, __FILE__, __LINE__, this, TMLR_SUCCESS); + tmSetLockState(tmContext, this, TMLS_LOCKED, "acquired"); +#endif +} + +bool Mutex::tryLock() +{ +#if NV_USE_TELEMETRY + TmU64 matcher; + tmTryLockEx(tmContext, &matcher, 100/*0.1 ms*/, __FILE__, __LINE__, this, "blocked"); + if (TryEnterCriticalSection(&m->mutex) != 0) { + tmEndTryLockEx(tmContext, matcher, __FILE__, __LINE__, this, TMLR_SUCCESS); + tmSetLockState(tmContext, this, TMLS_LOCKED, "acquired"); + return true; + } + else { + tmEndTryLockEx(tmContext, matcher, __FILE__, __LINE__, this, TMLR_FAILED); + return false; + } +#else + return TryEnterCriticalSection(&m->mutex) != 0; +#endif +} + +void Mutex::unlock() +{ +#if NV_USE_TELEMETRY + tmSetLockState(tmContext, this, TMLS_RELEASED, "released"); +#endif + + LeaveCriticalSection(&m->mutex); +} + +#elif NV_OS_USE_PTHREAD + +struct Mutex::Private { + pthread_mutex_t mutex; + const char * name; +}; + + +Mutex::Mutex (const char * name) : m(new Private) +{ + int result = pthread_mutex_init(&m->mutex, NULL); + m->name = name; + nvDebugCheck(result == 0); +} + +Mutex::~Mutex () +{ + int result = pthread_mutex_destroy(&m->mutex); + nvDebugCheck(result == 0); +} + +void Mutex::lock() +{ + int result = pthread_mutex_lock(&m->mutex); + nvDebugCheck(result == 0); +} + +bool Mutex::tryLock() +{ + int result = pthread_mutex_trylock(&m->mutex); + nvDebugCheck(result == 0 || result == EBUSY); + return result == 0; +} + +void Mutex::unlock() +{ + int result = pthread_mutex_unlock(&m->mutex); + nvDebugCheck(result == 0); +} + +#endif // NV_OS_UNIX Index: ps/trunk/libraries/source/nvtt/src/src/nvthread/ParallelFor.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvthread/ParallelFor.h +++ ps/trunk/libraries/source/nvtt/src/src/nvthread/ParallelFor.h @@ -0,0 +1,181 @@ +// This code is in the public domain -- Ignacio Castaño + +#pragma once +#ifndef NV_THREAD_PARALLELFOR_H +#define NV_THREAD_PARALLELFOR_H + +#include "nvthread.h" +//#include "Atomic.h" // atomic + +namespace nv +{ + class Thread; + class ThreadPool; + + typedef void ForTask(void * context, /*int 
tid,*/ int idx); // @@ It would be nice to have the thread index as an argument here. + + struct ParallelFor { + ParallelFor(ForTask * task, void * context); + ~ParallelFor(); + + void run(uint count, uint step = 1); + + // Invariant: + ForTask * task; + void * context; + ThreadPool * pool; + + // State: + uint count; + uint step; + /*atomic*/ uint idx; + }; + + +#if NV_CC_CPP11 + + template + void sequential_for(uint count, F f) { + for (uint i = 0; i < count; i++) { + f(i); + } + } + + + template + void parallel_for(uint count, uint step, F f) { + // Transform lambda into function pointer. + auto lambda = [](void* context, /*int tid, */int idx) { + F & f = *reinterpret_cast(context); + f(/*tid, */idx); + }; + + ParallelFor pf(lambda, &f); + pf.run(count, step); + } + + + template + void parallel_for(uint count, F f) { + parallel_for(count, /*step=*/1, f); + } + + + template + void parallel_for_if(uint count, uint step, bool condition, F f) { + if (condition) { + parallel_for(count, step, f); + } + else { + sequential_for(count, f); + } + } + + +#if 0 + template + void parallel_for_each(Array & array, uint step, F f) { + // Transform lambda into function pointer. + auto lambda = [](void* context, int idx) { + F & f = *reinterpret_cast(context); + f(array[idx]); + }; + + ParallelFor pf(lambda, &f); + pf.run(count, step); + } +#endif + + +#endif // NV_CC_CPP11 + + +/* + +#include "nvthread/Mutex.h" +#include "nvcore/Array.inl" + + template + struct ParallelOutputStream { +#if 0 + // In its most basic implementation the parallel stream is simply a single array protected by a mutex. + Parallel_Output_Stream(uint producer_count) {} + + void reset() { final_array.clear(); } + void append(uint producer_id, const T & t) { Lock(mutex); final_array.append(t); } + nv::Array & finalize() { return final_array; } + + nv::Mutex mutex; + nv::Array final_array; + +#elif 0 + // Another simple implementation is to have N arrays that are merged at the end. + ParallelOutputStream(uint producer_count) : producer_count(producer_count) { + partial_array = new Array[producer_count]; + } + + void reset() { + for (int i = 0; i < producer_count; i++) { + partial_array[i].clear(); + } + } + + void append(uint producer_id, const T & t) { + nvCheck(producer_id < producer_count); + partial_array[producer_id].append(t); + } + + nv::Array & finalize() { + for (int i = 1; i < producer_count; i++) { + partial_array->append(partial_array[i]); + partial_array[i].clear(); + } + return *partial_array; + } + + uint producer_count; + nv::Array * partial_array; +#else + ParallelOutputStream(uint producer_count) : producer_count(producer_count) { + partial_array = new PartialArray[producer_count]; + } + + // But a more sophisticated implementation keeps N short arrays that are merged as they get full. This preserves partial order. + struct PartialArray { // Make sure this is aligned to cache lines. We want producers to access their respective arrays without conflicts. + uint count; + T data[32]; // Pick size to minimize wasted space considering cache line alignment? + }; + + const uint producer_count; + PartialArray * partial_array; + + // @@ Make sure mutex and partial_array are not in the same cache line! 
+ + nv::Mutex mutex; + nv::Array final_array; + + void append(uint producer_id, const T & t) { + if (partial_array[producer_id].count == 32) { + partial_array[producer_id].count = 0; + Lock(mutex); + final_array.append(partial_array[producer_id].data, 32); + } + + partial_array[producer_id].data[partial_array[producer_id].count++] = t; + } + nv::Array & finalize() { + for (int i = 0; i < producer_count; i++) { + final_array.append(partial_array[producer_id].data, partial_array[producer_id].count); + } + return final_array; + } +#endif + }; + +*/ + + +} // nv namespace + + +#endif // NV_THREAD_PARALLELFOR_H Index: ps/trunk/libraries/source/nvtt/src/src/nvthread/ParallelFor.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvthread/ParallelFor.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvthread/ParallelFor.cpp @@ -0,0 +1,61 @@ +// This code is in the public domain -- Ignacio Castaño + +#include "ParallelFor.h" +#include "Thread.h" +#include "Atomic.h" +#include "ThreadPool.h" + +#include "nvcore/Utils.h" // toI32 + +using namespace nv; + +#define ENABLE_PARALLEL_FOR 1 + +static void worker(void * arg, int tid) { + ParallelFor * owner = (ParallelFor *)arg; + + while(true) { + uint new_idx = atomicFetchAndAdd(&owner->idx, owner->step); + if (new_idx >= owner->count) { + break; + } + + const uint count = min(owner->count, new_idx + owner->step); + for (uint i = new_idx; i < count; i++) { + owner->task(owner->context, /*tid, */i); + } + } +} + + +ParallelFor::ParallelFor(ForTask * task, void * context) : task(task), context(context) { +#if ENABLE_PARALLEL_FOR + pool = ThreadPool::acquire(); +#endif +} + +ParallelFor::~ParallelFor() { +#if ENABLE_PARALLEL_FOR + ThreadPool::release(pool); +#endif +} + +void ParallelFor::run(uint count, uint step/*= 1*/) { +#if ENABLE_PARALLEL_FOR + storeRelease(&this->count, count); + storeRelease(&this->step, step); + + // Init atomic counter to zero. + storeRelease(&idx, 0); + + // Start threads. 
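+    // Every pool thread executes worker() above: each atomicFetchAndAdd on idx
+    // hands the calling thread the next `step` indices, and a thread stops once
+    // idx has moved past count, so the whole [0, count) range is covered
+    // exactly once.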
+ pool->run(worker, this); + + nvDebugCheck(idx >= count); +#else + for (int i = 0; i < toI32(count); i++) { + task(context, i); + } +#endif +} + Index: ps/trunk/libraries/source/nvtt/src/src/nvthread/Thread.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvthread/Thread.h +++ ps/trunk/libraries/source/nvtt/src/src/nvthread/Thread.h @@ -0,0 +1,42 @@ +// This code is in the public domain -- castano@gmail.com + +#pragma once +#ifndef NV_THREAD_THREAD_H +#define NV_THREAD_THREAD_H + +#include "nvthread.h" + +#include "nvcore/Ptr.h" // AutoPtr + +namespace nv +{ + typedef void ThreadFunc(void * arg); + + class NVTHREAD_CLASS Thread + { + NV_FORBID_COPY(Thread); + public: + Thread(); + Thread(const char * name); + ~Thread(); + + void setName(const char * name); + + void start(ThreadFunc * func, void * arg); + void wait(); + + bool isRunning() const; + + static void spinWait(uint count); + static void yield(); + static void sleep(uint ms); + + static void wait(Thread * threads, uint count); + + struct Private; + AutoPtr p; + }; + +} // nv namespace + +#endif // NV_THREAD_THREAD_H Index: ps/trunk/libraries/source/nvtt/src/src/nvthread/Thread.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvthread/Thread.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvthread/Thread.cpp @@ -0,0 +1,210 @@ +// This code is in the public domain -- castano@gmail.com + +#include "Thread.h" + +#if NV_OS_WIN32 + #include "Win32.h" +#elif NV_OS_USE_PTHREAD + #include + #include // usleep +#endif + +#if NV_USE_TELEMETRY +#include +extern HTELEMETRY tmContext; +#endif + + +using namespace nv; + +struct Thread::Private +{ +#if NV_OS_WIN32 + HANDLE thread; +#elif NV_OS_USE_PTHREAD + pthread_t thread; +#endif + + ThreadFunc * func; + void * arg; + const char * name; +}; + + +#if NV_OS_WIN32 + +unsigned long __stdcall threadFunc(void * arg) { + Thread::Private * thread = (Thread::Private *)arg; + thread->func(thread->arg); + return 0; +} + +// SetThreadName implementation from msdn: +// http://msdn.microsoft.com/en-us/library/xcb2z8hs.aspx + +const DWORD MS_VC_EXCEPTION=0x406D1388; + +#pragma pack(push,8) +typedef struct tagTHREADNAME_INFO +{ + DWORD dwType; // Must be 0x1000. + LPCSTR szName; // Pointer to name (in user addr space). + DWORD dwThreadID; // Thread ID (-1=caller thread). + DWORD dwFlags; // Reserved for future use, must be zero. 
+} THREADNAME_INFO; +#pragma pack(pop) + +static void setThreadName(DWORD dwThreadID, const char* threadName) +{ + THREADNAME_INFO info; + info.dwType = 0x1000; + info.szName = threadName; + info.dwThreadID = dwThreadID; + info.dwFlags = 0; + + __try + { + RaiseException( MS_VC_EXCEPTION, 0, sizeof(info)/sizeof(ULONG_PTR), (ULONG_PTR*)&info ); + } + __except(EXCEPTION_EXECUTE_HANDLER) + { + } +} + + +#elif NV_OS_USE_PTHREAD + +extern "C" void * threadFunc(void * arg) { + Thread::Private * thread = (Thread::Private *)arg; + thread->func(thread->arg); + pthread_exit(0); +} + +#endif + + +Thread::Thread() : p(new Private) +{ + p->thread = 0; + p->name = NULL; +} + +Thread::Thread(const char * name) : p(new Private) +{ + p->thread = 0; + p->name = name; +} + +Thread::~Thread() +{ + nvDebugCheck(p->thread == 0); +} + +void Thread::setName(const char * name) +{ + nvCheck(p->name == NULL); + p->name = name; +} + +void Thread::start(ThreadFunc * func, void * arg) +{ + p->func = func; + p->arg = arg; + +#if NV_OS_WIN32 + DWORD threadId; + p->thread = CreateThread(NULL, 0, threadFunc, p.ptr(), 0, &threadId); + //p->thread = (HANDLE)_beginthreadex (0, 0, threadFunc, p.ptr(), 0, NULL); // @@ So that we can call CRT functions... + nvDebugCheck(p->thread != NULL); + if (p->name != NULL) { + setThreadName(threadId, p->name); + #if NV_USE_TELEMETRY + tmThreadName(tmContext, threadId, p->name); + #endif + } +#elif NV_OS_ORBIS + int ret = scePthreadCreate(&p->thread, NULL, threadFunc, p.ptr(), p->name ? p->name : "nv::Thread"); + nvDebugCheck(ret == 0); + // use any non-system core + scePthreadSetaffinity(p->thread, 0x3F); + scePthreadSetprio(p->thread, (SCE_KERNEL_PRIO_FIFO_DEFAULT + SCE_KERNEL_PRIO_FIFO_HIGHEST) / 2); +#elif NV_OS_USE_PTHREAD + int result = pthread_create(&p->thread, NULL, threadFunc, p.ptr()); + nvDebugCheck(result == 0); +#endif +} + +void Thread::wait() +{ +#if NV_OS_WIN32 + DWORD status = WaitForSingleObject (p->thread, INFINITE); + nvCheck (status == WAIT_OBJECT_0); + BOOL ok = CloseHandle (p->thread); + p->thread = NULL; + nvCheck (ok); +#elif NV_OS_USE_PTHREAD + int result = pthread_join(p->thread, NULL); + p->thread = 0; + nvDebugCheck(result == 0); +#endif +} + +bool Thread::isRunning () const +{ +#if NV_OS_WIN32 + return p->thread != NULL; +#elif NV_OS_USE_PTHREAD + return p->thread != 0; +#endif +} + +/*static*/ void Thread::spinWait(uint count) +{ + for (uint i = 0; i < count; i++) {} +} + +/*static*/ void Thread::yield() +{ +#if NV_OS_WIN32 + SwitchToThread(); +#elif NV_OS_USE_PTHREAD + int result = sched_yield(); + nvDebugCheck(result == 0); +#endif +} + +/*static*/ void Thread::sleep(uint ms) +{ +#if NV_OS_WIN32 + Sleep(ms); +#elif NV_OS_USE_PTHREAD + usleep(1000 * ms); +#endif +} + +/*static*/ void Thread::wait(Thread * threads, uint count) +{ +/*#if NV_OS_WIN32 + // @@ Is there any advantage in doing this? 
+ nvDebugCheck(count < MAXIMUM_WAIT_OBJECTS); + + HANDLE * handles = new HANDLE[count]; + for (uint i = 0; i < count; i++) { + handles[i] = threads->p->thread; + } + + DWORD result = WaitForMultipleObjects(count, handles, TRUE, INFINITE); + + for (uint i = 0; i < count; i++) { + CloseHandle (threads->p->thread); + threads->p->thread = 0; + } + + delete [] handles; +#else*/ + for (uint i = 0; i < count; i++) { + threads[i].wait(); + } +//#endif +} + Index: ps/trunk/libraries/source/nvtt/src/src/nvthread/ThreadPool.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvthread/ThreadPool.h +++ ps/trunk/libraries/source/nvtt/src/src/nvthread/ThreadPool.h @@ -0,0 +1,86 @@ +// This code is in the public domain -- castano@gmail.com + +#pragma once +#ifndef NV_THREAD_THREADPOOL_H +#define NV_THREAD_THREADPOOL_H + +#include "nvthread.h" + +#include "Event.h" +#include "Thread.h" + +// The thread pool creates one worker thread for each physical core. +// The threads are idle waiting for their start events so that they do not consume any resources while inactive. +// The thread pool runs the same function in all worker threads, the idea is to use this as the foundation of a custom task scheduler. +// When the thread pool starts, the main thread continues running, but the common use case is to inmmediately wait for the termination events of the worker threads. +// @@ The start and wait methods could probably be merged. +// It may be running the thread function on the invoking thread to avoid thread switches. + +namespace nv { + + class Thread; + class Event; + + typedef void ThreadTask(void * context, int id); + + class ThreadPool { + NV_FORBID_COPY(ThreadPool); + public: + + static void setup(uint workerCount, bool useThreadAffinity, bool useCallingThread); + + static ThreadPool * acquire(); + static void release(ThreadPool *); + + ThreadPool(uint workerCount = processorCount(), bool useThreadAffinity = true, bool useCallingThread = false); + ~ThreadPool(); + + void run(ThreadTask * func, void * arg); + + void start(ThreadTask * func, void * arg); + void wait(); + + //NV_THREAD_LOCAL static uint threadId; + + private: + + static void workerFunc(void * arg); + + bool useThreadAffinity; + bool useCallingThread; + uint workerCount; + + Thread * workers; + Event * startEvents; + Event * finishEvents; + + uint allIdle; + + // Current function: + ThreadTask * func; + void * arg; + }; + + +#if NV_CC_CPP11 + + template + void thread_pool_run(F f) { + // Transform lambda into function pointer. 
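+        // The lambda below captures nothing, so it converts to a plain
+        // ThreadTask function pointer; the caller's functor is passed through
+        // the void* context argument instead of being captured.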
+ auto lambda = [](void* context, int id) { + F & f = *reinterpret_cast(context); + f(id); + }; + + ThreadPool * pool = ThreadPool::acquire(); + pool->run(lambda, &f); + ThreadPool::release(pool); + } + +#endif // NV_CC_CPP11 + + +} // namespace nv + + +#endif // NV_THREAD_THREADPOOL_H Index: ps/trunk/libraries/source/nvtt/src/src/nvthread/ThreadPool.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvthread/ThreadPool.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvthread/ThreadPool.cpp @@ -0,0 +1,180 @@ +// This code is in the public domain -- castano@gmail.com + +#include "ThreadPool.h" +#include "Mutex.h" +#include "Thread.h" +#include "Atomic.h" + +#include "nvcore/Utils.h" +#include "nvcore/StrLib.h" + +#if NV_USE_TELEMETRY +#include +extern HTELEMETRY tmContext; +#endif + + +// Most of the time it's not necessary to protect the thread pool, but if it doesn't add a significant overhead, then it'd be safer to do it. +#define PROTECT_THREAD_POOL 1 + + +using namespace nv; + +#if PROTECT_THREAD_POOL +Mutex s_pool_mutex("thread pool"); +#endif + +AutoPtr s_pool; + + +/*static*/ void ThreadPool::setup(uint workerCount, bool useThreadAffinity, bool useCallingThread) { +#if PROTECT_THREAD_POOL + Lock lock(s_pool_mutex); +#endif + + s_pool = new ThreadPool(workerCount, useThreadAffinity, useCallingThread); +} + +/*static*/ ThreadPool * ThreadPool::acquire() +{ +#if PROTECT_THREAD_POOL + s_pool_mutex.lock(); // @@ If same thread tries to lock twice, this should assert. +#endif + + if (s_pool == NULL) { + ThreadPool * p = new ThreadPool; + nvDebugCheck(s_pool == p); + } + + return s_pool.ptr(); +} + +/*static*/ void ThreadPool::release(ThreadPool * pool) +{ + nvDebugCheck(pool == s_pool); + + // Make sure the threads of the pool are idle. + s_pool->wait(); + +#if PROTECT_THREAD_POOL + s_pool_mutex.unlock(); +#endif +} + + + + +/*static*/ void ThreadPool::workerFunc(void * arg) { + uint i = U32((uintptr_t)arg); // This is OK, because workerCount should always be much smaller than 2^32 + + //ThreadPool::threadId = i; + + if (s_pool->useThreadAffinity) { + lockThreadToProcessor(s_pool->useCallingThread + i); + } + + while(true) + { + s_pool->startEvents[i].wait(); + + ThreadTask * func = loadAcquirePointer(&s_pool->func); + + if (func == NULL) { + return; + } + + { +#if NV_USE_TELEMETRY + tmZoneFiltered(tmContext, 20, TMZF_NONE, "worker"); +#endif + func(s_pool->arg, s_pool->useCallingThread + i); + } + + s_pool->finishEvents[i].post(); + } +} + + +ThreadPool::ThreadPool(uint workerCount/*=processorCount()*/, bool useThreadAffinity/*=true*/, bool useCallingThread/*=false*/) +{ + s_pool = this; // Worker threads need this to be initialized before they start. + + this->useThreadAffinity = useThreadAffinity; + this->workerCount = workerCount; + this->useCallingThread = useCallingThread; + + uint threadCount = workerCount - useCallingThread; + + workers = new Thread[threadCount]; + + startEvents = new Event[threadCount]; + finishEvents = new Event[threadCount]; + + nvCompilerWriteBarrier(); // @@ Use a memory fence? + + if (useCallingThread && useThreadAffinity) { + lockThreadToProcessor(0); // Calling thread always locked to processor 0. + } + + for (uint i = 0; i < threadCount; i++) { + StringBuilder name; + name.format("worker %d", i); + workers[i].setName(name.release()); // @Leak + workers[i].start(workerFunc, (void *)i); + } + + allIdle = true; +} + +ThreadPool::~ThreadPool() +{ + // Set threads to terminate. 
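+    // workerFunc returns as soon as it loads a NULL task, so posting NULL here
+    // acts as the shutdown signal for every worker thread.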
+ start(NULL, NULL); + + // Wait until threads actually exit. + Thread::wait(workers, workerCount - useCallingThread); + + delete [] workers; + delete [] startEvents; + delete [] finishEvents; +} + +void ThreadPool::run(ThreadTask * func, void * arg) +{ + // Wait until threads are idle. + wait(); + + start(func, arg); + + if (useCallingThread) { + func(arg, 0); + } + + wait(); +} + +void ThreadPool::start(ThreadTask * func, void * arg) +{ + // Wait until threads are idle. + wait(); + + // Set our desired function. + storeReleasePointer(&this->func, func); + storeReleasePointer(&this->arg, arg); + + allIdle = false; + + // Resume threads. + Event::post(startEvents, workerCount - useCallingThread); +} + +void ThreadPool::wait() +{ + if (!allIdle) + { + // Wait for threads to complete. + Event::wait(finishEvents, workerCount - useCallingThread); + + allIdle = true; + } +} Index: ps/trunk/libraries/source/nvtt/src/src/nvthread/Win32.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvthread/Win32.h +++ ps/trunk/libraries/source/nvtt/src/src/nvthread/Win32.h @@ -0,0 +1,9 @@ +// This code is in the public domain -- castano@gmail.com + +// Never include this from a header file. + +#define WIN32_LEAN_AND_MEAN +#define VC_EXTRALEAN +#define _WIN32_WINNT 0x0400 // for SwitchToThread, TryEnterCriticalSection +#include +//#include // for _beginthreadex \ No newline at end of file Index: ps/trunk/libraries/source/nvtt/src/src/nvthread/nvthread.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvthread/nvthread.h +++ ps/trunk/libraries/source/nvtt/src/src/nvthread/nvthread.h @@ -0,0 +1,105 @@ +// This code is in the public domain -- castanyo@yahoo.es + +#pragma once +#ifndef NV_THREAD_H +#define NV_THREAD_H + +#include "nvcore/nvcore.h" + +// Function linkage +#if NVTHREAD_SHARED +#ifdef NVTHREAD_EXPORTS +#define NVTHREAD_API DLL_EXPORT +#define NVTHREAD_CLASS DLL_EXPORT_CLASS +#else +#define NVTHREAD_API DLL_IMPORT +#define NVTHREAD_CLASS DLL_IMPORT +#endif +#else // NVMATH_SHARED +#define NVTHREAD_API +#define NVTHREAD_CLASS +#endif // NVMATH_SHARED + + +// Compiler barriers. +// See: http://en.wikipedia.org/wiki/Memory_ordering +#if NV_CC_MSVC + +#include + +#pragma intrinsic(_WriteBarrier) +#define nvCompilerWriteBarrier _WriteBarrier + +#pragma intrinsic(_ReadWriteBarrier) +#define nvCompilerReadWriteBarrier _ReadWriteBarrier + +#if _MSC_VER >= 1400 // ReadBarrier is VC2005 +#pragma intrinsic(_ReadBarrier) +#define nvCompilerReadBarrier _ReadBarrier +#else +#define nvCompilerReadBarrier _ReadWriteBarrier +#endif + +#elif NV_CC_GNUC + +#define nvCompilerReadWriteBarrier() asm volatile("" ::: "memory"); +#define nvCompilerWriteBarrier nvCompilerReadWriteBarrier +#define nvCompilerReadBarrier nvCompilerReadWriteBarrier + +#elif NV_CC_CLANG && NV_CPU_ARM +// thanks to Autor Artur Bac for +inline void sync_synchronize() { asm volatile( "dmb;"); } + +/* this is not yet supported by LLVM 2.1 but it is planned +#define nvCompilerReadWriteBarrier() MemoryFence() + */ + + +// JBeilin: from what i read this should do the trick for ARM +// however this might also be wrong and dumb. +//#define nvCompilerReadWriteBarrier() asm volatile( "dmb;"); +#define nvCompilerReadWriteBarrier() nvCompilerReadWriteBarrier() +#define nvCompilerWriteBarrier nvCompilerReadWriteBarrier +#define nvCompilerReadBarrier nvCompilerReadWriteBarrier + + +#endif // NV_CC_MSVC + + +// @@ Memory barriers / fences. 
+ +// @@ Atomics. + + +/* Wrap this up: +#define YieldProcessor() __asm { rep nop } +#define YieldProcessor _mm_pause +#define YieldProcessor __yield + +BOOL WINAPI SwitchToThread(void); +*/ + + +namespace nv +{ + //void initThreadingSystemInfo(); + + // Reentrant. + uint processorCount(); + uint logicalProcessorCount(); + uint physicalProcessorCount(); + + // Locks the current thread to the given logical processor index. + void lockThreadToProcessor(int idx); + void unlockThreadToProcessor(); + + uint threadId(); + +} // nv namespace + + + + + + +#endif // NV_THREAD_H Index: ps/trunk/libraries/source/nvtt/src/src/nvthread/nvthread.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvthread/nvthread.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvthread/nvthread.cpp @@ -0,0 +1,334 @@ +// This code is in the public domain -- Ignacio Castaño + +#include "nvthread.h" + +#include "Thread.h" + +#if NV_OS_WIN32 +#include "Win32.h" +#elif NV_OS_UNIX +#include +#if !NV_OS_LINUX +#include +#endif +#include +#elif NV_OS_DARWIN +#import +#import +#import +#import + +//#include + +#include +#include +#include +#include +#include +#endif + +using namespace nv; + +#if NV_OS_WIN32 + +typedef BOOL(WINAPI *LPFN_GSI)(LPSYSTEM_INFO); +typedef BOOL(WINAPI *LPFN_ISWOW64PROCESS) (HANDLE, PBOOL); + +static bool isWow64() { + LPFN_ISWOW64PROCESS fnIsWow64Process = (LPFN_ISWOW64PROCESS)GetProcAddress(GetModuleHandle("kernel32"), "IsWow64Process"); + + BOOL wow64 = FALSE; + + if (NULL != fnIsWow64Process) { + if (!fnIsWow64Process(GetCurrentProcess(), &wow64)) { + // If error, assume false. + } + } + + return wow64 != 0; +} + +static void getSystemInfo(SYSTEM_INFO * sysinfo) { + BOOL success = FALSE; + + if (isWow64()) { + LPFN_GSI fnGetNativeSystemInfo = (LPFN_GSI)GetProcAddress(GetModuleHandle(TEXT("kernel32")), "GetNativeSystemInfo"); + + if (fnGetNativeSystemInfo != NULL) { + success = fnGetNativeSystemInfo(sysinfo); + } + } + + if (!success) { + GetSystemInfo(sysinfo); + } +} + +#endif // NV_OS_WIN32 + +// Find the number of logical processors in the system. +// Based on: http://stackoverflow.com/questions/150355/programmatically-find-the-number-of-cores-on-a-machine +uint nv::processorCount() { +#if NV_OS_WIN32 + SYSTEM_INFO sysinfo; + getSystemInfo(&sysinfo); + //return sysinfo.dwNumberOfProcessors; + + // Respect process affinity mask? + DWORD_PTR pam, sam; + GetProcessAffinityMask(GetCurrentProcess(), &pam, &sam); + + // Count number of bits set in the processor affinity mask. + uint count = 0; + for (int i = 0; i < sizeof(DWORD_PTR) * 8; i++) { + if (pam & (DWORD_PTR(1) << i)) count++; + } + nvDebugCheck(count <= sysinfo.dwNumberOfProcessors); + + return count; +#elif NV_OS_ORBIS + return 6; +#elif NV_OS_XBOX + return 3; // or 6? +#elif NV_OS_LINUX || NV_OS_NETBSD // Linux, Solaris, & AIX + return sysconf(_SC_NPROCESSORS_ONLN); +#elif NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_OPENBSD + int numCPU; + int mib[4]; + size_t len = sizeof(numCPU); + + // set the mib for hw.ncpu + mib[0] = CTL_HW; + +#if NV_OS_OPENBSD || NV_OS_FREEBSD + mib[1] = HW_NCPU; +#else + mib[1] = HW_AVAILCPU; +#endif + + // get the number of CPUs from the system + sysctl(mib, 2, &numCPU, &len, NULL, 0); + + if (numCPU < 1) { + mib[1] = HW_NCPU; + sysctl( mib, 2, &numCPU, &len, NULL, 0 ); + + if (numCPU < 1) { + return 1; // Assume single core. + } + } + + return numCPU; +#else + return 1; // Assume single core. 
+#endif +} + + +uint nv::threadId() { +#if NV_OS_WIN32 + return GetCurrentThreadId(); +#else + return 0; // @@ +#endif +} + + +// @@ If we are using less worker threads than processors and hyperthreading is available, we probably want to enumerate the logical processors +// so that the first cores of each processor goes first. This way, if say, we leave 2 hardware threads free, then we still have one worker +// thread on each physical processor. + +// I believe that currently logical processors are enumerated in physical order, that is: +// 0 = thread a in physical core 0 +// 1 = thread b in physical core 0 +// 2 = thread a in physical core 1 +// ... and so on ... +// I'm not sure we can actually rely on that. And in any case we should start detecting the number of physical processors, which appears to be a pain +// to do in a way that's compatible with newer i7 processors. + +void nv::lockThreadToProcessor(int idx) { +#if NV_OS_WIN32 + //nvDebugCheck(idx < hardwareThreadCount()); +#if 0 + DWORD_PTR tam = 1 << idx; +#else + DWORD_PTR pam, sam; + BOOL rc = GetProcessAffinityMask(GetCurrentProcess(), &pam, &sam); + + // Find the idx's bit set. + uint pidx = 0; + DWORD_PTR tam = 0; + for (int i = 0; i < sizeof(DWORD_PTR) * 8; i++) { + DWORD_PTR mask = DWORD_PTR(1) << i; + if (pam & mask) { + if (pidx == idx) { + tam = mask; + break; + } + pidx++; + } + } + + nvDebugCheck(tam != 0); +#endif + + SetThreadAffinityMask(GetCurrentThread(), tam); +#else + // @@ NOP +#endif +} + + +void nv::unlockThreadToProcessor() { +#if NV_OS_WIN32 + DWORD_PTR pam, sam; + BOOL rc = GetProcessAffinityMask(GetCurrentProcess(), &pam, &sam); + SetThreadAffinityMask(GetCurrentThread(), pam); +#else + // @@ NOP +#endif +} + +uint nv::logicalProcessorCount() { + return processorCount(); +} + + +#if NV_OS_WIN32 + +struct LOGICALPROCESSORDATA +{ + unsigned int nLargestStandardFunctionNumber; + unsigned int nLargestExtendedFunctionNumber; + int nLogicalProcessorCount; + int nLocalApicId; + int nCPUcore; + int nProcessorId; + int nApicIdCoreIdSize; + int nNC; + int nMNC; + int nCPUCoresperProcessor; + int nThreadsperCPUCore; + int nProcId; + int nCoreId; + bool CmpLegacy; + bool HTT; +}; + +#define MAX_NUMBER_OF_LOGICAL_PROCESSORS 96 +#define MAX_NUMBER_OF_PHYSICAL_PROCESSORS 8 +#define MAX_NUMBER_OF_IOAPICS 16 +static LOGICALPROCESSORDATA LogicalProcessorMap[MAX_NUMBER_OF_LOGICAL_PROCESSORS]; +static int PhysProcIds[MAX_NUMBER_OF_PHYSICAL_PROCESSORS + MAX_NUMBER_OF_IOAPICS]; + +static void gatherProcessorData(LOGICALPROCESSORDATA * p) { + + int CPUInfo[4] = { 0, 0, 0, 0 }; + __cpuid(CPUInfo, 0); + + p->nLargestStandardFunctionNumber = CPUInfo[0]; + + // Get the information associated with each valid Id + for (uint i = 0; i <= p->nLargestStandardFunctionNumber; ++i) { + __cpuid(CPUInfo, i); + + // Interpret CPU feature information. + if (i == 1) { + // Some of the bits of LocalApicId represent the CPU core + // within a processor and other bits represent the processor ID. + p->nLocalApicId = (CPUInfo[1] >> 24) & 0xff; + p->HTT = (CPUInfo[3] >> 28) & 0x1; + // recalculate later after 0x80000008 + p->nLogicalProcessorCount = (CPUInfo[1] >> 16) & 0x0FF; + } + } + + // Calling __cpuid with 0x80000000 as the InfoType argument + // gets the number of valid extended IDs. + __cpuid(CPUInfo, 0x80000000); + p->nLargestExtendedFunctionNumber = CPUInfo[0]; + + // Get the information associated with each extended ID. 
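+    // Only leaf 0x80000008 is used below: its ECX value packs the core count
+    // minus one (NC, bits 7:0) and ApicIdCoreIdSize (bits 15:12), which feed
+    // the MNC computation that follows.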
+ for (uint i = 0x80000000; i <= p->nLargestExtendedFunctionNumber; ++i) { + __cpuid(CPUInfo, i); + if (i == 0x80000008) { + p->nApicIdCoreIdSize = (CPUInfo[2] >> 12) & 0xF; + p->nNC = (CPUInfo[2]) & 0x0FF; + } + } + + // MNC + // A value of zero for ApicIdCoreIdSize indicates that MNC is derived by this + // legacy formula: MNC = NC + 1 + // A non-zero value of ApicIdCoreIdSize means that MNC is 2^ApicIdCoreIdSize + if (p->nApicIdCoreIdSize) { + p->nMNC = 2; + for (uint j = p->nApicIdCoreIdSize - 1; j > 0; j--) { + p->nMNC = p->nMNC * 2; + } + } + else { + p->nMNC = p->nNC + 1; + } + + // If HTT==0, then LogicalProcessorCount is reserved, and the CPU contains + // one CPU core and the CPU core is single-threaded. + // If HTT==1 and CmpLegacy==1, LogicalProcessorCount represents the number of + // CPU cores per processor, where each CPU core is single-threaded. If HTT==1 + // and CmpLegacy==0, then LogicalProcessorCount is the number of threads per + // processor, which is the number of cores times the number of threads per core. + // The number of cores is NC+1. + p->nCPUCoresperProcessor = p->nNC + 1; + p->nThreadsperCPUCore = (p->HTT == 0 ? 1 : (p->CmpLegacy == 1 ? 1 : p->nLogicalProcessorCount / p->nCPUCoresperProcessor )); + + // Calculate a mask for the core IDs + uint mask = 1; + uint numbits = 1; + if (p->nApicIdCoreIdSize) { + numbits = p->nApicIdCoreIdSize; + for (uint j = p->nApicIdCoreIdSize; j > 1; j--) { + mask = (mask << 1) + 1; + } + } + p->nProcId = (p->nLocalApicId & ~mask) >> numbits; + p->nCoreId = p->nLocalApicId & mask; +} + + +uint nv::physicalProcessorCount() { + + uint lpc = logicalProcessorCount(); + + // Get info about each logical processor. + for (uint i = 0; i < lpc; i++) { + // Make sure thread doesn't change processor while we gather it's data. + lockThreadToProcessor(i); + + gatherProcessorData(&LogicalProcessorMap[i]); + } + + unlockThreadToProcessor(); + + memset(PhysProcIds, 0, sizeof(PhysProcIds)); + for (uint i = 0; i < lpc; i++) { + PhysProcIds[LogicalProcessorMap[i].nProcId]++; + } + + uint pc = 0; + for (uint i = 0; i < (MAX_NUMBER_OF_PHYSICAL_PROCESSORS + MAX_NUMBER_OF_IOAPICS); i++) { + if (PhysProcIds[i] != 0) { + pc++; + } + } + + return pc; +} + +#else + +uint nv::physicalProcessorCount() { + // @@ Assume the same. + return processorCount(); +} + +#endif Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/BlockCompressor.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/BlockCompressor.h +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/BlockCompressor.h @@ -0,0 +1,55 @@ +// Copyright (c) 2009-2011 Ignacio Castano +// Copyright (c) 2007-2009 NVIDIA Corporation -- Ignacio Castano +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +#ifndef NVTT_BLOCKCOMPRESSOR_H +#define NVTT_BLOCKCOMPRESSOR_H + +#include "Compressor.h" + + +namespace nv +{ + struct ColorBlock; + class Vector4; + + struct ColorBlockCompressor : public CompressorInterface + { + virtual void compress(nvtt::AlphaMode alphaMode, uint w, uint h, uint d, const float * rgba, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); + + virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) = 0; + virtual uint blockSize() const = 0; + }; + + struct FloatColorCompressor : public CompressorInterface + { + virtual void compress(nvtt::AlphaMode alphaMode, uint w, uint h, uint d, const float * rgba, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); + + virtual void compressBlock(const Vector4 colors[16], const float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output) = 0; + virtual uint blockSize() const = 0; + }; + +} // nv namespace + + +#endif // NVTT_BLOCKCOMPRESSOR_H Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/BlockCompressor.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/BlockCompressor.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/BlockCompressor.cpp @@ -0,0 +1,335 @@ +// Copyright (c) 2009-2011 Ignacio Castano +// Copyright (c) 2007-2009 NVIDIA Corporation -- Ignacio Castano +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. 
+ +#include "BlockCompressor.h" +#include "OutputOptions.h" +#include "TaskDispatcher.h" + +#include "nvimage/Image.h" +#include "nvimage/ColorBlock.h" +#include "nvimage/BlockDXT.h" + +#include "nvmath/Vector.inl" + +#include "nvcore/Memory.h" + +#include // placement new + + +using namespace nv; +using namespace nvtt; + +/* +// OpenMP +#if defined(HAVE_OPENMP) +#include +#endif + +void ColorBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) +{ + const uint bs = blockSize(); + const uint bw = (w + 3) / 4; + const uint bh = (h + 3) / 4; + +#if defined(HAVE_OPENMP) + bool singleThreaded = false; +#else + bool singleThreaded = true; +#endif + + // Use a single thread to compress small textures. + if (bw * bh < 16) singleThreaded = true; + + if (singleThreaded) + { + nvDebugCheck(bs <= 16); + uint8 mem[16]; // @@ Output one row at a time! + + for (int y = 0; y < int(h); y += 4) { + for (uint x = 0; x < w; x += 4) { + + ColorBlock rgba; + rgba.init(w, h, data, x, y); + + compressBlock(rgba, alphaMode, compressionOptions, mem); + + if (outputOptions.outputHandler != NULL) { + outputOptions.outputHandler->writeData(mem, bs); + } + } + } + } +#if defined(HAVE_OPENMP) + else + { + const uint size = bs * bw * bh; + uint8 * mem = new uint8[size]; + + #pragma omp parallel + { + #pragma omp for + for (int i = 0; i < int(bw*bh); i++) + { + const uint x = i % bw; + const uint y = i / bw; + + ColorBlock rgba; + rgba.init(w, h, data, 4*x, 4*y); + + uint8 * ptr = mem + (y * bw + x) * bs; + compressBlock(rgba, alphaMode, compressionOptions, ptr); + } // omp for + } // omp parallel + + if (outputOptions.outputHandler != NULL) { + outputOptions.outputHandler->writeData(mem, size); + } + + delete [] mem; + } +#endif +} +*/ + + +struct CompressorContext +{ + nvtt::AlphaMode alphaMode; + uint w, h, d; + const float * data; + const nvtt::CompressionOptions::Private * compressionOptions; + + uint bw, bh, bs; + uint8 * mem; + CompressorInterface * compressor; +}; + + +// Each task compresses one block. +void ColorBlockCompressorTask(void * data, int i) +{ + CompressorContext * d = (CompressorContext *) data; + + uint x = i % d->bw; + uint y = i / d->bw; + + //for (uint x = 0; x < d->bw; x++) + { + ColorBlock rgba; + rgba.init(d->w, d->h, d->data, 4*x, 4*y); + + uint8 * ptr = d->mem + (y * d->bw + x) * d->bs; + ((ColorBlockCompressor *) d->compressor)->compressBlock(rgba, d->alphaMode, *d->compressionOptions, ptr); + } +} + +void ColorBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, uint d, const float * data, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) +{ + nvDebugCheck(d == 1); + + CompressorContext context; + context.alphaMode = alphaMode; + context.w = w; + context.h = h; + context.d = d; + context.data = data; + context.compressionOptions = &compressionOptions; + + context.bs = blockSize(); + context.bw = (w + 3) / 4; + context.bh = (h + 3) / 4; + + context.compressor = this; + + SequentialTaskDispatcher sequential; + + // Use a single thread to compress small textures. 
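+    // (fewer than four rows of 4x4 blocks: the per-block dispatch overhead
+    // would likely outweigh any parallel speedup, so fall back to the
+    // sequential dispatcher)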
+ if (context.bh < 4) dispatcher = &sequential; + +#if _DEBUG + dispatcher = &sequential; +#endif + + const uint count = context.bw * context.bh; + const uint size = context.bs * count; + context.mem = new uint8[size]; + + dispatcher->dispatch(ColorBlockCompressorTask, &context, count); + + outputOptions.writeData(context.mem, size); + + delete [] context.mem; +} + + +#if 0 +// Each task compresses one block. +void ColorSetCompressorTask(void * data, int i) +{ + CompressorContext * d = (CompressorContext *) data; + + uint x = i % d->bw; + uint y = i / d->bw; + + //for (uint x = 0; x < d->bw; x++) + { + ColorSet set; + set.setColors(d->data, d->w, d->h, x * 4, y * 4); + + uint8 * ptr = d->mem + (y * d->bw + x) * d->bs; + ((ColorSetCompressor *)d->compressor)->compressBlock(set, d->alphaMode, *d->compressionOptions, ptr); + } +} + + +void ColorSetCompressor::compress(AlphaMode alphaMode, uint w, uint h, uint d, const float * data, nvtt::TaskDispatcher * dispatcher, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) +{ + nvDebugCheck(d == 1); + + CompressorContext context; + context.alphaMode = alphaMode; + context.w = w; + context.h = h; + context.data = data; + context.compressionOptions = &compressionOptions; + + context.bs = blockSize(); + context.bw = (w + 3) / 4; + context.bh = (h + 3) / 4; + + context.compressor = this; + + SequentialTaskDispatcher sequential; + + // Use a single thread to compress small textures. + if (context.bh < 4) dispatcher = &sequential; + +#if _DEBUG + dispatcher = &sequential; +#endif + + const uint count = context.bw * context.bh; + const uint size = context.bs * count; + context.mem = new uint8[size]; + + dispatcher->dispatch(ColorSetCompressorTask, &context, count); + + outputOptions.writeData(context.mem, size); + + delete [] context.mem; +} +#endif // 0 + + +// Each task compresses one block. +void FloatColorCompressorTask(void * data, int i) +{ + CompressorContext * d = (CompressorContext *) data; + + // Copy image to block. + const uint block_x = (i % d->bw); + const uint block_y = (i / d->bw); + + const uint src_x_offset = block_x * 4; + const uint src_y_offset = block_y * 4; + + const float * r = (const float *)d->data + d->w * d->h * d->d * 0; + const float * g = (const float *)d->data + d->w * d->h * d->d * 1; + const float * b = (const float *)d->data + d->w * d->h * d->d * 2; + const float * a = (const float *)d->data + d->w * d->h * d->d * 3; + + Vector4 colors[16]; + float weights[16]; + + const uint block_w = min(d->w - block_x * 4U, 4U); + const uint block_h = min(d->h - block_y * 4U, 4U); + + uint x, y; + for (y = 0; y < block_h; y++) { + for (x = 0; x < block_w; x++) { + uint dst_idx = 4 * y + x; + uint src_idx = (y + src_y_offset) * d->w + (x + src_x_offset); + colors[dst_idx].x = r[src_idx]; + colors[dst_idx].y = g[src_idx]; + colors[dst_idx].z = b[src_idx]; + colors[dst_idx].w = a[src_idx]; + weights[dst_idx] = (d->alphaMode == nvtt::AlphaMode_Transparency) ? a[src_idx] : 1.0f; + } + for (; x < 4; x++) { + uint dst_idx = 4 * y + x; + colors[dst_idx] = Vector4(0); + weights[dst_idx] = 0.0f; + } + } + for (; y < 4; y++) { + for (x = 0; x < 4; x++) { + uint dst_idx = 4 * y + x; + colors[dst_idx] = Vector4(0); + weights[dst_idx] = 0.0f; + } + } + + // Compress block. 
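+    // Each task writes into its own d->bs-byte slice of the shared output
+    // buffer, so no synchronization between tasks is required here.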
+ uint8 * output = d->mem + (block_y * d->bw + block_x) * d->bs; + ((FloatColorCompressor *)d->compressor)->compressBlock(colors, weights, *d->compressionOptions, output); +} + + +void FloatColorCompressor::compress(AlphaMode alphaMode, uint w, uint h, uint d, const float * data, nvtt::TaskDispatcher * dispatcher, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) +{ + nvDebugCheck(d == 1); // @@ Add support for compressed 3D textures. + + CompressorContext context; + context.alphaMode = alphaMode; + context.w = w; + context.h = h; + context.d = d; + context.data = data; + context.compressionOptions = &compressionOptions; + + context.bs = blockSize(); + context.bw = (w + 3) / 4; + context.bh = (h + 3) / 4; + + context.compressor = this; + + SequentialTaskDispatcher sequential; + + // Use a single thread to compress small textures. + if (context.bh < 4) dispatcher = &sequential; + +#if _DEBUG + dispatcher = &sequential; +#endif + + const uint count = context.bw * context.bh; + const uint size = context.bs * count; + context.mem = new uint8[size]; + + dispatcher->dispatch(FloatColorCompressorTask, &context, count); + + outputOptions.writeData(context.mem, size); + + delete [] context.mem; +} Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/CMakeLists.txt =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/CMakeLists.txt +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/CMakeLists.txt @@ -3,115 +3,64 @@ ADD_SUBDIRECTORY(squish) SET(NVTT_SRCS - nvtt.h - nvtt.cpp - Compressor.h - Compressor.cpp - nvtt_wrapper.h - nvtt_wrapper.cpp - CompressDXT.h - CompressDXT.cpp - CompressRGB.h - CompressRGB.cpp - QuickCompressDXT.h - QuickCompressDXT.cpp - OptimalCompressDXT.h - OptimalCompressDXT.cpp - SingleColorLookup.h - CompressionOptions.h - CompressionOptions.cpp - InputOptions.h - InputOptions.cpp - OutputOptions.h - OutputOptions.cpp - cuda/CudaUtils.h - cuda/CudaUtils.cpp - cuda/CudaMath.h - cuda/Bitmaps.h - cuda/CudaCompressDXT.h - cuda/CudaCompressDXT.cpp) - -IF(CUDA_FOUND) - ADD_DEFINITIONS(-DHAVE_CUDA) - WRAP_CUDA(CUDA_SRCS cuda/CompressKernel.cu) - SET(NVTT_SRCS ${NVTT_SRCS} ${CUDA_SRCS}) - SET(LIBS ${LIBS} ${CUDA_LIBRARIES}) - INCLUDE_DIRECTORIES(${CUDA_INCLUDE_PATH}) -ENDIF(CUDA_FOUND) + nvtt.h nvtt.cpp + nvtt_wrapper.h nvtt_wrapper.cpp + ClusterFit.h ClusterFit.cpp + Compressor.h + BlockCompressor.h BlockCompressor.cpp + CompressorDX9.h CompressorDX9.cpp + CompressorDX10.h CompressorDX10.cpp + CompressorDX11.h CompressorDX11.cpp + CompressorDXT1.h CompressorDXT1.cpp + CompressorDXT5_RGBM.h CompressorDXT5_RGBM.cpp + CompressorRGB.h CompressorRGB.cpp + Context.h Context.cpp + QuickCompressDXT.h QuickCompressDXT.cpp + OptimalCompressDXT.h OptimalCompressDXT.cpp + SingleColorLookup.h SingleColorLookup.cpp + CompressionOptions.h CompressionOptions.cpp + InputOptions.h InputOptions.cpp + OutputOptions.h OutputOptions.cpp + TaskDispatcher.h #TaskDispatcher.cpp + Surface.h Surface.cpp + CubeSurface.h CubeSurface.cpp + cuda/CudaUtils.h cuda/CudaUtils.cpp + cuda/CudaMath.h + cuda/BitmapTable.h + cuda/CudaCompressorDXT.h cuda/CudaCompressorDXT.cpp) + +IF (CUDA_FOUND) + ADD_DEFINITIONS(-DHAVE_CUDA) + CUDA_COMPILE(CUDA_SRCS cuda/CompressKernel.cu) + SET(NVTT_SRCS ${NVTT_SRCS} ${CUDA_SRCS}) + SET(LIBS ${LIBS} ${CUDA_LIBRARIES}) + INCLUDE_DIRECTORIES(${CUDA_INCLUDE_DIRS}) +ENDIF (CUDA_FOUND) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) ADD_DEFINITIONS(-DNVTT_EXPORTS) IF(NVTT_SHARED) - 
IF(CMAKE_SYSTEM_NAME MATCHES "FreeBSD") - SET(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-z,origin") - ENDIF(CMAKE_SYSTEM_NAME MATCHES "FreeBSD") + IF(CMAKE_SYSTEM_NAME MATCHES "FreeBSD") + SET(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-z,origin") + ENDIF(CMAKE_SYSTEM_NAME MATCHES "FreeBSD") - ADD_DEFINITIONS(-DNVTT_SHARED=1) - ADD_LIBRARY(nvtt SHARED ${NVTT_SRCS}) + ADD_DEFINITIONS(-DNVTT_SHARED=1) + ADD_LIBRARY(nvtt SHARED ${NVTT_SRCS}) ELSE(NVTT_SHARED) - ADD_LIBRARY(nvtt ${NVTT_SRCS}) + ADD_LIBRARY(nvtt ${NVTT_SRCS}) ENDIF(NVTT_SHARED) -TARGET_LINK_LIBRARIES(nvtt ${LIBS} nvcore nvmath nvimage squish) +TARGET_LINK_LIBRARIES(nvtt ${LIBS} nvcore nvimage nvthread squish bc6h bc7 nvmath) INSTALL(TARGETS nvtt - RUNTIME DESTINATION ${BINDIR} - LIBRARY DESTINATION ${LIBDIR} - ARCHIVE DESTINATION ${LIBDIR}) + RUNTIME DESTINATION ${BINDIR} + LIBRARY DESTINATION ${LIBDIR} + ARCHIVE DESTINATION ${LIBDIR}) INSTALL(FILES nvtt.h DESTINATION include/nvtt) - -# test executables -ADD_EXECUTABLE(nvcompress tools/compress.cpp tools/cmdline.h) -TARGET_LINK_LIBRARIES(nvcompress nvcore nvmath nvimage nvtt) - -ADD_EXECUTABLE(nvdecompress tools/decompress.cpp tools/cmdline.h) -TARGET_LINK_LIBRARIES(nvdecompress nvcore nvmath nvimage) - -ADD_EXECUTABLE(nvddsinfo tools/ddsinfo.cpp tools/cmdline.h) -TARGET_LINK_LIBRARIES(nvddsinfo nvcore nvmath nvimage) - -ADD_EXECUTABLE(nvimgdiff tools/imgdiff.cpp tools/cmdline.h) -TARGET_LINK_LIBRARIES(nvimgdiff nvcore nvmath nvimage) - -ADD_EXECUTABLE(nvassemble tools/assemble.cpp tools/cmdline.h) -TARGET_LINK_LIBRARIES(nvassemble nvcore nvmath nvimage) - -ADD_EXECUTABLE(filtertest tests/filtertest.cpp tools/cmdline.h) -TARGET_LINK_LIBRARIES(filtertest nvcore nvmath nvimage) - -ADD_EXECUTABLE(nvzoom tools/resize.cpp tools/cmdline.h) -TARGET_LINK_LIBRARIES(nvzoom nvcore nvmath nvimage) - -INSTALL(TARGETS nvcompress nvdecompress nvddsinfo nvimgdiff nvassemble nvzoom DESTINATION bin) - -# UI tools -IF(QT4_FOUND AND NOT MSVC) - SET(QT_USE_QTOPENGL TRUE) - INCLUDE_DIRECTORIES(${QT_INCLUDE_DIR} ${CMAKE_CURRENT_BINARY_DIR}) - - SET(SRCS - tools/main.cpp - tools/configdialog.h - tools/configdialog.cpp) - - SET(LIBS - nvtt - ${QT_QTCORE_LIBRARY} - ${QT_QTGUI_LIBRARY} - ${QT_QTOPENGL_LIBRARY}) - - QT4_WRAP_UI(UICS tools/configdialog.ui) - QT4_WRAP_CPP(MOCS tools/configdialog.h) - #QT4_ADD_RESOURCES(RCCS tools/configdialog.rc) - - ADD_EXECUTABLE(nvcompressui MACOSX_BUNDLE ${SRCS} ${UICS} ${MOCS}) - TARGET_LINK_LIBRARIES(nvcompressui ${LIBS}) - -ENDIF(QT4_FOUND AND NOT MSVC) - - +#ADD_SUBDIRECTORY(tools) +#ADD_SUBDIRECTORY(tests) Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/ClusterFit.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/ClusterFit.h +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/ClusterFit.h @@ -0,0 +1,83 @@ +/* ----------------------------------------------------------------------------- + + Copyright (c) 2006 Simon Brown si@sjbrown.co.uk + Copyright (c) 2006 Ignacio Castano icastano@nvidia.com + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice 
and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + -------------------------------------------------------------------------- */ + +#ifndef NVTT_CLUSTERFIT_H +#define NVTT_CLUSTERFIT_H + +#include "nvmath/SimdVector.h" +#include "nvmath/Vector.h" +#include "nvcore/Memory.h" + +// Use SIMD version if altivec or SSE are available. +#define NVTT_USE_SIMD (NV_USE_ALTIVEC || NV_USE_SSE) +//#define NVTT_USE_SIMD 0 + +namespace nv { + + struct ColorSet; + + class ClusterFit + { + public: + ClusterFit(); + + //void setColorSet(const ColorSet * set); + void setColorSet(const Vector3 * colors, const float * weights, int count); + + void setColorWeights(const Vector4 & w); + float bestError() const; + + bool compress3(Vector3 * start, Vector3 * end); + bool compress4(Vector3 * start, Vector3 * end); + + private: + + uint m_count; + + // IC: Color and weight arrays are larger than necessary to avoid compiler warning. + + #if NVTT_USE_SIMD + NV_ALIGN_16 SimdVector m_weighted[17]; // color | weight + SimdVector m_metric; // vec3 + SimdVector m_metricSqr; // vec3 + SimdVector m_xxsum; // color | weight + SimdVector m_xsum; // color | weight (wsum) + SimdVector m_besterror; // scalar + #else + Vector3 m_weighted[17]; + float m_weights[17]; + Vector3 m_metric; + Vector3 m_metricSqr; + Vector3 m_xxsum; + Vector3 m_xsum; + float m_wsum; + float m_besterror; + #endif + }; + +} // nv namespace + +#endif // NVTT_CLUSTERFIT_H Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/ClusterFit.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/ClusterFit.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/ClusterFit.cpp @@ -0,0 +1,660 @@ +/* ----------------------------------------------------------------------------- + + Copyright (c) 2006 Simon Brown si@sjbrown.co.uk + Copyright (c) 2006 Ignacio Castano icastano@nvidia.com + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + -------------------------------------------------------------------------- */ + +#include "ClusterFit.h" +#include "nvmath/Fitting.h" +#include "nvmath/Vector.inl" +#include "nvmath/ftoi.h" +#include "nvimage/ColorBlock.h" + +#include // FLT_MAX + +using namespace nv; + +ClusterFit::ClusterFit() +{ +} + +#if 0 // @@ Deprecate. Do not use color set directly. +void ClusterFit::setColorSet(const ColorSet * set) +{ + // initialise the best error +#if NVTT_USE_SIMD + m_besterror = SimdVector( FLT_MAX ); + Vector3 metric = m_metric.toVector3(); +#else + m_besterror = FLT_MAX; + Vector3 metric = m_metric; +#endif + + // cache some values + m_count = set->colorCount; + + Vector3 values[16]; + for (uint i = 0; i < m_count; i++) + { + values[i] = set->colors[i].xyz(); + } + + Vector3 principal = Fit::computePrincipalComponent_PowerMethod(m_count, values, set->weights, metric); + //Vector3 principal = Fit::computePrincipalComponent_EigenSolver(m_count, values, set->weights, metric); + + // build the list of values + int order[16]; + float dps[16]; + for (uint i = 0; i < m_count; ++i) + { + dps[i] = dot(values[i], principal); + order[i] = i; + } + + // stable sort + for (uint i = 0; i < m_count; ++i) + { + for (uint j = i; j > 0 && dps[j] < dps[j - 1]; --j) + { + swap(dps[j], dps[j - 1]); + swap(order[j], order[j - 1]); + } + } + + // weight all the points +#if NVTT_USE_SIMD + m_xxsum = SimdVector( 0.0f ); + m_xsum = SimdVector( 0.0f ); +#else + m_xxsum = Vector3(0.0f); + m_xsum = Vector3(0.0f); + m_wsum = 0.0f; +#endif + + for (uint i = 0; i < m_count; ++i) + { + int p = order[i]; +#if NVTT_USE_SIMD + NV_ALIGN_16 Vector4 tmp(values[p], 1); + m_weighted[i] = SimdVector(tmp.component) * SimdVector(set->weights[p]); + m_xxsum += m_weighted[i] * m_weighted[i]; + m_xsum += m_weighted[i]; +#else + m_weighted[i] = values[p] * set->weights[p]; + m_xxsum += m_weighted[i] * m_weighted[i]; + m_xsum += m_weighted[i]; + m_weights[i] = set->weights[p]; + m_wsum += m_weights[i]; +#endif + } +} +#endif // 0 + + +void ClusterFit::setColorSet(const Vector3 * colors, const float * weights, int count) +{ + // initialise the best error +#if NVTT_USE_SIMD + m_besterror = SimdVector( FLT_MAX ); + Vector3 metric = m_metric.toVector3(); +#else + m_besterror = FLT_MAX; + Vector3 metric = m_metric; +#endif + + m_count = count; + + Vector3 principal = Fit::computePrincipalComponent_PowerMethod(count, colors, weights, metric); + //Vector3 principal = Fit::computePrincipalComponent_EigenSolver(count, colors, weights, metric); + + // build the list of values + int order[16]; + float dps[16]; + for (uint i = 0; i < m_count; ++i) + { + dps[i] = dot(colors[i], principal); + order[i] = i; + } + + // stable sort + for (uint i = 0; i < m_count; ++i) + { + for (uint j = i; j > 0 && dps[j] < dps[j - 1]; --j) + { + swap(dps[j], dps[j - 1]); + swap(order[j], order[j - 1]); + } + } + + // weight all the points +#if NVTT_USE_SIMD + m_xxsum = SimdVector( 0.0f ); + m_xsum = SimdVector( 0.0f ); +#else + m_xxsum = Vector3(0.0f); + m_xsum = Vector3(0.0f); + m_wsum = 0.0f; +#endif + + for (uint i = 0; i < m_count; ++i) + { + int p = order[i]; +#if NVTT_USE_SIMD + NV_ALIGN_16 Vector4 tmp(colors[p], 1); + m_weighted[i] = SimdVector(tmp.component) * SimdVector(weights[p]); + m_xxsum += m_weighted[i] * m_weighted[i]; + m_xsum += m_weighted[i]; +#else + m_weighted[i] = colors[p] * weights[p]; + m_xxsum += m_weighted[i] * m_weighted[i]; + m_xsum += m_weighted[i]; + m_weights[i] = weights[p]; + m_wsum += m_weights[i]; +#endif + } +} + + + +void 
ClusterFit::setColorWeights(Vector4::Arg w) +{ +#if NVTT_USE_SIMD + NV_ALIGN_16 Vector4 tmp(w.xyz(), 1); + m_metric = SimdVector(tmp.component); +#else + m_metric = w.xyz(); +#endif + m_metricSqr = m_metric * m_metric; +} + +float ClusterFit::bestError() const +{ +#if NVTT_USE_SIMD + SimdVector x = m_xxsum * m_metricSqr; + SimdVector error = m_besterror + x.splatX() + x.splatY() + x.splatZ(); + return error.toFloat(); +#else + return m_besterror + dot(m_xxsum, m_metricSqr); +#endif + +} + +#if NVTT_USE_SIMD + +bool ClusterFit::compress3( Vector3 * start, Vector3 * end ) +{ + const int count = m_count; + const SimdVector one = SimdVector(1.0f); + const SimdVector zero = SimdVector(0.0f); + const SimdVector half(0.5f, 0.5f, 0.5f, 0.25f); + const SimdVector two = SimdVector(2.0); + const SimdVector grid( 31.0f, 63.0f, 31.0f, 0.0f ); + const SimdVector gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f ); + + // declare variables + SimdVector beststart = SimdVector( 0.0f ); + SimdVector bestend = SimdVector( 0.0f ); + SimdVector besterror = SimdVector( FLT_MAX ); + + SimdVector x0 = zero; + + int b0 = 0, b1 = 0; + + // check all possible clusters for this total order + for( int c0 = 0; c0 <= count; c0++) + { + SimdVector x1 = zero; + + for( int c1 = 0; c1 <= count-c0; c1++) + { + const SimdVector x2 = m_xsum - x1 - x0; + + //Vector3 alphax_sum = x0 + x1 * 0.5f; + //float alpha2_sum = w0 + w1 * 0.25f; + const SimdVector alphax_sum = multiplyAdd(x1, half, x0); // alphax_sum, alpha2_sum + const SimdVector alpha2_sum = alphax_sum.splatW(); + + //const Vector3 betax_sum = x2 + x1 * 0.5f; + //const float beta2_sum = w2 + w1 * 0.25f; + const SimdVector betax_sum = multiplyAdd(x1, half, x2); // betax_sum, beta2_sum + const SimdVector beta2_sum = betax_sum.splatW(); + + //const float alphabeta_sum = w1 * 0.25f; + const SimdVector alphabeta_sum = (x1 * half).splatW(); // alphabeta_sum + + // const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); + const SimdVector factor = reciprocal( negativeMultiplySubtract(alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum) ); + + SimdVector a = negativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor; + SimdVector b = negativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor; + + // clamp to the grid + a = min( one, max( zero, a ) ); + b = min( one, max( zero, b ) ); + a = truncate( multiplyAdd( grid, a, half ) ) * gridrcp; + b = truncate( multiplyAdd( grid, b, half ) ) * gridrcp; + + // compute the error (we skip the constant xxsum) + SimdVector e1 = multiplyAdd( a*a, alpha2_sum, b*b*beta2_sum ); + SimdVector e2 = negativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum ); + SimdVector e3 = negativeMultiplySubtract( b, betax_sum, e2 ); + SimdVector e4 = multiplyAdd( two, e3, e1 ); + + // apply the metric to the error term + SimdVector e5 = e4 * m_metricSqr; + SimdVector error = e5.splatX() + e5.splatY() + e5.splatZ(); + + // keep the solution if it wins + if( compareAnyLessThan( error, besterror ) ) + { + besterror = error; + beststart = a; + bestend = b; + b0 = c0; + b1 = c1; + } + + x1 += m_weighted[c0+c1]; + } + + x0 += m_weighted[c0]; + } + + // save the block if necessary + if( compareAnyLessThan( besterror, m_besterror ) ) + { + + *start = beststart.toVector3(); + *end = bestend.toVector3(); + + // save the error + m_besterror = besterror; + + return true; + } + + return false; +} + +bool ClusterFit::compress4( Vector3 * start, Vector3 * end ) +{ + const int count = m_count; + 
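+    // compress4 enumerates every split of the principal-axis-ordered colors
+    // into four clusters (interpolation weights 1, 2/3, 1/3, 0), solves for the
+    // best endpoints of each split in closed form, and keeps the pair with the
+    // lowest metric-weighted error.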
const SimdVector one = SimdVector(1.0f); + const SimdVector zero = SimdVector(0.0f); + const SimdVector half = SimdVector(0.5f); + const SimdVector two = SimdVector(2.0); + const SimdVector onethird( 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/9.0f ); + const SimdVector twothirds( 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f, 4.0f/9.0f ); + const SimdVector twonineths = SimdVector( 2.0f/9.0f ); + const SimdVector grid( 31.0f, 63.0f, 31.0f, 0.0f ); + const SimdVector gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f ); + + // declare variables + SimdVector beststart = SimdVector( 0.0f ); + SimdVector bestend = SimdVector( 0.0f ); + SimdVector besterror = SimdVector( FLT_MAX ); + + SimdVector x0 = zero; + int b0 = 0, b1 = 0, b2 = 0; + + // check all possible clusters for this total order + for( int c0 = 0; c0 <= count; c0++) + { + SimdVector x1 = zero; + + for( int c1 = 0; c1 <= count-c0; c1++) + { + SimdVector x2 = zero; + + for( int c2 = 0; c2 <= count-c0-c1; c2++) + { + const SimdVector x3 = m_xsum - x2 - x1 - x0; + + //const Vector3 alphax_sum = x0 + x1 * (2.0f / 3.0f) + x2 * (1.0f / 3.0f); + //const float alpha2_sum = w0 + w1 * (4.0f/9.0f) + w2 * (1.0f/9.0f); + const SimdVector alphax_sum = multiplyAdd(x2, onethird, multiplyAdd(x1, twothirds, x0)); // alphax_sum, alpha2_sum + const SimdVector alpha2_sum = alphax_sum.splatW(); + + //const Vector3 betax_sum = x3 + x2 * (2.0f / 3.0f) + x1 * (1.0f / 3.0f); + //const float beta2_sum = w3 + w2 * (4.0f/9.0f) + w1 * (1.0f/9.0f); + const SimdVector betax_sum = multiplyAdd(x2, twothirds, multiplyAdd(x1, onethird, x3)); // betax_sum, beta2_sum + const SimdVector beta2_sum = betax_sum.splatW(); + + //const float alphabeta_sum = (w1 + w2) * (2.0f/9.0f); + const SimdVector alphabeta_sum = twonineths*( x1 + x2 ).splatW(); // alphabeta_sum + + //const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); + const SimdVector factor = reciprocal( negativeMultiplySubtract(alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum) ); + + SimdVector a = negativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor; + SimdVector b = negativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor; + + // clamp to the grid + a = min( one, max( zero, a ) ); + b = min( one, max( zero, b ) ); + a = truncate( multiplyAdd( grid, a, half ) ) * gridrcp; + b = truncate( multiplyAdd( grid, b, half ) ) * gridrcp; + + // compute the error (we skip the constant xxsum) + SimdVector e1 = multiplyAdd( a*a, alpha2_sum, b*b*beta2_sum ); + SimdVector e2 = negativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum ); + SimdVector e3 = negativeMultiplySubtract( b, betax_sum, e2 ); + SimdVector e4 = multiplyAdd( two, e3, e1 ); + +#if 1 + // apply the metric to the error term + SimdVector e5 = e4 * m_metricSqr; + SimdVector error = e5.splatX() + e5.splatY() + e5.splatZ(); +#else + // @@ Is there a horizontal max SIMD instruction? 
+ SimdVector error = e4.splatX() + e4.splatY() + e4.splatZ(); + error *= two; + error += max(max(e4.splatX(), e4.splatY()), e4.splatZ()); + error -= min(min(e4.splatX(), e4.splatY()), e4.splatZ()); + +#endif + + // keep the solution if it wins + if (compareAnyLessThan(error, besterror)) + { + besterror = error; + beststart = a; + bestend = b; + b0 = c0; + b1 = c1; + b2 = c2; + } + + x2 += m_weighted[c0+c1+c2]; + } + + x1 += m_weighted[c0+c1]; + } + + x0 += m_weighted[c0]; + } + + // save the block if necessary + if (compareAnyLessThan(besterror, m_besterror)) + { + *start = beststart.toVector3(); + *end = bestend.toVector3(); + + // save the error + m_besterror = besterror; + + return true; + } + + return false; +} + +#else + +inline Vector3 round565(const Vector3 & v) { + uint r = ftoi_trunc(v.x * 31.0f); + float r0 = float(((r+0) << 3) | ((r+0) >> 2)); + float r1 = float(((r+1) << 3) | ((r+1) >> 2)); + if (fabs(v.x - r1) < fabs(v.x - r0)) r = min(r+1, 31U); + r = (r << 3) | (r >> 2); + + uint g = ftoi_trunc(v.y * 63.0f); + float g0 = float(((g+0) << 2) | ((g+0) >> 4)); + float g1 = float(((g+1) << 2) | ((g+1) >> 4)); + if (fabs(v.y - g1) < fabs(v.y - g0)) g = min(g+1, 63U); + g = (g << 2) | (g >> 4); + + uint b = ftoi_trunc(v.z * 31.0f); + float b0 = float(((b+0) << 3) | ((b+0) >> 2)); + float b1 = float(((b+1) << 3) | ((b+1) >> 2)); + if (fabs(v.z - b1) < fabs(v.z - b0)) b = min(b+1, 31U); + + b = (b << 3) | (b >> 2); + + return Vector3(float(r)/255, float(g)/255, float(b)/255); +} + +bool ClusterFit::compress3(Vector3 * start, Vector3 * end) +{ + const uint count = m_count; + const Vector3 grid( 31.0f, 63.0f, 31.0f ); + const Vector3 gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f ); + + // declare variables + Vector3 beststart( 0.0f ); + Vector3 bestend( 0.0f ); + float besterror = FLT_MAX; + + Vector3 x0(0.0f); + float w0 = 0.0f; + + int b0 = 0, b1 = 0; + + // check all possible clusters for this total order + for (uint c0 = 0; c0 <= count; c0++) + { + Vector3 x1(0.0f); + float w1 = 0.0f; + + for (uint c1 = 0; c1 <= count-c0; c1++) + { + float w2 = m_wsum - w0 - w1; + + // These factors could be entirely precomputed. 
+ float const alpha2_sum = w0 + w1 * 0.25f; + float const beta2_sum = w2 + w1 * 0.25f; + float const alphabeta_sum = w1 * 0.25f; + float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); + + Vector3 const alphax_sum = x0 + x1 * 0.5f; + Vector3 const betax_sum = m_xsum - alphax_sum; + + Vector3 a = (alphax_sum*beta2_sum - betax_sum*alphabeta_sum) * factor; + Vector3 b = (betax_sum*alpha2_sum - alphax_sum*alphabeta_sum) * factor; + + // clamp to the grid + a = clamp(a, 0, 1); + b = clamp(b, 0, 1); +#if 1 + a = floor(grid * a + 0.5f) * gridrcp; + b = floor(grid * b + 0.5f) * gridrcp; +#else + + //int ar = ftoi_round(31 * a.x); ar = (ar << 3) | (ar >> 2); a.x = float(ar) / 255.0f; + //int ag = ftoi_round(63 * a.y); ar = (ag << 2) | (ag >> 4); a.y = float(ag) / 255.0f; + //int ab = ftoi_round(31 * a.z); ar = (ab << 3) | (ab >> 2); a.z = float(ab) / 255.0f; + //int br = ftoi_round(31 * b.x); br = (br << 3) | (br >> 2); b.x = float(br) / 255.0f; + //int bg = ftoi_round(63 * b.y); br = (bg << 2) | (bg >> 4); b.y = float(bg) / 255.0f; + //int bb = ftoi_round(31 * b.z); br = (bb << 3) | (bb >> 2); b.z = float(bb) / 255.0f; + + /*a = floor(a * grid + 0.5f); + a.x = (a.x * 8 + floorf(a.x / 4)) / 255.0f; + a.y = (a.y * 4 + floorf(a.y / 16)) / 255.0f; + a.z = (a.z * 8 + floorf(a.z / 4)) / 255.0f; + + b = floor(b * grid + 0.5f); + b.x = (b.x * 8 + floorf(b.x / 4)) / 255.0f; + b.y = (b.y * 4 + floorf(b.y / 16)) / 255.0f; + b.z = (b.z * 8 + floorf(b.z / 4)) / 255.0f;*/ + + a = round565(a); + b = round565(b); +#endif + + // compute the error + Vector3 e1 = a*a*alpha2_sum + b*b*beta2_sum + 2.0f*( a*b*alphabeta_sum - a*alphax_sum - b*betax_sum ); + + // apply the metric to the error term + float error = dot(e1, m_metricSqr); + + // keep the solution if it wins + if (error < besterror) + { + besterror = error; + beststart = a; + bestend = b; + b0 = c0; + b1 = c1; + } + + x1 += m_weighted[c0+c1]; + w1 += m_weights[c0+c1]; + } + + x0 += m_weighted[c0]; + w0 += m_weights[c0]; + } + + // save the block if necessary + if( besterror < m_besterror ) + { + + *start = beststart; + *end = bestend; + + // save the error + m_besterror = besterror; + + return true; + } + + return false; +} + +bool ClusterFit::compress4(Vector3 * start, Vector3 * end) +{ + const uint count = m_count; + const Vector3 grid( 31.0f, 63.0f, 31.0f ); + const Vector3 gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f ); + + // declare variables + Vector3 beststart( 0.0f ); + Vector3 bestend( 0.0f ); + float besterror = FLT_MAX; + + Vector3 x0(0.0f); + float w0 = 0.0f; + int b0 = 0, b1 = 0, b2 = 0; + + // check all possible clusters for this total order + for (uint c0 = 0; c0 <= count; c0++) + { + Vector3 x1(0.0f); + float w1 = 0.0f; + + for (uint c1 = 0; c1 <= count-c0; c1++) + { + Vector3 x2(0.0f); + float w2 = 0.0f; + + for (uint c2 = 0; c2 <= count-c0-c1; c2++) + { + float w3 = m_wsum - w0 - w1 - w2; + + float const alpha2_sum = w0 + w1 * (4.0f/9.0f) + w2 * (1.0f/9.0f); + float const beta2_sum = w3 + w2 * (4.0f/9.0f) + w1 * (1.0f/9.0f); + float const alphabeta_sum = (w1 + w2) * (2.0f/9.0f); + float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); + + Vector3 const alphax_sum = x0 + x1 * (2.0f / 3.0f) + x2 * (1.0f / 3.0f); + Vector3 const betax_sum = m_xsum - alphax_sum; + + Vector3 a = ( alphax_sum*beta2_sum - betax_sum*alphabeta_sum )*factor; + Vector3 b = ( betax_sum*alpha2_sum - alphax_sum*alphabeta_sum )*factor; + + // clamp to the grid + a = clamp(a, 0, 1); + b = clamp(b, 0, 1); +#if 0 
+ a = floor(a * grid + 0.5f) * gridrcp; + b = floor(b * grid + 0.5f) * gridrcp; +#else + //int ar = ftoi_round(31 * a.x); ar = (ar << 3) | (ar >> 2); a.x = float(ar) / 255.0f; + //int ag = ftoi_round(63 * a.y); ar = (ag << 2) | (ag >> 4); a.y = float(ag) / 255.0f; + //int ab = ftoi_round(31 * a.z); ar = (ab << 3) | (ab >> 2); a.z = float(ab) / 255.0f; + //int br = ftoi_round(31 * b.x); br = (br << 3) | (br >> 2); b.x = float(br) / 255.0f; + //int bg = ftoi_round(63 * b.y); br = (bg << 2) | (bg >> 4); b.y = float(bg) / 255.0f; + //int bb = ftoi_round(31 * b.z); br = (bb << 3) | (bb >> 2); b.z = float(bb) / 255.0f; + + /* + a = floor(a * grid + 0.5f); + a.x = (a.x * 8 + floorf(a.x / 4)) / 255.0f; + a.y = (a.y * 4 + floorf(a.y / 16)) / 255.0f; + a.z = (a.z * 8 + floorf(a.z / 4)) / 255.0f; + + b = floor(b * grid + 0.5f); + b.x = (b.x * 8 + floorf(b.x / 4)) / 255.0f; + b.y = (b.y * 4 + floorf(b.y / 16)) / 255.0f; + b.z = (b.z * 8 + floorf(b.z / 4)) / 255.0f; + */ + + a = round565(a); + b = round565(b); +#endif + // @@ It would be much more accurate to evaluate the error exactly. + + // compute the error + Vector3 e1 = a*a*alpha2_sum + b*b*beta2_sum + 2.0f*( a*b*alphabeta_sum - a*alphax_sum - b*betax_sum ); + + // apply the metric to the error term + float error = dot( e1, m_metricSqr ); + + // keep the solution if it wins + if (error < besterror) + { + besterror = error; + beststart = a; + bestend = b; + b0 = c0; + b1 = c1; + b2 = c2; + } + + x2 += m_weighted[c0+c1+c2]; + w2 += m_weights[c0+c1+c2]; + } + + x1 += m_weighted[c0+c1]; + w1 += m_weights[c0+c1]; + } + + x0 += m_weighted[c0]; + w0 += m_weights[c0]; + } + + // save the block if necessary + if (besterror < m_besterror) + { + *start = beststart; + *end = bestend; + + // save the error + m_besterror = besterror; + + return true; + } + + return false; +} + +#endif // NVTT_USE_SIMD Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressDXT.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressDXT.h +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressDXT.h @@ -1,87 +0,0 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. 
- -#ifndef NV_TT_COMPRESSDXT_H -#define NV_TT_COMPRESSDXT_H - -#include -#include "nvtt.h" - -namespace nv -{ - class Image; - class FloatImage; - - class FastCompressor - { - public: - FastCompressor(); - ~FastCompressor(); - - void setImage(const Image * image, nvtt::AlphaMode alphaMode); - - void compressDXT1(const nvtt::OutputOptions::Private & outputOptions); - void compressDXT1a(const nvtt::OutputOptions::Private & outputOptions); - void compressDXT3(const nvtt::OutputOptions::Private & outputOptions); - void compressDXT5(const nvtt::OutputOptions::Private & outputOptions); - void compressDXT5n(const nvtt::OutputOptions::Private & outputOptions); - - private: - const Image * m_image; - nvtt::AlphaMode m_alphaMode; - }; - - class SlowCompressor - { - public: - SlowCompressor(); - ~SlowCompressor(); - - void setImage(const Image * image, nvtt::AlphaMode alphaMode); - - void compressDXT1(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); - void compressDXT1a(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); - void compressDXT3(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); - void compressDXT5(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); - void compressDXT5n(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); - void compressBC4(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); - void compressBC5(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); - - private: - const Image * m_image; - nvtt::AlphaMode m_alphaMode; - }; - - // External compressors. -#if defined(HAVE_S3QUANT) - void s3CompressDXT1(const Image * image, const nvtt::OutputOptions::Private & outputOptions); -#endif - -#if defined(HAVE_ATITC) - void atiCompressDXT1(const Image * image, const nvtt::OutputOptions::Private & outputOptions); -#endif - -} // nv namespace - - -#endif // NV_TT_COMPRESSDXT_H Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressDXT.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressDXT.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressDXT.cpp @@ -1,597 +0,0 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. - -#include - -#include -#include -#include - -#include "nvtt.h" -#include "CompressDXT.h" -#include "QuickCompressDXT.h" -#include "OptimalCompressDXT.h" -#include "CompressionOptions.h" -#include "OutputOptions.h" - -// squish -#include "squish/colourset.h" -//#include "squish/clusterfit.h" -#include "squish/fastclusterfit.h" -#include "squish/weightedclusterfit.h" - - -// s3_quant -#if defined(HAVE_S3QUANT) -#include "s3tc/s3_quant.h" -#endif - -// ati tc -#if defined(HAVE_ATITC) -#include "atitc/ATI_Compress.h" -#endif - -//#include - -using namespace nv; -using namespace nvtt; - - -nv::FastCompressor::FastCompressor() : m_image(NULL), m_alphaMode(AlphaMode_None) -{ -} - -nv::FastCompressor::~FastCompressor() -{ -} - -void nv::FastCompressor::setImage(const Image * image, nvtt::AlphaMode alphaMode) -{ - m_image = image; - m_alphaMode = alphaMode; -} - -void nv::FastCompressor::compressDXT1(const OutputOptions::Private & outputOptions) -{ - const uint w = m_image->width(); - const uint h = m_image->height(); - - ColorBlock rgba; - BlockDXT1 block; - - for (uint y = 0; y < h; y += 4) { - for (uint x = 0; x < w; x += 4) { - rgba.init(m_image, x, y); - - QuickCompress::compressDXT1(rgba, &block); - - if (outputOptions.outputHandler != NULL) { - outputOptions.outputHandler->writeData(&block, sizeof(block)); - } - } - } -} - - -void nv::FastCompressor::compressDXT1a(const OutputOptions::Private & outputOptions) -{ - const uint w = m_image->width(); - const uint h = m_image->height(); - - ColorBlock rgba; - BlockDXT1 block; - - for (uint y = 0; y < h; y += 4) { - for (uint x = 0; x < w; x += 4) { - rgba.init(m_image, x, y); - - QuickCompress::compressDXT1a(rgba, &block); - - if (outputOptions.outputHandler != NULL) { - outputOptions.outputHandler->writeData(&block, sizeof(block)); - } - } - } -} - - -void nv::FastCompressor::compressDXT3(const nvtt::OutputOptions::Private & outputOptions) -{ - const uint w = m_image->width(); - const uint h = m_image->height(); - - ColorBlock rgba; - BlockDXT3 block; - - for (uint y = 0; y < h; y += 4) { - for (uint x = 0; x < w; x += 4) { - rgba.init(m_image, x, y); - - QuickCompress::compressDXT3(rgba, &block); - - if (outputOptions.outputHandler != NULL) { - outputOptions.outputHandler->writeData(&block, sizeof(block)); - } - } - } -} - - -void nv::FastCompressor::compressDXT5(const nvtt::OutputOptions::Private & outputOptions) -{ - const uint w = m_image->width(); - const uint h = m_image->height(); - - ColorBlock rgba; - BlockDXT5 block; - - for (uint y = 0; y < h; y += 4) { - for (uint x = 0; x < w; x += 4) { - rgba.init(m_image, x, y); - - QuickCompress::compressDXT5(rgba, &block, 0); - - if (outputOptions.outputHandler != NULL) { - outputOptions.outputHandler->writeData(&block, sizeof(block)); - } - } - } -} - - -void nv::FastCompressor::compressDXT5n(const nvtt::OutputOptions::Private & outputOptions) -{ - const uint w = m_image->width(); - const uint h = m_image->height(); - - ColorBlock rgba; - BlockDXT5 block; - - for (uint y = 0; y < h; y += 4) { - for (uint x = 0; x < w; x += 4) { - rgba.init(m_image, x, y); - - rgba.swizzleDXT5n(); - - QuickCompress::compressDXT5(rgba, &block, 0); - - if (outputOptions.outputHandler != NULL) { - outputOptions.outputHandler->writeData(&block, 
sizeof(block)); - } - } - } -} - - -nv::SlowCompressor::SlowCompressor() : m_image(NULL), m_alphaMode(AlphaMode_None) -{ -} - -nv::SlowCompressor::~SlowCompressor() -{ -} - -void nv::SlowCompressor::setImage(const Image * image, nvtt::AlphaMode alphaMode) -{ - m_image = image; - m_alphaMode = alphaMode; -} - -void nv::SlowCompressor::compressDXT1(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) -{ - const uint w = m_image->width(); - const uint h = m_image->height(); - - ColorBlock rgba; - BlockDXT1 block; - - squish::WeightedClusterFit fit; - //squish::ClusterFit fit; - //squish::FastClusterFit fit; - fit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z()); - - for (uint y = 0; y < h; y += 4) { - for (uint x = 0; x < w; x += 4) { - - rgba.init(m_image, x, y); - - if (rgba.isSingleColor()) - { - OptimalCompress::compressDXT1(rgba.color(0), &block); - } - else - { - squish::ColourSet colours((uint8 *)rgba.colors(), 0, true); - fit.SetColourSet(&colours, squish::kDxt1); - fit.Compress(&block); - } - - if (outputOptions.outputHandler != NULL) { - outputOptions.outputHandler->writeData(&block, sizeof(block)); - } - } - } -} - - -void nv::SlowCompressor::compressDXT1a(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) -{ - const uint w = m_image->width(); - const uint h = m_image->height(); - - ColorBlock rgba; - BlockDXT1 block; - - squish::WeightedClusterFit fit; - fit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z()); - - for (uint y = 0; y < h; y += 4) { - for (uint x = 0; x < w; x += 4) { - - rgba.init(m_image, x, y); - - bool anyAlpha = false; - bool allAlpha = true; - - for (uint i = 0; i < 16; i++) - { - if (rgba.color(i).a < 128) anyAlpha = true; - else allAlpha = false; - } - - if ((!anyAlpha && rgba.isSingleColor() || allAlpha)) - { - OptimalCompress::compressDXT1a(rgba.color(0), &block); - } - else - { - squish::ColourSet colours((uint8 *)rgba.colors(), squish::kDxt1|squish::kWeightColourByAlpha); - fit.SetColourSet(&colours, squish::kDxt1); - fit.Compress(&block); - } - - if (outputOptions.outputHandler != NULL) { - outputOptions.outputHandler->writeData(&block, sizeof(block)); - } - } - } -} - - -void nv::SlowCompressor::compressDXT3(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) -{ - const uint w = m_image->width(); - const uint h = m_image->height(); - - ColorBlock rgba; - BlockDXT3 block; - - squish::WeightedClusterFit fit; - //squish::FastClusterFit fit; - fit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z()); - - for (uint y = 0; y < h; y += 4) { - for (uint x = 0; x < w; x += 4) { - - rgba.init(m_image, x, y); - - // Compress explicit alpha. - OptimalCompress::compressDXT3A(rgba, &block.alpha); - - // Compress color. 
- if (rgba.isSingleColor()) - { - OptimalCompress::compressDXT1(rgba.color(0), &block.color); - } - else - { - squish::ColourSet colours((uint8 *)rgba.colors(), squish::kWeightColourByAlpha); - fit.SetColourSet(&colours, 0); - fit.Compress(&block.color); - } - - if (outputOptions.outputHandler != NULL) { - outputOptions.outputHandler->writeData(&block, sizeof(block)); - } - } - } -} - -void nv::SlowCompressor::compressDXT5(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) -{ - const uint w = m_image->width(); - const uint h = m_image->height(); - - ColorBlock rgba; - BlockDXT5 block; - - squish::WeightedClusterFit fit; - fit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z()); - - for (uint y = 0; y < h; y += 4) { - for (uint x = 0; x < w; x += 4) { - - rgba.init(m_image, x, y); - - // Compress alpha. - if (compressionOptions.quality == Quality_Highest) - { - OptimalCompress::compressDXT5A(rgba, &block.alpha); - } - else - { - QuickCompress::compressDXT5A(rgba, &block.alpha); - } - - // Compress color. - if (rgba.isSingleColor()) - { - OptimalCompress::compressDXT1(rgba.color(0), &block.color); - } - else - { - squish::ColourSet colours((uint8 *)rgba.colors(), squish::kWeightColourByAlpha); - fit.SetColourSet(&colours, 0); - fit.Compress(&block.color); - } - - if (outputOptions.outputHandler != NULL) { - outputOptions.outputHandler->writeData(&block, sizeof(block)); - } - } - } -} - - -void nv::SlowCompressor::compressDXT5n(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) -{ - const uint w = m_image->width(); - const uint h = m_image->height(); - - ColorBlock rgba; - BlockDXT5 block; - - for (uint y = 0; y < h; y += 4) { - for (uint x = 0; x < w; x += 4) { - - rgba.init(m_image, x, y); - - rgba.swizzleDXT5n(); - - // Compress X. - if (compressionOptions.quality == Quality_Highest) - { - OptimalCompress::compressDXT5A(rgba, &block.alpha); - } - else - { - QuickCompress::compressDXT5A(rgba, &block.alpha); - } - - // Compress Y. 
- OptimalCompress::compressDXT1G(rgba, &block.color); - - if (outputOptions.outputHandler != NULL) { - outputOptions.outputHandler->writeData(&block, sizeof(block)); - } - } - } -} - - -void nv::SlowCompressor::compressBC4(const CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) -{ - const uint w = m_image->width(); - const uint h = m_image->height(); - - ColorBlock rgba; - AlphaBlockDXT5 block; - - for (uint y = 0; y < h; y += 4) { - for (uint x = 0; x < w; x += 4) { - - rgba.init(m_image, x, y); - - if (compressionOptions.quality == Quality_Highest) - { - OptimalCompress::compressDXT5A(rgba, &block); - } - else - { - QuickCompress::compressDXT5A(rgba, &block); - } - - if (outputOptions.outputHandler != NULL) { - outputOptions.outputHandler->writeData(&block, sizeof(block)); - } - } - } -} - - -void nv::SlowCompressor::compressBC5(const CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) -{ - const uint w = m_image->width(); - const uint h = m_image->height(); - - ColorBlock xcolor; - ColorBlock ycolor; - - BlockATI2 block; - - for (uint y = 0; y < h; y += 4) { - for (uint x = 0; x < w; x += 4) { - - xcolor.init(m_image, x, y); - xcolor.splatX(); - - ycolor.init(m_image, x, y); - ycolor.splatY(); - - if (compressionOptions.quality == Quality_Highest) - { - OptimalCompress::compressDXT5A(xcolor, &block.x); - OptimalCompress::compressDXT5A(ycolor, &block.y); - } - else - { - QuickCompress::compressDXT5A(xcolor, &block.x); - QuickCompress::compressDXT5A(ycolor, &block.y); - } - - if (outputOptions.outputHandler != NULL) { - outputOptions.outputHandler->writeData(&block, sizeof(block)); - } - } - } -} - - -#if defined(HAVE_S3QUANT) - -void nv::s3CompressDXT1(const Image * image, const nvtt::OutputOptions::Private & outputOptions) -{ - const uint w = image->width(); - const uint h = image->height(); - - float error = 0.0f; - - BlockDXT1 dxtBlock3; - BlockDXT1 dxtBlock4; - ColorBlock block; - - for (uint y = 0; y < h; y += 4) { - for (uint x = 0; x < w; x += 4) { - block.init(image, x, y); - - // Init rgb block. - RGBBlock rgbBlock; - rgbBlock.n = 16; - for (uint i = 0; i < 16; i++) { - rgbBlock.colorChannel[i][0] = clamp(float(block.color(i).r) / 255.0f, 0.0f, 1.0f); - rgbBlock.colorChannel[i][1] = clamp(float(block.color(i).g) / 255.0f, 0.0f, 1.0f); - rgbBlock.colorChannel[i][2] = clamp(float(block.color(i).b) / 255.0f, 0.0f, 1.0f); - } - rgbBlock.weight[0] = 1.0f; - rgbBlock.weight[1] = 1.0f; - rgbBlock.weight[2] = 1.0f; - - rgbBlock.inLevel = 4; - CodeRGBBlock(&rgbBlock); - - // Copy results to DXT block. - dxtBlock4.col0.r = rgbBlock.endPoint[0][0]; - dxtBlock4.col0.g = rgbBlock.endPoint[0][1]; - dxtBlock4.col0.b = rgbBlock.endPoint[0][2]; - - dxtBlock4.col1.r = rgbBlock.endPoint[1][0]; - dxtBlock4.col1.g = rgbBlock.endPoint[1][1]; - dxtBlock4.col1.b = rgbBlock.endPoint[1][2]; - - dxtBlock4.setIndices(rgbBlock.index); - - if (dxtBlock4.col0.u < dxtBlock4.col1.u) { - swap(dxtBlock4.col0.u, dxtBlock4.col1.u); - dxtBlock4.indices ^= 0x55555555; - } - - uint error4 = blockError(block, dxtBlock4); - - rgbBlock.inLevel = 3; - - CodeRGBBlock(&rgbBlock); - - // Copy results to DXT block. 
- dxtBlock3.col0.r = rgbBlock.endPoint[0][0]; - dxtBlock3.col0.g = rgbBlock.endPoint[0][1]; - dxtBlock3.col0.b = rgbBlock.endPoint[0][2]; - - dxtBlock3.col1.r = rgbBlock.endPoint[1][0]; - dxtBlock3.col1.g = rgbBlock.endPoint[1][1]; - dxtBlock3.col1.b = rgbBlock.endPoint[1][2]; - - dxtBlock3.setIndices(rgbBlock.index); - - if (dxtBlock3.col0.u > dxtBlock3.col1.u) { - swap(dxtBlock3.col0.u, dxtBlock3.col1.u); - dxtBlock3.indices ^= (~dxtBlock3.indices >> 1) & 0x55555555; - } - - uint error3 = blockError(block, dxtBlock3); - - if (error3 < error4) { - error += error3; - - if (outputOptions.outputHandler != NULL) { - outputOptions.outputHandler->writeData(&dxtBlock3, sizeof(dxtBlock3)); - } - } - else { - error += error4; - - if (outputOptions.outputHandler != NULL) { - outputOptions.outputHandler->writeData(&dxtBlock4, sizeof(dxtBlock4)); - } - } - } - } - - printf("error = %f\n", error/((w+3)/4 * (h+3)/4)); -} - -#endif // defined(HAVE_S3QUANT) - - -#if defined(HAVE_ATITC) - -void nv::atiCompressDXT1(const Image * image, const OutputOptions::Private & outputOptions) -{ - // Init source texture - ATI_TC_Texture srcTexture; - srcTexture.dwSize = sizeof(srcTexture); - srcTexture.dwWidth = image->width(); - srcTexture.dwHeight = image->height(); - srcTexture.dwPitch = image->width() * 4; - srcTexture.format = ATI_TC_FORMAT_ARGB_8888; - srcTexture.dwDataSize = ATI_TC_CalculateBufferSize(&srcTexture); - srcTexture.pData = (ATI_TC_BYTE*) image->pixels(); - - // Init dest texture - ATI_TC_Texture destTexture; - destTexture.dwSize = sizeof(destTexture); - destTexture.dwWidth = image->width(); - destTexture.dwHeight = image->height(); - destTexture.dwPitch = 0; - destTexture.format = ATI_TC_FORMAT_DXT1; - destTexture.dwDataSize = ATI_TC_CalculateBufferSize(&destTexture); - destTexture.pData = (ATI_TC_BYTE*) mem::malloc(destTexture.dwDataSize); - - // Compress - ATI_TC_ConvertTexture(&srcTexture, &destTexture, NULL, NULL, NULL, NULL); - - if (outputOptions.outputHandler != NULL) { - outputOptions.outputHandler->writeData(destTexture.pData, destTexture.dwDataSize); - } -} - -#endif // defined(HAVE_ATITC) Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressRGB.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressRGB.h +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressRGB.h @@ -1,39 +0,0 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. - -#ifndef NV_TT_COMPRESSRGB_H -#define NV_TT_COMPRESSRGB_H - -#include "nvtt.h" - -namespace nv -{ - class Image; - - // Pixel format converter. - void compressRGB(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions); - -} // nv namespace - - -#endif // NV_TT_COMPRESSDXT_H Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressRGB.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressRGB.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressRGB.cpp @@ -1,140 +0,0 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. - -#include - -#include -#include -#include - -#include "CompressRGB.h" -#include "CompressionOptions.h" -#include "OutputOptions.h" - -using namespace nv; -using namespace nvtt; - -namespace -{ - - inline uint computePitch(uint w, uint bitsize) - { - uint p = w * ((bitsize + 7) / 8); - - // Align to 32 bits. - return ((p + 3) / 4) * 4; - } - - inline void convert_to_a8r8g8b8(const void * src, void * dst, uint w) - { - memcpy(dst, src, 4 * w); - } - - inline void convert_to_x8r8g8b8(const void * src, void * dst, uint w) - { - memcpy(dst, src, 4 * w); - } - -} // namespace - - -// Pixel format converter. 
-void nv::compressRGB(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions) -{ - nvCheck(image != NULL); - - const uint w = image->width(); - const uint h = image->height(); - - const uint bitCount = compressionOptions.bitcount; - nvCheck(bitCount == 8 || bitCount == 16 || bitCount == 24 || bitCount == 32); - - const uint byteCount = bitCount / 8; - - const uint rmask = compressionOptions.rmask; - uint rshift, rsize; - PixelFormat::maskShiftAndSize(rmask, &rshift, &rsize); - - const uint gmask = compressionOptions.gmask; - uint gshift, gsize; - PixelFormat::maskShiftAndSize(gmask, &gshift, &gsize); - - const uint bmask = compressionOptions.bmask; - uint bshift, bsize; - PixelFormat::maskShiftAndSize(bmask, &bshift, &bsize); - - const uint amask = compressionOptions.amask; - uint ashift, asize; - PixelFormat::maskShiftAndSize(amask, &ashift, &asize); - - // Determine pitch. - uint pitch = computePitch(w, compressionOptions.bitcount); - - uint8 * dst = (uint8 *)::malloc(pitch + 4); - - for (uint y = 0; y < h; y++) - { - const Color32 * src = image->scanline(y); - - if (bitCount == 32 && rmask == 0xFF0000 && gmask == 0xFF00 && bmask == 0xFF && amask == 0xFF000000) - { - convert_to_a8r8g8b8(src, dst, w); - } - else if (bitCount == 32 && rmask == 0xFF0000 && gmask == 0xFF00 && bmask == 0xFF && amask == 0) - { - convert_to_x8r8g8b8(src, dst, w); - } - else - { - // Generic pixel format conversion. - for (uint x = 0; x < w; x++) - { - uint c = 0; - c |= PixelFormat::convert(src[x].r, 8, rsize) << rshift; - c |= PixelFormat::convert(src[x].g, 8, gsize) << gshift; - c |= PixelFormat::convert(src[x].b, 8, bsize) << bshift; - c |= PixelFormat::convert(src[x].a, 8, asize) << ashift; - - // Output one byte at a time. - for (uint i = 0; i < byteCount; i++) - { - *(dst + x * byteCount + i) = (c >> (i * 8)) & 0xFF; - } - } - - // Zero padding. - for (uint x = w * byteCount; x < pitch; x++) - { - *(dst + x) = 0; - } - } - - if (outputOptions.outputHandler != NULL) - { - outputOptions.outputHandler->writeData(dst, pitch); - } - } - - ::free(dst); -} - Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressionOptions.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressionOptions.h +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressionOptions.h @@ -1,61 +1,80 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. - -#ifndef NV_TT_COMPRESSIONOPTIONS_H -#define NV_TT_COMPRESSIONOPTIONS_H - -#include -#include -#include "nvtt.h" - -namespace nvtt -{ - - struct CompressionOptions::Private - { - Format format; - - Quality quality; - - nv::Vector4 colorWeight; - - // Pixel format description. - uint bitcount; - uint rmask; - uint gmask; - uint bmask; - uint amask; - - nv::String externalCompressor; - - // Quantization. - bool enableColorDithering; - bool enableAlphaDithering; - bool binaryAlpha; - int alphaThreshold; // reference value used for binary alpha quantization. - }; - -} // nvtt namespace - - -#endif // NV_TT_COMPRESSIONOPTIONS_H +// Copyright (c) 2009-2011 Ignacio Castano +// Copyright (c) 2007-2009 NVIDIA Corporation -- Ignacio Castano +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +#ifndef NV_TT_COMPRESSIONOPTIONS_H +#define NV_TT_COMPRESSIONOPTIONS_H + +#include "nvtt.h" +#include "nvmath/Vector.h" +#include "nvcore/StrLib.h" + +namespace nvtt +{ + + struct CompressionOptions::Private + { + Format format; + + Quality quality; + + nv::Vector4 colorWeight; + + // Pixel format description. + uint bitcount; + uint rmask; + uint gmask; + uint bmask; + uint amask; + uint8 rsize; + uint8 gsize; + uint8 bsize; + uint8 asize; + + PixelType pixelType; + uint pitchAlignment; + + nv::String externalCompressor; + + // Quantization. + bool enableColorDithering; + bool enableAlphaDithering; + bool binaryAlpha; + int alphaThreshold; // reference value used for binary alpha quantization. 
+ + Decoder decoder; + + uint getBitCount() const + { + if (format == Format_RGBA) { + if (bitcount != 0) return bitcount; + else return rsize + gsize + bsize + asize; + } + return 0; + } + }; + +} // nvtt namespace + + +#endif // NV_TT_COMPRESSIONOPTIONS_H Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressionOptions.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressionOptions.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressionOptions.cpp @@ -1,143 +1,273 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. - -#include "nvtt.h" -#include "CompressionOptions.h" - -using namespace nv; -using namespace nvtt; - - -/// Constructor. Sets compression options to the default values. -CompressionOptions::CompressionOptions() : m(*new CompressionOptions::Private()) -{ - reset(); -} - - -/// Destructor. -CompressionOptions::~CompressionOptions() -{ - delete &m; -} - - -/// Set default compression options. -void CompressionOptions::reset() -{ - m.format = Format_DXT1; - m.quality = Quality_Normal; - m.colorWeight.set(1.0f, 1.0f, 1.0f, 1.0f); - - m.bitcount = 32; - m.bmask = 0x000000FF; - m.gmask = 0x0000FF00; - m.rmask = 0x00FF0000; - m.amask = 0xFF000000; - - m.enableColorDithering = false; - m.enableAlphaDithering = false; - m.binaryAlpha = false; - m.alphaThreshold = 127; -} - - -/// Set desired compression format. -void CompressionOptions::setFormat(Format format) -{ - m.format = format; -} - - -/// Set compression quality settings. -void CompressionOptions::setQuality(Quality quality) -{ - m.quality = quality; -} - - -/// Set the weights of each color channel. -/// The choice for these values is subjective. In many case uniform color weights -/// (1.0, 1.0, 1.0) work very well. A popular choice is to use the NTSC luma encoding -/// weights (0.2126, 0.7152, 0.0722), but I think that blue contributes to our -/// perception more than a 7%. A better choice in my opinion is (3, 4, 2). -void CompressionOptions::setColorWeights(float red, float green, float blue, float alpha/*=1.0f*/) -{ -// float total = red + green + blue; -// float x = red / total; -// float y = green / total; -// m.colorWeight.set(x, y, 1.0f - x - y); - m.colorWeight.set(red, green, blue, alpha); -} - - -/// Set color mask to describe the RGB/RGBA format. 
-void CompressionOptions::setPixelFormat(uint bitcount, uint rmask, uint gmask, uint bmask, uint amask) -{ - // Validate arguments. - nvCheck(bitcount == 8 || bitcount == 16 || bitcount == 24 || bitcount == 32); - nvCheck((rmask & gmask) == 0); - nvCheck((rmask & bmask) == 0); - nvCheck((rmask & amask) == 0); - nvCheck((gmask & bmask) == 0); - nvCheck((gmask & amask) == 0); - nvCheck((bmask & amask) == 0); - - if (bitcount != 32) - { - uint maxMask = (1 << bitcount); - nvCheck(maxMask > rmask); - nvCheck(maxMask > gmask); - nvCheck(maxMask > bmask); - nvCheck(maxMask > amask); - } - - m.bitcount = bitcount; - m.rmask = rmask; - m.gmask = gmask; - m.bmask = bmask; - m.amask = amask; -} - -/// Use external compressor. -void CompressionOptions::setExternalCompressor(const char * name) -{ - m.externalCompressor = name; -} - -/// Set quantization options. -/// @warning Do not enable dithering unless you know what you are doing. Quantization -/// introduces errors. It's better to let the compressor quantize the result to -/// minimize the error, instead of quantizing the data before handling it to -/// the compressor. -void CompressionOptions::setQuantization(bool colorDithering, bool alphaDithering, bool binaryAlpha, int alphaThreshold/*= 127*/) -{ - nvCheck(alphaThreshold >= 0 && alphaThreshold < 256); - m.enableColorDithering = colorDithering; - m.enableAlphaDithering = alphaDithering; - m.binaryAlpha = binaryAlpha; - m.alphaThreshold = alphaThreshold; -} - - - +// Copyright (c) 2009-2011 Ignacio Castano +// Copyright (c) 2007-2009 NVIDIA Corporation -- Ignacio Castano +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +#include "CompressionOptions.h" +#include "nvimage/DirectDrawSurface.h" +#include "nvmath/Vector.inl" + +using namespace nv; +using namespace nvtt; + + +/// Constructor. Sets compression options to the default values. +CompressionOptions::CompressionOptions() : m(*new CompressionOptions::Private()) +{ + reset(); +} + + +/// Destructor. +CompressionOptions::~CompressionOptions() +{ + delete &m; +} + + +/// Set default compression options. 
+void CompressionOptions::reset() +{ + m.format = Format_DXT1; + m.quality = Quality_Normal; + m.colorWeight.set(1.0f, 1.0f, 1.0f, 1.0f); + + m.bitcount = 32; + m.bmask = 0x000000FF; + m.gmask = 0x0000FF00; + m.rmask = 0x00FF0000; + m.amask = 0xFF000000; + + m.rsize = 8; + m.gsize = 8; + m.bsize = 8; + m.asize = 8; + + m.pixelType = PixelType_UnsignedNorm; + m.pitchAlignment = 1; + + m.enableColorDithering = false; + m.enableAlphaDithering = false; + m.binaryAlpha = false; + m.alphaThreshold = 127; + + m.decoder = Decoder_D3D10; +} + + +/// Set desired compression format. +void CompressionOptions::setFormat(Format format) +{ + m.format = format; +} + + +/// Set compression quality settings. +void CompressionOptions::setQuality(Quality quality) +{ + m.quality = quality; +} + + +/// Set the weights of each color channel. +/// The choice for these values is subjective. In most cases uniform color weights +/// (1.0, 1.0, 1.0) work very well. A popular choice is to use the NTSC luma encoding +/// weights (0.2126, 0.7152, 0.0722), but I think that blue contributes to our +/// perception more than a 7%. A better choice in my opinion is (3, 4, 2). +void CompressionOptions::setColorWeights(float red, float green, float blue, float alpha/*=1.0f*/) +{ +// float total = red + green + blue; +// float x = red / total; +// float y = green / total; +// m.colorWeight.set(x, y, 1.0f - x - y); + m.colorWeight.set(red, green, blue, alpha); +} + + +/// Set color mask to describe the RGB/RGBA format. +void CompressionOptions::setPixelFormat(uint bitCount, uint rmask, uint gmask, uint bmask, uint amask) +{ + // Validate arguments. + nvCheck(bitCount <= 32); + nvCheck((rmask & gmask) == 0); + nvCheck((rmask & bmask) == 0); + nvCheck((rmask & amask) == 0); + nvCheck((gmask & bmask) == 0); + nvCheck((gmask & amask) == 0); + nvCheck((bmask & amask) == 0); + + if (bitCount != 32) + { + uint maxMask = (1 << bitCount); + nvCheck(maxMask > rmask); + nvCheck(maxMask > gmask); + nvCheck(maxMask > bmask); + nvCheck(maxMask > amask); + } + + m.bitcount = bitCount; + m.rmask = rmask; + m.gmask = gmask; + m.bmask = bmask; + m.amask = amask; + + m.rsize = 0; + m.gsize = 0; + m.bsize = 0; + m.asize = 0; +} + +void CompressionOptions::setPixelFormat(uint8 rsize, uint8 gsize, uint8 bsize, uint8 asize) +{ + nvCheck(rsize <= 32 && gsize <= 32 && bsize <= 32 && asize <= 32); + + m.bitcount = 0; + m.rmask = 0; + m.gmask = 0; + m.bmask = 0; + m.amask = 0; + + m.rsize = rsize; + m.gsize = gsize; + m.bsize = bsize; + m.asize = asize; +} + +/// Set pixel type. +void CompressionOptions::setPixelType(PixelType pixelType) +{ + m.pixelType = pixelType; +} + + +/// Set pitch alignment in bytes. +void CompressionOptions::setPitchAlignment(int pitchAlignment) +{ + nvDebugCheck(pitchAlignment > 0 && isPowerOfTwo(pitchAlignment)); + m.pitchAlignment = pitchAlignment; +} + + +/// Use external compressor. +void CompressionOptions::setExternalCompressor(const char * name) +{ + m.externalCompressor = name; +} + +/// Set quantization options. +/// @warning Do not enable dithering unless you know what you are doing. Quantization +/// introduces errors. It's better to let the compressor quantize the result to +/// minimize the error, instead of quantizing the data before handling it to +/// the compressor. 
+void CompressionOptions::setQuantization(bool colorDithering, bool alphaDithering, bool binaryAlpha, int alphaThreshold/*= 127*/) +{ + nvCheck(alphaThreshold >= 0 && alphaThreshold < 256); + m.enableColorDithering = colorDithering; + m.enableAlphaDithering = alphaDithering; + m.binaryAlpha = binaryAlpha; + m.alphaThreshold = alphaThreshold; +} + +/// Set target decoder to optimize for. +void CompressionOptions::setTargetDecoder(Decoder decoder) +{ + m.decoder = decoder; +} + + + +// Translate to and from D3D formats. +unsigned int CompressionOptions::d3d9Format() const +{ + if (m.format == Format_RGB) { + if (m.pixelType == PixelType_UnsignedNorm) { + + uint bitcount = m.bitcount; + uint rmask = m.rmask; + uint gmask = m.gmask; + uint bmask = m.bmask; + uint amask = m.amask; + + if (bitcount == 0) { + bitcount = m.rsize + m.gsize + m.bsize + m.asize; + rmask = ((1 << m.rsize) - 1) << (m.asize + m.bsize + m.gsize); + gmask = ((1 << m.gsize) - 1) << (m.asize + m.bsize); + bmask = ((1 << m.bsize) - 1) << m.asize; + amask = ((1 << m.asize) - 1) << 0; + } + + if (bitcount <= 32) { + return nv::findD3D9Format(bitcount, rmask, gmask, bmask, amask); + } + else { + //if (m.rsize == 16 && m.gsize == 16 && m.bsize == 0 && m.asize == 0) return D3DFMT_G16R16; + if (m.rsize == 16 && m.gsize == 16 && m.bsize == 16 && m.asize == 16) return D3DFMT_A16B16G16R16; + } + } + else if (m.pixelType == PixelType_Float) { + if (m.rsize == 16 && m.gsize == 0 && m.bsize == 0 && m.asize == 0) return D3DFMT_R16F; + if (m.rsize == 32 && m.gsize == 0 && m.bsize == 0 && m.asize == 0) return D3DFMT_R32F; + if (m.rsize == 16 && m.gsize == 16 && m.bsize == 0 && m.asize == 0) return D3DFMT_G16R16F; + if (m.rsize == 32 && m.gsize == 32 && m.bsize == 0 && m.asize == 0) return D3DFMT_G32R32F; + if (m.rsize == 16 && m.gsize == 16 && m.bsize == 16 && m.asize == 16) return D3DFMT_A16B16G16R16F; + if (m.rsize == 32 && m.gsize == 32 && m.bsize == 32 && m.asize == 32) return D3DFMT_A32B32G32R32F; + } + + return 0; + } + else { + uint d3d9_formats[] = { + 0, // Format_RGB, + FOURCC_DXT1, // Format_DXT1 + FOURCC_DXT1, // Format_DXT1a + FOURCC_DXT3, // Format_DXT3 + FOURCC_DXT5, // Format_DXT5 + FOURCC_DXT5, // Format_DXT5n + FOURCC_ATI1, // Format_BC4 + FOURCC_ATI2, // Format_BC5 + FOURCC_DXT1, // Format_DXT1n + 0, // Format_CTX1 + MAKEFOURCC('B', 'C', '6', 'H'), // Format_BC6 + MAKEFOURCC('B', 'C', '7', 'L'), // Format_BC7 + //FOURCC_ATI2, // Format_BC5_Luma + FOURCC_DXT5, // Format_BC3_RGBM + }; + + NV_COMPILER_CHECK(NV_ARRAY_SIZE(d3d9_formats) == Format_Count); + + return d3d9_formats[m.format]; + } +} + +/* +bool CompressionOptions::setDirect3D9Format(unsigned int format) +{ +} + +unsigned int CompressionOptions::dxgiFormat() const +{ +} + +bool CompressionOptions::setDXGIFormat(unsigned int format) +{ +} +*/ Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/Compressor.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/Compressor.h +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/Compressor.h @@ -1,80 +1,41 @@ -// Copyright NVIDIA Corporation 2008 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is 
furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. - -#ifndef NV_TT_COMPRESSOR_H -#define NV_TT_COMPRESSOR_H - -#include - -#include - -#include "nvtt.h" - -namespace nv -{ - class Image; -} - -namespace nvtt -{ - struct Mipmap; - - struct Compressor::Private - { - Private() {} - - bool compress(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const; - int estimateSize(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions) const; - - private: - - bool outputHeader(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const; - bool compressMipmaps(uint f, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const; - - bool initMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions, uint w, uint h, uint d, uint f, uint m) const; - - int findExactMipmap(const InputOptions::Private & inputOptions, uint w, uint h, uint d, uint f) const; - int findClosestMipmap(const InputOptions::Private & inputOptions, uint w, uint h, uint d, uint f) const; - - void downsampleMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions) const; - void scaleMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions, uint w, uint h, uint d) const; - void processInputImage(Mipmap & mipmap, const InputOptions::Private & inputOptions) const; - void quantizeMipmap(Mipmap & mipmap, const CompressionOptions::Private & compressionOptions) const; - bool compressMipmap(const Mipmap & mipmap, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const; - - - - public: - - bool cudaSupported; - bool cudaEnabled; - int cudaDevice; - - nv::AutoPtr cuda; - - }; - -} // nvtt namespace - - -#endif // NV_TT_COMPRESSOR_H +// Copyright (c) 2009-2011 Ignacio Castano +// Copyright (c) 2007-2009 NVIDIA Corporation -- Ignacio Castano +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +#ifndef NVTT_COMPRESSOR_H +#define NVTT_COMPRESSOR_H + +#include "nvtt.h" +#include "nvcore/nvcore.h" // uint + +namespace nv +{ + struct CompressorInterface + { + virtual ~CompressorInterface() {} + virtual void compress(nvtt::AlphaMode alphaMode, uint w, uint h, uint d, const float * rgba, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) = 0; + }; + +} // nv namespace + +#endif // NVTT_COMPRESSOR_H Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/Compressor.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/Compressor.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/Compressor.cpp @@ -1,853 +0,0 @@ -// Copyright NVIDIA Corporation 2008 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. - -#include - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "Compressor.h" -#include "InputOptions.h" -#include "CompressionOptions.h" -#include "OutputOptions.h" - -#include "CompressDXT.h" -#include "CompressRGB.h" -#include "cuda/CudaUtils.h" -#include "cuda/CudaCompressDXT.h" - - -using namespace nv; -using namespace nvtt; - - -namespace -{ - - static int blockSize(Format format) - { - if (format == Format_DXT1 || format == Format_DXT1a) { - return 8; - } - else if (format == Format_DXT3) { - return 16; - } - else if (format == Format_DXT5 || format == Format_DXT5n) { - return 16; - } - else if (format == Format_BC4) { - return 8; - } - else if (format == Format_BC5) { - return 16; - } - return 0; - } - - inline uint computePitch(uint w, uint bitsize) - { - uint p = w * ((bitsize + 7) / 8); - - // Align to 32 bits. 
- return ((p + 3) / 4) * 4; - } - - static int computeImageSize(uint w, uint h, uint d, uint bitCount, Format format) - { - if (format == Format_RGBA) { - return d * h * computePitch(w, bitCount); - } - else { - // @@ Handle 3D textures. DXT and VTC have different behaviors. - return ((w + 3) / 4) * ((h + 3) / 4) * blockSize(format); - } - } - -} // namespace - -namespace nvtt -{ - // Mipmap could be: - // - a pointer to an input image. - // - a fixed point image. - // - a floating point image. - struct Mipmap - { - Mipmap() : m_inputImage(NULL) {} - ~Mipmap() {} - - // Reference input image. - void setFromInput(const InputOptions::Private & inputOptions, uint idx) - { - m_inputImage = inputOptions.image(idx); - m_fixedImage = NULL; - m_floatImage = NULL; - } - - // Assign and take ownership of given image. - void setImage(FloatImage * image) - { - m_inputImage = NULL; - m_fixedImage = NULL; - m_floatImage = image; - } - - - // Convert linear float image to fixed image ready for compression. - void toFixedImage(const InputOptions::Private & inputOptions) - { - if (m_floatImage != NULL) // apfaffe - We should check that we have a float image, if so convert it! - { - if (inputOptions.isNormalMap || inputOptions.outputGamma == 1.0f) - { - m_fixedImage = m_floatImage->createImage(); - } - else - { - m_fixedImage = m_floatImage->createImageGammaCorrect(inputOptions.outputGamma); - } - } - } - - // Convert input image to linear float image. - void toFloatImage(const InputOptions::Private & inputOptions) - { - if (m_floatImage == NULL) - { - nvDebugCheck(this->asFixedImage() != NULL); - - m_floatImage = new FloatImage(this->asFixedImage()); - - if (inputOptions.isNormalMap) - { - // Expand normals to [-1, 1] range. - // floatImage->expandNormals(0); - } - else if (inputOptions.inputGamma != 1.0f) - { - // Convert to linear space. - m_floatImage->toLinear(0, 3, inputOptions.inputGamma); - } - } - } - - const FloatImage * asFloatImage() const - { - return m_floatImage.ptr(); - } - - FloatImage * asFloatImage() - { - return m_floatImage.ptr(); - } - - const Image * asFixedImage() const - { - // - apfaffe - switched logic to return the 'processed image' rather than the input! - if (m_fixedImage != NULL && m_fixedImage.ptr() != NULL) - { - return m_fixedImage.ptr(); - } - return m_inputImage; - } - - Image * asMutableFixedImage() - { - if (m_inputImage != NULL) - { - // Do not modify input image, create a copy. - m_fixedImage = new Image(*m_inputImage); - m_inputImage = NULL; - } - return m_fixedImage.ptr(); - } - - - private: - const Image * m_inputImage; - AutoPtr m_fixedImage; - AutoPtr m_floatImage; - }; - -} // nvtt namespace - - -Compressor::Compressor() : m(*new Compressor::Private()) -{ - // CUDA initialization. - m.cudaSupported = cuda::isHardwarePresent(); - m.cudaEnabled = false; - m.cudaDevice = -1; - - enableCudaAcceleration(m.cudaSupported); -} - -Compressor::~Compressor() -{ - enableCudaAcceleration(false); - delete &m; -} - - -/// Enable CUDA acceleration. -void Compressor::enableCudaAcceleration(bool enable) -{ - if (m.cudaSupported) - { - if (m.cudaEnabled && !enable) - { - m.cudaEnabled = false; - m.cuda = NULL; - - if (m.cudaDevice != -1) - { - // Exit device. - cuda::exitDevice(); - } - } - else if (!m.cudaEnabled && enable) - { - // Init the CUDA device. This may return -1 if CUDA was already initialized by the app. - m.cudaEnabled = cuda::initDevice(&m.cudaDevice); - - if (m.cudaEnabled) - { - // Create compressor if initialization succeeds. 
- m.cuda = new CudaCompressor(); - - // But cleanup if failed. - if (!m.cuda->isValid()) - { - enableCudaAcceleration(false); - } - } - } - } -} - -/// Check if CUDA acceleration is enabled. -bool Compressor::isCudaAccelerationEnabled() const -{ - return m.cudaEnabled; -} - - -/// Compress the input texture with the given compression options. -bool Compressor::process(const InputOptions & inputOptions, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const -{ - return m.compress(inputOptions.m, compressionOptions.m, outputOptions.m); -} - - -/// Estimate the size of compressing the input with the given options. -int Compressor::estimateSize(const InputOptions & inputOptions, const CompressionOptions & compressionOptions) const -{ - return m.estimateSize(inputOptions.m, compressionOptions.m); -} - - - - -bool Compressor::Private::compress(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const -{ - // Make sure enums match. - nvStaticCheck(FloatImage::WrapMode_Clamp == (FloatImage::WrapMode)WrapMode_Clamp); - nvStaticCheck(FloatImage::WrapMode_Mirror == (FloatImage::WrapMode)WrapMode_Mirror); - nvStaticCheck(FloatImage::WrapMode_Repeat == (FloatImage::WrapMode)WrapMode_Repeat); - - // Get output handler. - if (!outputOptions.openFile()) - { - if (outputOptions.errorHandler) outputOptions.errorHandler->error(Error_FileOpen); - return false; - } - - inputOptions.computeTargetExtents(); - - // Output DDS header. - if (!outputHeader(inputOptions, compressionOptions, outputOptions)) - { - return false; - } - - for (uint f = 0; f < inputOptions.faceCount; f++) - { - if (!compressMipmaps(f, inputOptions, compressionOptions, outputOptions)) - { - return false; - } - } - - outputOptions.closeFile(); - - return true; -} - - -// Output DDS header. -bool Compressor::Private::outputHeader(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const -{ - // Output DDS header. 
- if (outputOptions.outputHandler == NULL || !outputOptions.outputHeader) - { - return true; - } - - DDSHeader header; - - header.setWidth(inputOptions.targetWidth); - header.setHeight(inputOptions.targetHeight); - - int mipmapCount = inputOptions.realMipmapCount(); - nvDebugCheck(mipmapCount > 0); - - header.setMipmapCount(mipmapCount); - - if (inputOptions.textureType == TextureType_2D) { - header.setTexture2D(); - } - else if (inputOptions.textureType == TextureType_Cube) { - header.setTextureCube(); - } - /*else if (inputOptions.textureType == TextureType_3D) { - header.setTexture3D(); - header.setDepth(inputOptions.targetDepth); - }*/ - - if (compressionOptions.format == Format_RGBA) - { - header.setPitch(computePitch(inputOptions.targetWidth, compressionOptions.bitcount)); - header.setPixelFormat(compressionOptions.bitcount, compressionOptions.rmask, compressionOptions.gmask, compressionOptions.bmask, compressionOptions.amask); - } - else - { - header.setLinearSize(computeImageSize(inputOptions.targetWidth, inputOptions.targetHeight, inputOptions.targetDepth, compressionOptions.bitcount, compressionOptions.format)); - - if (compressionOptions.format == Format_DXT1 || compressionOptions.format == Format_DXT1a) { - header.setFourCC('D', 'X', 'T', '1'); - if (inputOptions.isNormalMap) header.setNormalFlag(true); - } - else if (compressionOptions.format == Format_DXT3) { - header.setFourCC('D', 'X', 'T', '3'); - } - else if (compressionOptions.format == Format_DXT5) { - header.setFourCC('D', 'X', 'T', '5'); - } - else if (compressionOptions.format == Format_DXT5n) { - header.setFourCC('D', 'X', 'T', '5'); - if (inputOptions.isNormalMap) header.setNormalFlag(true); - } - else if (compressionOptions.format == Format_BC4) { - header.setFourCC('A', 'T', 'I', '1'); - } - else if (compressionOptions.format == Format_BC5) { - header.setFourCC('A', 'T', 'I', '2'); - if (inputOptions.isNormalMap) header.setNormalFlag(true); - } - } - - // Swap bytes if necessary. - header.swapBytes(); - - uint headerSize = 128; - if (header.hasDX10Header()) - { - nvStaticCheck(sizeof(DDSHeader) == 128 + 20); - headerSize = 128 + 20; - } - - bool writeSucceed = outputOptions.outputHandler->writeData(&header, headerSize); - if (!writeSucceed && outputOptions.errorHandler != NULL) - { - outputOptions.errorHandler->error(Error_FileWrite); - } - - return writeSucceed; -} - - -bool Compressor::Private::compressMipmaps(uint f, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const -{ - uint w = inputOptions.targetWidth; - uint h = inputOptions.targetHeight; - uint d = inputOptions.targetDepth; - - Mipmap mipmap; - - const uint mipmapCount = inputOptions.realMipmapCount(); - nvDebugCheck(mipmapCount > 0); - - for (uint m = 0; m < mipmapCount; m++) - { - if (outputOptions.outputHandler) - { - int size = computeImageSize(w, h, d, compressionOptions.bitcount, compressionOptions.format); - outputOptions.outputHandler->beginImage(size, w, h, d, f, m); - } - - // @@ Where to do the color transform? - // - Color transform may not be linear, so we cannot do before computing mipmaps. - // - Should be done in linear space, that is, after gamma correction. 
- - if (!initMipmap(mipmap, inputOptions, w, h, d, f, m)) - { - if (outputOptions.errorHandler != NULL) - { - outputOptions.errorHandler->error(Error_InvalidInput); - return false; - } - } - - quantizeMipmap(mipmap, compressionOptions); - - compressMipmap(mipmap, inputOptions, compressionOptions, outputOptions); - - // Compute extents of next mipmap: - w = max(1U, w / 2); - h = max(1U, h / 2); - d = max(1U, d / 2); - } - - return true; -} - -bool Compressor::Private::initMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions, uint w, uint h, uint d, uint f, uint m) const -{ - // Find image from input. - int inputIdx = findExactMipmap(inputOptions, w, h, d, f); - - if ((inputIdx == -1 || inputOptions.convertToNormalMap) && m != 0) - { - // Generate from last, when mipmap not found, or normal map conversion enabled. - downsampleMipmap(mipmap, inputOptions); - } - else - { - if (inputIdx != -1) - { - // If input mipmap found, then get from input. - mipmap.setFromInput(inputOptions, inputIdx); - } - else - { - // If not found, resize closest mipmap. - inputIdx = findClosestMipmap(inputOptions, w, h, d, f); - - if (inputIdx == -1) - { - return false; - } - - mipmap.setFromInput(inputOptions, inputIdx); - - scaleMipmap(mipmap, inputOptions, w, h, d); - } - - processInputImage(mipmap, inputOptions); - } - - // Convert linear float image to fixed image ready for compression. - mipmap.toFixedImage(inputOptions); - - return true; -} - -int Compressor::Private::findExactMipmap(const InputOptions::Private & inputOptions, uint w, uint h, uint d, uint f) const -{ - for (int m = 0; m < int(inputOptions.mipmapCount); m++) - { - int idx = f * inputOptions.mipmapCount + m; - const InputOptions::Private::InputImage & inputImage = inputOptions.images[idx]; - - if (inputImage.width == int(w) && inputImage.height == int(h) && inputImage.depth == int(d)) - { - if (inputImage.data != NULL) - { - return idx; - } - return -1; - } - else if (inputImage.width < int(w) || inputImage.height < int(h) || inputImage.depth < int(d)) - { - return -1; - } - } - - return -1; -} - -int Compressor::Private::findClosestMipmap(const InputOptions::Private & inputOptions, uint w, uint h, uint d, uint f) const -{ - int bestIdx = -1; - - for (int m = 0; m < int(inputOptions.mipmapCount); m++) - { - int idx = f * inputOptions.mipmapCount + m; - const InputOptions::Private::InputImage & inputImage = inputOptions.images[idx]; - - if (inputImage.data != NULL) - { - int difference = (inputImage.width - w) + (inputImage.height - h) + (inputImage.depth - d); - - if (difference < 0) - { - if (bestIdx == -1) - { - bestIdx = idx; - } - - return bestIdx; - } - - bestIdx = idx; - } - } - - return bestIdx; -} - -// Create mipmap from the given image. -void Compressor::Private::downsampleMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions) const -{ - // Make sure that floating point linear representation is available. - mipmap.toFloatImage(inputOptions); - - const FloatImage * floatImage = mipmap.asFloatImage(); - - if (inputOptions.mipmapFilter == MipmapFilter_Box) - { - // Use fast downsample. 
- mipmap.setImage(floatImage->fastDownSample()); - } - else if (inputOptions.mipmapFilter == MipmapFilter_Triangle) - { - TriangleFilter filter; - mipmap.setImage(floatImage->downSample(filter, (FloatImage::WrapMode)inputOptions.wrapMode)); - } - else /*if (inputOptions.mipmapFilter == MipmapFilter_Kaiser)*/ - { - nvDebugCheck(inputOptions.mipmapFilter == MipmapFilter_Kaiser); - KaiserFilter filter(inputOptions.kaiserWidth); - filter.setParameters(inputOptions.kaiserAlpha, inputOptions.kaiserStretch); - mipmap.setImage(floatImage->downSample(filter, (FloatImage::WrapMode)inputOptions.wrapMode)); - } - - // Normalize mipmap. - if ((inputOptions.isNormalMap || inputOptions.convertToNormalMap) && inputOptions.normalizeMipmaps) - { - normalizeNormalMap(mipmap.asFloatImage()); - } -} - - -void Compressor::Private::scaleMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions, uint w, uint h, uint d) const -{ - mipmap.toFloatImage(inputOptions); - - // @@ Add more filters. - // @@ Select different filters for downscaling and reconstruction. - - // Resize image. - BoxFilter boxFilter; - mipmap.setImage(mipmap.asFloatImage()->resize(boxFilter, w, h, (FloatImage::WrapMode)inputOptions.wrapMode)); -} - - -// Process an input image: Convert to normal map, normalize, or convert to linear space. -void Compressor::Private::processInputImage(Mipmap & mipmap, const InputOptions::Private & inputOptions) const -{ - if (inputOptions.convertToNormalMap) - { - mipmap.toFixedImage(inputOptions); - - Vector4 heightScale = inputOptions.heightFactors; - mipmap.setImage(createNormalMap(mipmap.asFixedImage(), (FloatImage::WrapMode)inputOptions.wrapMode, heightScale, inputOptions.bumpFrequencyScale)); - } - else if (inputOptions.isNormalMap) - { - if (inputOptions.normalizeMipmaps) - { - // If floating point image available, normalize in place. - if (mipmap.asFloatImage() == NULL) - { - FloatImage * floatImage = new FloatImage(mipmap.asFixedImage()); - normalizeNormalMap(floatImage); - mipmap.setImage(floatImage); - } - else - { - normalizeNormalMap(mipmap.asFloatImage()); - mipmap.setImage(mipmap.asFloatImage()); - } - } - } - else - { - if (inputOptions.inputGamma != inputOptions.outputGamma) - { - mipmap.toFloatImage(inputOptions); - } - } -} - - -// Quantize the given mipmap according to the compression options. 
-void Compressor::Private::quantizeMipmap(Mipmap & mipmap, const CompressionOptions::Private & compressionOptions) const -{ - nvDebugCheck(mipmap.asFixedImage() != NULL); - - if (compressionOptions.binaryAlpha) - { - if (compressionOptions.enableAlphaDithering) - { - Quantize::FloydSteinberg_BinaryAlpha(mipmap.asMutableFixedImage(), compressionOptions.alphaThreshold); - } - else - { - Quantize::BinaryAlpha(mipmap.asMutableFixedImage(), compressionOptions.alphaThreshold); - } - } - - if (compressionOptions.enableColorDithering || compressionOptions.enableAlphaDithering) - { - uint rsize = 8; - uint gsize = 8; - uint bsize = 8; - uint asize = 8; - - if (compressionOptions.enableColorDithering) - { - if (compressionOptions.format >= Format_DXT1 && compressionOptions.format <= Format_DXT5) - { - rsize = 5; - gsize = 6; - bsize = 5; - } - else if (compressionOptions.format == Format_RGB) - { - uint rshift, gshift, bshift; - PixelFormat::maskShiftAndSize(compressionOptions.rmask, &rshift, &rsize); - PixelFormat::maskShiftAndSize(compressionOptions.gmask, &gshift, &gsize); - PixelFormat::maskShiftAndSize(compressionOptions.bmask, &bshift, &bsize); - } - } - - if (compressionOptions.enableAlphaDithering) - { - if (compressionOptions.format == Format_DXT3) - { - asize = 4; - } - else if (compressionOptions.format == Format_RGB) - { - uint ashift; - PixelFormat::maskShiftAndSize(compressionOptions.amask, &ashift, &asize); - } - } - - if (compressionOptions.binaryAlpha) - { - asize = 8; // Already quantized. - } - - Quantize::FloydSteinberg(mipmap.asMutableFixedImage(), rsize, gsize, bsize, asize); - } -} - - -// Compress the given mipmap. -bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const -{ - const Image * image = mipmap.asFixedImage(); - nvDebugCheck(image != NULL); - - FastCompressor fast; - fast.setImage(image, inputOptions.alphaMode); - - SlowCompressor slow; - slow.setImage(image, inputOptions.alphaMode); - - const bool useCuda = cudaEnabled && image->width() * image->height() >= 512; - - if (compressionOptions.format == Format_RGBA || compressionOptions.format == Format_RGB) - { - compressRGB(image, outputOptions, compressionOptions); - } - else if (compressionOptions.format == Format_DXT1) - { -#if defined(HAVE_S3QUANT) - if (compressionOptions.externalCompressor == "s3") - { - s3CompressDXT1(image, outputOptions); - } - else -#endif - -#if defined(HAVE_ATITC) - if (compressionOptions.externalCompressor == "ati") - { - atiCompressDXT1(image, outputOptions); - } - else -#endif - if (compressionOptions.quality == Quality_Fastest) - { - fast.compressDXT1(outputOptions); - } - else - { - if (useCuda) - { - nvDebugCheck(cudaSupported); - cuda->setImage(image, inputOptions.alphaMode); - cuda->compressDXT1(compressionOptions, outputOptions); - } - else - { - slow.compressDXT1(compressionOptions, outputOptions); - } - } - } - else if (compressionOptions.format == Format_DXT1a) - { - if (compressionOptions.quality == Quality_Fastest) - { - fast.compressDXT1a(outputOptions); - } - else - { - if (useCuda) - { - nvDebugCheck(cudaSupported); - /*cuda*/slow.compressDXT1a(compressionOptions, outputOptions); - } - else - { - slow.compressDXT1a(compressionOptions, outputOptions); - } - } - } - else if (compressionOptions.format == Format_DXT3) - { - if (compressionOptions.quality == Quality_Fastest) - { - fast.compressDXT3(outputOptions); - } - 
else - { - if (useCuda) - { - nvDebugCheck(cudaSupported); - cuda->setImage(image, inputOptions.alphaMode); - cuda->compressDXT3(compressionOptions, outputOptions); - } - else - { - slow.compressDXT3(compressionOptions, outputOptions); - } - } - } - else if (compressionOptions.format == Format_DXT5) - { - if (compressionOptions.quality == Quality_Fastest) - { - fast.compressDXT5(outputOptions); - } - else - { - if (useCuda) - { - nvDebugCheck(cudaSupported); - cuda->setImage(image, inputOptions.alphaMode); - cuda->compressDXT5(compressionOptions, outputOptions); - } - else - { - slow.compressDXT5(compressionOptions, outputOptions); - } - } - } - else if (compressionOptions.format == Format_DXT5n) - { - if (compressionOptions.quality == Quality_Fastest) - { - fast.compressDXT5n(outputOptions); - } - else - { - slow.compressDXT5n(compressionOptions, outputOptions); - } - } - else if (compressionOptions.format == Format_BC4) - { - slow.compressBC4(compressionOptions, outputOptions); - } - else if (compressionOptions.format == Format_BC5) - { - slow.compressBC5(compressionOptions, outputOptions); - } - - return true; -} - - -int Compressor::Private::estimateSize(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions) const -{ - const Format format = compressionOptions.format; - const uint bitCount = compressionOptions.bitcount; - - inputOptions.computeTargetExtents(); - - uint mipmapCount = inputOptions.realMipmapCount(); - - int size = 0; - - for (uint f = 0; f < inputOptions.faceCount; f++) - { - uint w = inputOptions.targetWidth; - uint h = inputOptions.targetHeight; - uint d = inputOptions.targetDepth; - - for (uint m = 0; m < mipmapCount; m++) - { - size += computeImageSize(w, h, d, bitCount, format); - - // Compute extents of next mipmap: - w = max(1U, w / 2); - h = max(1U, h / 2); - d = max(1U, d / 2); - } - } - - return size; -} Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressorDX10.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressorDX10.h +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressorDX10.h @@ -0,0 +1,71 @@ +// Copyright (c) 2009-2011 Ignacio Castano +// Copyright (c) 2007-2009 NVIDIA Corporation -- Ignacio Castano +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. 
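+// CPU block compressors for the DX10-era one- and two-channel formats:
+// BC4 (ATI1, 8 bytes per 4x4 block) and BC5 (ATI2, 16 bytes per 4x4 block),
+// each provided in a fast and a production-quality variant.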
+ +#ifndef NVTT_COMPRESSORDX10_H +#define NVTT_COMPRESSORDX10_H + +#include "BlockCompressor.h" + +namespace nv +{ + struct ColorBlock; + + // Fast CPU compressors. + struct FastCompressorBC4 : public ColorBlockCompressor + { + virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); + virtual uint blockSize() const { return 8; } + }; + + struct FastCompressorBC5 : public ColorBlockCompressor + { + virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); + virtual uint blockSize() const { return 16; } + }; + + + // Production CPU compressors. + struct ProductionCompressorBC4 : public ColorBlockCompressor + { + virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); + virtual uint blockSize() const { return 8; } + }; + + struct ProductionCompressorBC5 : public ColorBlockCompressor + { + virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); + virtual uint blockSize() const { return 16; } + }; + + /*struct ProductionCompressorBC5_Luma : public ColorSetCompressor + { + virtual void compressBlock(ColorSet & set, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); + virtual uint blockSize() const { return 16; } + };*/ + + +} // nv namespace + + +#endif // NVTT_COMPRESSORDX10_H Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressorDX10.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressorDX10.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressorDX10.cpp @@ -0,0 +1,122 @@ +// Copyright (c) 2009-2011 Ignacio Castano +// Copyright (c) 2007-2009 NVIDIA Corporation -- Ignacio Castano +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. 
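+// BC4/BC5 block compression: each channel of the 4x4 tile is copied into the
+// alpha slot of an AlphaBlock4x4 and encoded with the DXT5 alpha codec,
+// via QuickCompress for the fast variants and OptimalCompress for the
+// production variants.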
+ +#include "CompressorDX10.h" +#include "QuickCompressDXT.h" +#include "OptimalCompressDXT.h" + +#include "nvtt.h" + +#include "nvimage/ColorBlock.h" +#include "nvimage/BlockDXT.h" + +#include "nvmath/ftoi.h" + +#include // placement new + +using namespace nv; +using namespace nvtt; + + +void FastCompressorBC4::compressBlock(ColorBlock & src, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) +{ + BlockATI1 * block = new(output) BlockATI1; + + AlphaBlock4x4 tmp; + tmp.init(src, 0); // Copy red to alpha + QuickCompress::compressDXT5A(tmp, &block->alpha); +} + +void FastCompressorBC5::compressBlock(ColorBlock & src, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) +{ + BlockATI2 * block = new(output) BlockATI2; + + AlphaBlock4x4 tmp; + + tmp.init(src, 0); // Copy red to alpha + QuickCompress::compressDXT5A(tmp, &block->x); + + tmp.init(src, 1); // Copy green to alpha + QuickCompress::compressDXT5A(tmp, &block->y); +} + + +void ProductionCompressorBC4::compressBlock(ColorBlock & src, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) +{ + BlockATI1 * block = new(output) BlockATI1; + + AlphaBlock4x4 tmp; + tmp.init(src, 0); // Copy red to alpha + OptimalCompress::compressDXT5A(tmp, &block->alpha); +} + +void ProductionCompressorBC5::compressBlock(ColorBlock & src, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) +{ + BlockATI2 * block = new(output) BlockATI2; + + AlphaBlock4x4 tmp; + + tmp.init(src, 0); // Copy red to alpha + OptimalCompress::compressDXT5A(tmp, &block->x); + + tmp.init(src, 1); // Copy green to alpha + OptimalCompress::compressDXT5A(tmp, &block->y); +} + + +#if 0 +void ProductionCompressorBC5_Luma::compressBlock(ColorSet & set, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) +{ + BlockATI2 * block = new(output) BlockATI2; + + AlphaBlock4x4 tmp; + tmp.init(set, /*channel=*/0); + OptimalCompress::compressDXT5A(tmp, &block->x); + + // Decode block->x + AlphaBlock4x4 decoded; + block->x.decodeBlock(&decoded); + + const float R = 1.0f / 256.0f; // Maximum residual that we can represent. @@ Tweak this. + + // Compute residual block. + for (int i = 0; i < 16; i++) { + float in = set.color(i).x; // [0,1] + float out = float(decoded.alpha[i]) / 255.0f; // [0,1] + + float residual = (out - in); // [-1,1], but usually [-R,R] + + // Normalize residual to [-1,1] range. + residual /= R; + + // Pack in [0,1] range. 
+ residual = residual * 0.5f + 0.5f; + + tmp.alpha[i] = nv::ftoi_round(nv::saturate(residual) * 255.0f); + } + + OptimalCompress::compressDXT5A(tmp, &block->y); + +} +#endif // 0 \ No newline at end of file Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressorDX11.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressorDX11.h +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressorDX11.h @@ -0,0 +1,46 @@ +// Copyright (c) 2009-2011 Ignacio Castano +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +#ifndef NVTT_COMPRESSORDX11_H +#define NVTT_COMPRESSORDX11_H + +#include "BlockCompressor.h" + +namespace nv +{ + struct CompressorBC6 : public FloatColorCompressor + { + virtual void compressBlock(const Vector4 colors[16], const float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output); + virtual uint blockSize() const { return 16; } + }; + + struct CompressorBC7 : public FloatColorCompressor + { + virtual void compressBlock(const Vector4 colors[16], const float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output); + virtual uint blockSize() const { return 16; } + }; + +} // nv namespace + + +#endif // NVTT_COMPRESSORDX11_H Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressorDX11.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressorDX11.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressorDX11.cpp @@ -0,0 +1,102 @@ +// Copyright (c) 2009-2011 Ignacio Castano +// Copyright (c) 2007-2009 NVIDIA Corporation -- Ignacio Castano +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +#include "CompressorDX11.h" + +#include "nvtt.h" +#include "CompressionOptions.h" +#include "nvimage/ColorBlock.h" +#include "nvmath/Half.h" +#include "nvmath/Vector.inl" + +#include "bc6h/zoh.h" +#include "bc7/avpcl.h" + +#include // memset + +using namespace nv; +using namespace nvtt; + + +void CompressorBC6::compressBlock(const Vector4 colors[16], const float weights[16], const CompressionOptions::Private & compressionOptions, void * output) +{ + // !!!UNDONE: support channel weights + // !!!UNDONE: set flags once, not per block (this is especially sketchy since block compression is multithreaded...) + + if (compressionOptions.pixelType == PixelType_UnsignedFloat || + compressionOptions.pixelType == PixelType_UnsignedNorm || + compressionOptions.pixelType == PixelType_UnsignedInt) + { + ZOH::Utils::FORMAT = ZOH::UNSIGNED_F16; + } + else + { + ZOH::Utils::FORMAT = ZOH::SIGNED_F16; + } + + // Convert NVTT's tile struct to ZOH's, and convert float to half. + ZOH::Tile zohTile(4, 4); + memset(zohTile.data, 0, sizeof(zohTile.data)); + memset(zohTile.importance_map, 0, sizeof(zohTile.importance_map)); + for (uint y = 0; y < 4; ++y) + { + for (uint x = 0; x < 4; ++x) + { + Vector4 color = colors[4*y+x]; + uint16 rHalf = to_half(color.x); + uint16 gHalf = to_half(color.y); + uint16 bHalf = to_half(color.z); + zohTile.data[y][x].x = ZOH::Tile::half2float(rHalf); + zohTile.data[y][x].y = ZOH::Tile::half2float(gHalf); + zohTile.data[y][x].z = ZOH::Tile::half2float(bHalf); + zohTile.importance_map[y][x] = weights[4*y+x]; + } + } + + ZOH::compress(zohTile, (char *)output); +} + +void CompressorBC7::compressBlock(const Vector4 colors[16], const float weights[16], const CompressionOptions::Private & compressionOptions, void * output) +{ + // !!!UNDONE: support channel weights + // !!!UNDONE: set flags once, not per block (this is especially sketchy since block compression is multithreaded...) + + AVPCL::mode_rgb = false; + AVPCL::flag_premult = false; //(alphaMode == AlphaMode_Premultiplied); + AVPCL::flag_nonuniform = false; + AVPCL::flag_nonuniform_ati = false; + + // Convert NVTT's tile struct to AVPCL's. 
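+    // AVPCL operates on 8-bit UNORM data, so the [0,1] float inputs are scaled
+    // to [0,255]; the per-pixel importance weights are not forwarded and every
+    // texel gets an importance of 1.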
+ AVPCL::Tile avpclTile(4, 4); + memset(avpclTile.data, 0, sizeof(avpclTile.data)); + for (uint y = 0; y < 4; ++y) { + for (uint x = 0; x < 4; ++x) { + Vector4 color = colors[4*y+x]; + avpclTile.data[y][x] = color * 255.0f; + avpclTile.importance_map[y][x] = 1.0f; //weights[4*y+x]; + } + } + + AVPCL::compress(avpclTile, (char *)output); +} Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressorDX9.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressorDX9.h +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressorDX9.h @@ -0,0 +1,156 @@ +// Copyright (c) 2009-2011 Ignacio Castano +// Copyright (c) 2007-2009 NVIDIA Corporation -- Ignacio Castano +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +#ifndef NVTT_COMPRESSORDX9_H +#define NVTT_COMPRESSORDX9_H + +#include "BlockCompressor.h" + +namespace nv +{ + struct ColorBlock; + + // Fast CPU compressors. + struct FastCompressorDXT1 : public ColorBlockCompressor + { + virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); + virtual uint blockSize() const { return 8; } + }; + + struct FastCompressorDXT1a : public ColorBlockCompressor + { + virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); + virtual uint blockSize() const { return 8; } + }; + + struct FastCompressorDXT3 : public ColorBlockCompressor + { + virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); + virtual uint blockSize() const { return 16; } + }; + + struct FastCompressorDXT5 : public ColorBlockCompressor + { + virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); + virtual uint blockSize() const { return 16; } + }; + + struct FastCompressorDXT5n : public ColorBlockCompressor + { + virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); + virtual uint blockSize() const { return 16; } + }; + + + // Normal CPU compressors. 
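+    // Higher-quality CPU compressors for the DXT formats. The "#if 1" below selects
+    // the newer float-based CompressorDXT1 (a FloatColorCompressor) over the legacy
+    // squish-based ColorBlockCompressor variant.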
+#if 1 + struct CompressorDXT1 : public FloatColorCompressor + { + virtual void compressBlock(const Vector4 colors[16], const float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output); + virtual uint blockSize() const { return 8; } + }; +#else + struct CompressorDXT1 : public ColorBlockCompressor + { + virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); + virtual uint blockSize() const { return 8; } + }; +#endif + + struct CompressorDXT1a : public ColorBlockCompressor + { + virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); + virtual uint blockSize() const { return 8; } + }; + + struct CompressorDXT1_Luma : public ColorBlockCompressor + { + virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); + virtual uint blockSize() const { return 8; } + }; + + struct CompressorDXT3 : public ColorBlockCompressor + { + virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); + virtual uint blockSize() const { return 16; } + }; + + struct CompressorDXT5 : public ColorBlockCompressor + { + virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); + virtual uint blockSize() const { return 16; } + }; + + struct CompressorDXT5n : public ColorBlockCompressor + { + virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); + virtual uint blockSize() const { return 16; } + }; + + struct CompressorBC3_RGBM : public FloatColorCompressor + { + virtual void compressBlock(const Vector4 colors[16], const float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output); + virtual uint blockSize() const { return 16; } + }; + + + // External compressors. 
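+    // Wrappers around optional third-party encoders (ATI Compress, squish, D3DX, stb);
+    // each one is compiled in only when the matching HAVE_* macro is defined.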
+#if defined(HAVE_ATITC) + struct AtiCompressorDXT1 : public CompressorInterface + { + virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); + }; + + struct AtiCompressorDXT5 : public CompressorInterface + { + virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); + }; +#endif + +#if defined(HAVE_SQUISH) + struct SquishCompressorDXT1 : public CompressorInterface + { + virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); + }; +#endif + +#if defined(HAVE_D3DX) + struct D3DXCompressorDXT1 : public CompressorInterface + { + virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); + }; +#endif + +#if defined(HAVE_STB) + struct StbCompressorDXT1 : public ColorBlockCompressor + { + virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); + virtual uint blockSize() const { return 8; } + }; +#endif + +} // nv namespace + + +#endif // NVTT_COMPRESSORDX9_H Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressorDX9.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressorDX9.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressorDX9.cpp @@ -0,0 +1,499 @@ +// Copyright (c) 2009-2011 Ignacio Castano +// Copyright (c) 2007-2009 NVIDIA Corporation -- Ignacio Castano +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. 
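+// CPU compressors for the DX9-era formats (DXT1, DXT1a, DXT3, DXT5, DXT5n).
+// Fast paths use QuickCompress; the normal paths combine squish's weighted
+// cluster fit with an optimal single-color fallback, and the optional
+// external encoders are wrapped at the end of the file.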
+ +#include "CompressorDX9.h" +#include "QuickCompressDXT.h" +#include "OptimalCompressDXT.h" +#include "CompressionOptions.h" +#include "OutputOptions.h" +#include "ClusterFit.h" +#include "CompressorDXT1.h" +#include "CompressorDXT5_RGBM.h" + +// squish +#include "squish/colourset.h" +#include "squish/weightedclusterfit.h" + +#include "nvtt.h" + +#include "nvimage/Image.h" +#include "nvimage/ColorBlock.h" +#include "nvimage/BlockDXT.h" + +#include "nvmath/Vector.inl" +#include "nvmath/Color.inl" + +#include "nvcore/Memory.h" + +#include // placement new + +// s3_quant +#if defined(HAVE_S3QUANT) +#include "s3tc/s3_quant.h" +#endif + +// ati tc +#if defined(HAVE_ATITC) +typedef int BOOL; +typedef _W64 unsigned long ULONG_PTR; +typedef ULONG_PTR DWORD_PTR; +#include "atitc/ATI_Compress.h" +#endif + +// squish +#if defined(HAVE_SQUISH) +//#include "squish/squish.h" +#include "squish-1.10/squish.h" +#endif + +// d3dx +#if defined(HAVE_D3DX) +#include +#endif + +// stb +#if defined(HAVE_STB) +#define STB_DEFINE +#include "stb/stb_dxt.h" +#endif + +using namespace nv; +using namespace nvtt; + + +void FastCompressorDXT1::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) +{ + BlockDXT1 * block = new(output) BlockDXT1; + QuickCompress::compressDXT1(rgba, block); +} + +void FastCompressorDXT1a::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) +{ + BlockDXT1 * block = new(output) BlockDXT1; + QuickCompress::compressDXT1a(rgba, block); +} + +void FastCompressorDXT3::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) +{ + BlockDXT3 * block = new(output) BlockDXT3; + QuickCompress::compressDXT3(rgba, block); +} + +void FastCompressorDXT5::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) +{ + BlockDXT5 * block = new(output) BlockDXT5; + QuickCompress::compressDXT5(rgba, block); +} + +void FastCompressorDXT5n::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) +{ + rgba.swizzle(4, 1, 5, 0); // 0xFF, G, 0, R + + BlockDXT5 * block = new(output) BlockDXT5; + QuickCompress::compressDXT5(rgba, block); +} + + +#if 1 + +void CompressorDXT1::compressBlock(const Vector4 colors[16], const float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output) +{ + compress_dxt1(colors, weights, compressionOptions.colorWeight.xyz(), /*three_color_mode*/true, (BlockDXT1 *)output); +} + +#else +void CompressorDXT1::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) +{ + nvsquish::WeightedClusterFit fit; + fit.SetMetric(compressionOptions.colorWeight.x, compressionOptions.colorWeight.y, compressionOptions.colorWeight.z); + + if (rgba.isSingleColor()) + { + BlockDXT1 * block = new(output) BlockDXT1; + OptimalCompress::compressDXT1(rgba.color(0), block); + } + else + { + nvsquish::ColourSet colours((uint8 *)rgba.colors(), 0); + fit.SetColourSet(&colours, nvsquish::kDxt1); + fit.Compress(output); + } +} +#endif + +void CompressorDXT1a::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) +{ + uint 
alphaMask = 0; + for (uint i = 0; i < 16; i++) + { + if (rgba.color(i).a == 0) alphaMask |= (3 << (i * 2)); // Set two bits for each color. + } + + const bool isSingleColor = rgba.isSingleColor(); + + if (isSingleColor) + { + BlockDXT1 * block = new(output) BlockDXT1; + OptimalCompress::compressDXT1a(rgba.color(0), alphaMask, block); + } + else + { + nvsquish::WeightedClusterFit fit; + fit.SetMetric(compressionOptions.colorWeight.x, compressionOptions.colorWeight.y, compressionOptions.colorWeight.z); + + int flags = nvsquish::kDxt1; + if (alphaMode == nvtt::AlphaMode_Transparency) flags |= nvsquish::kWeightColourByAlpha; + + nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags); + fit.SetColourSet(&colours, nvsquish::kDxt1); + + fit.Compress(output); + } +} + +void CompressorDXT1_Luma::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) +{ + BlockDXT1 * block = new(output) BlockDXT1; + OptimalCompress::compressDXT1_Luma(rgba, block); +} + +void CompressorDXT3::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) +{ + BlockDXT3 * block = new(output) BlockDXT3; + + // Compress explicit alpha. + OptimalCompress::compressDXT3A(rgba, &block->alpha); + + // Compress color. + if (rgba.isSingleColor()) + { + OptimalCompress::compressDXT1(rgba.color(0), &block->color); + } + else + { + nvsquish::WeightedClusterFit fit; + fit.SetMetric(compressionOptions.colorWeight.x, compressionOptions.colorWeight.y, compressionOptions.colorWeight.z); + + int flags = 0; + if (alphaMode == nvtt::AlphaMode_Transparency) flags |= nvsquish::kWeightColourByAlpha; + + nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags); + fit.SetColourSet(&colours, 0); + fit.Compress(&block->color); + } +} + +void CompressorDXT5::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) +{ + BlockDXT5 * block = new(output) BlockDXT5; + + // Compress alpha. + if (compressionOptions.quality == Quality_Highest) + { + OptimalCompress::compressDXT5A(rgba, &block->alpha); + } + else + { + QuickCompress::compressDXT5A(rgba, &block->alpha); + } + + // Compress color. + if (rgba.isSingleColor()) + { + OptimalCompress::compressDXT1(rgba.color(0), &block->color); + } + else + { + nvsquish::WeightedClusterFit fit; + fit.SetMetric(compressionOptions.colorWeight.x, compressionOptions.colorWeight.y, compressionOptions.colorWeight.z); + + int flags = 0; + if (alphaMode == nvtt::AlphaMode_Transparency) flags |= nvsquish::kWeightColourByAlpha; + + nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags); + fit.SetColourSet(&colours, 0); + fit.Compress(&block->color); + } +} + + +void CompressorDXT5n::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) +{ + BlockDXT5 * block = new(output) BlockDXT5; + + // Compress Y. + if (compressionOptions.quality == Quality_Highest) + { + OptimalCompress::compressDXT1G(rgba, &block->color); + } + else + { + if (rgba.isSingleColor(Color32(0, 0xFF, 0, 0))) // Mask all but green channel. + { + OptimalCompress::compressDXT1G(rgba.color(0).g, &block->color); + } + else + { + ColorBlock tile = rgba; + tile.swizzle(4, 1, 5, 3); // leave alpha in alpha channel. 
+ + nvsquish::WeightedClusterFit fit; + fit.SetMetric(0, 1, 0); + + int flags = 0; + if (alphaMode == nvtt::AlphaMode_Transparency) flags |= nvsquish::kWeightColourByAlpha; + + nvsquish::ColourSet colours((uint8 *)tile.colors(), flags); + fit.SetColourSet(&colours, 0); + fit.Compress(&block->color); + } + } + + rgba.swizzle(4, 1, 5, 0); // 1, G, 0, R + + // Compress X. + if (compressionOptions.quality == Quality_Highest) + { + OptimalCompress::compressDXT5A(rgba, &block->alpha); + } + else + { + QuickCompress::compressDXT5A(rgba, &block->alpha); + } +} + + +void CompressorBC3_RGBM::compressBlock(const Vector4 colors[16], const float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output) +{ + float min_m = 0.25f; // @@ Get from compression options. + compress_dxt5_rgbm(colors, weights, min_m, (BlockDXT5 *)output); +} + + +#if defined(HAVE_ATITC) + +void AtiCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) +{ + nvDebugCheck(d == 1); + + // Init source texture + ATI_TC_Texture srcTexture; + srcTexture.dwSize = sizeof(srcTexture); + srcTexture.dwWidth = w; + srcTexture.dwHeight = h; + if (inputFormat == nvtt::InputFormat_BGRA_8UB) + { + srcTexture.dwPitch = w * 4; + srcTexture.format = ATI_TC_FORMAT_ARGB_8888; + } + else + { + // @@ Floating point input is not swizzled. + srcTexture.dwPitch = w * 16; + srcTexture.format = ATI_TC_FORMAT_ARGB_32F; + } + srcTexture.dwDataSize = ATI_TC_CalculateBufferSize(&srcTexture); + srcTexture.pData = (ATI_TC_BYTE*) data; + + // Init dest texture + ATI_TC_Texture destTexture; + destTexture.dwSize = sizeof(destTexture); + destTexture.dwWidth = w; + destTexture.dwHeight = h; + destTexture.dwPitch = 0; + destTexture.format = ATI_TC_FORMAT_DXT1; + destTexture.dwDataSize = ATI_TC_CalculateBufferSize(&destTexture); + destTexture.pData = (ATI_TC_BYTE*) mem::malloc(destTexture.dwDataSize); + + ATI_TC_CompressOptions options; + options.dwSize = sizeof(options); + options.bUseChannelWeighting = false; + options.bUseAdaptiveWeighting = false; + options.bDXT1UseAlpha = false; + options.nCompressionSpeed = ATI_TC_Speed_Normal; + options.bDisableMultiThreading = false; + //options.bDisableMultiThreading = true; + + // Compress + ATI_TC_ConvertTexture(&srcTexture, &destTexture, &options, NULL, NULL, NULL); + + if (outputOptions.outputHandler != NULL) { + outputOptions.outputHandler->writeData(destTexture.pData, destTexture.dwDataSize); + } + + mem::free(destTexture.pData); +} + +void AtiCompressorDXT5::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) +{ + nvDebugCheck(d == 1); + + // Init source texture + ATI_TC_Texture srcTexture; + srcTexture.dwSize = sizeof(srcTexture); + srcTexture.dwWidth = w; + srcTexture.dwHeight = h; + if (inputFormat == nvtt::InputFormat_BGRA_8UB) + { + srcTexture.dwPitch = w * 4; + srcTexture.format = ATI_TC_FORMAT_ARGB_8888; + } + else + { + srcTexture.dwPitch = w * 16; + srcTexture.format = ATI_TC_FORMAT_ARGB_32F; + } + srcTexture.dwDataSize = ATI_TC_CalculateBufferSize(&srcTexture); + srcTexture.pData = (ATI_TC_BYTE*) data; + + // Init dest texture + ATI_TC_Texture destTexture; + destTexture.dwSize = sizeof(destTexture); + destTexture.dwWidth = w; + 
destTexture.dwHeight = h; + destTexture.dwPitch = 0; + destTexture.format = ATI_TC_FORMAT_DXT5; + destTexture.dwDataSize = ATI_TC_CalculateBufferSize(&destTexture); + destTexture.pData = (ATI_TC_BYTE*) mem::malloc(destTexture.dwDataSize); + + // Compress + ATI_TC_ConvertTexture(&srcTexture, &destTexture, NULL, NULL, NULL, NULL); + + if (outputOptions.outputHandler != NULL) { + outputOptions.outputHandler->writeData(destTexture.pData, destTexture.dwDataSize); + } + + mem::free(destTexture.pData); +} + +#endif // defined(HAVE_ATITC) + +#if defined(HAVE_SQUISH) + +void SquishCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) +{ + nvDebugCheck(d == 1); + nvDebugCheck(false); + +#pragma message(NV_FILE_LINE "TODO: Convert input to fixed point ABGR format instead of ARGB") + /* + Image img(*image); + int count = img.width() * img.height(); + for (int i = 0; i < count; i++) + { + Color32 c = img.pixel(i); + img.pixel(i) = Color32(c.b, c.g, c.r, c.a); + } + + int size = squish::GetStorageRequirements(img.width(), img.height(), squish::kDxt1); + void * blocks = mem::malloc(size); + + squish::CompressImage((const squish::u8 *)img.pixels(), img.width(), img.height(), blocks, squish::kDxt1 | squish::kColourClusterFit); + + if (outputOptions.outputHandler != NULL) { + outputOptions.outputHandler->writeData(blocks, size); + } + + mem::free(blocks); + */ +} + +#endif // defined(HAVE_SQUISH) + + +#if defined(HAVE_D3DX) + +void D3DXCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) +{ + nvDebugCheck(d == 1); + + IDirect3D9 * d3d = Direct3DCreate9(D3D_SDK_VERSION); + + D3DPRESENT_PARAMETERS presentParams; + ZeroMemory(&presentParams, sizeof(presentParams)); + presentParams.Windowed = TRUE; + presentParams.SwapEffect = D3DSWAPEFFECT_COPY; + presentParams.BackBufferWidth = 8; + presentParams.BackBufferHeight = 8; + presentParams.BackBufferFormat = D3DFMT_UNKNOWN; + + HRESULT err; + + IDirect3DDevice9 * device = NULL; + err = d3d->CreateDevice(D3DADAPTER_DEFAULT, D3DDEVTYPE_REF, GetDesktopWindow(), D3DCREATE_SOFTWARE_VERTEXPROCESSING, &presentParams, &device); + + IDirect3DTexture9 * texture = NULL; + err = D3DXCreateTexture(device, w, h, 1, 0, D3DFMT_DXT1, D3DPOOL_SYSTEMMEM, &texture); + + IDirect3DSurface9 * surface = NULL; + err = texture->GetSurfaceLevel(0, &surface); + + RECT rect; + rect.left = 0; + rect.top = 0; + rect.bottom = h; + rect.right = w; + + if (inputFormat == nvtt::InputFormat_BGRA_8UB) + { + err = D3DXLoadSurfaceFromMemory(surface, NULL, NULL, data, D3DFMT_A8R8G8B8, w * 4, NULL, &rect, D3DX_DEFAULT, 0); + } + else + { + err = D3DXLoadSurfaceFromMemory(surface, NULL, NULL, data, D3DFMT_A32B32G32R32F, w * 16, NULL, &rect, D3DX_DEFAULT, 0); + } + + if (err != D3DERR_INVALIDCALL && err != D3DXERR_INVALIDDATA) + { + D3DLOCKED_RECT rect; + ZeroMemory(&rect, sizeof(rect)); + + err = surface->LockRect(&rect, NULL, D3DLOCK_READONLY); + + if (outputOptions.outputHandler != NULL) { + int size = rect.Pitch * ((h + 3) / 4); + outputOptions.outputHandler->writeData(rect.pBits, size); + } + + err = surface->UnlockRect(); + } + + surface->Release(); + device->Release(); + d3d->Release(); +} + +#endif // defined(HAVE_D3DX) + + +#if 
defined(HAVE_STB) + +void StbCompressorDXT1::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) +{ + rgba.swizzle(2, 1, 0, 3); // Swap R and B + stb_compress_dxt_block((unsigned char *)output, (unsigned char *)rgba.colors(), 0, 0); +} + + +#endif // defined(HAVE_STB) Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressorDXT1.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressorDXT1.h +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressorDXT1.h @@ -0,0 +1,23 @@ + +namespace nv { + + class Color32; + struct ColorBlock; + struct BlockDXT1; + class Vector3; + class Vector4; + + // All these functions return MSE. + + float compress_dxt1_single_color_optimal(Color32 c, BlockDXT1 * output); + float compress_dxt1_single_color_optimal(const Vector3 & color, BlockDXT1 * output); + + float compress_dxt1_single_color(const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, BlockDXT1 * output); + float compress_dxt1_least_squares_fit(const Vector4 input_colors[16], const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, BlockDXT1 * output); + float compress_dxt1_bounding_box_exhaustive(const Vector4 input_colors[16], const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, bool three_color_mode, int search_limit, BlockDXT1 * output); + void compress_dxt1_cluster_fit(const Vector4 input_colors[16], const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, bool three_color_mode, BlockDXT1 * output); + + + float compress_dxt1(const Vector4 input_colors[16], const float input_weights[16], const Vector3 & color_weights, bool three_color_mode, BlockDXT1 * output); + +} Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressorDXT1.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressorDXT1.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressorDXT1.cpp @@ -0,0 +1,799 @@ + +#include "CompressorDXT1.h" +#include "SingleColorLookup.h" +#include "ClusterFit.h" + +#include "nvimage/ColorBlock.h" +#include "nvimage/BlockDXT.h" + +#include "nvmath/Color.inl" +#include "nvmath/Vector.inl" +#include "nvmath/Fitting.h" +#include "nvmath/ftoi.h" + +#include "nvcore/Utils.h" // swap + +#include // memset +#include // FLT_MAX + + +using namespace nv; + + +/////////////////////////////////////////////////////////////////////////////////////////////////// +// Color conversion functions. 
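+// midpoints5/midpoints6 store the midpoint between consecutive 5-bit and 6-bit
+// channel values after 565 bit-expansion; vector3_to_color16 truncates and then
+// rounds up whenever the input lies above the midpoint, i.e. round-to-nearest
+// in the expanded 8-bit space (see the commented-out init_tables below).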
+ +static const float midpoints5[32] = { + 0.015686f, 0.047059f, 0.078431f, 0.111765f, 0.145098f, 0.176471f, 0.207843f, 0.241176f, 0.274510f, 0.305882f, 0.337255f, 0.370588f, 0.403922f, 0.435294f, 0.466667f, 0.5f, + 0.533333f, 0.564706f, 0.596078f, 0.629412f, 0.662745f, 0.694118f, 0.725490f, 0.758824f, 0.792157f, 0.823529f, 0.854902f, 0.888235f, 0.921569f, 0.952941f, 0.984314f, 1.0f +}; + +static const float midpoints6[64] = { + 0.007843f, 0.023529f, 0.039216f, 0.054902f, 0.070588f, 0.086275f, 0.101961f, 0.117647f, 0.133333f, 0.149020f, 0.164706f, 0.180392f, 0.196078f, 0.211765f, 0.227451f, 0.245098f, + 0.262745f, 0.278431f, 0.294118f, 0.309804f, 0.325490f, 0.341176f, 0.356863f, 0.372549f, 0.388235f, 0.403922f, 0.419608f, 0.435294f, 0.450980f, 0.466667f, 0.482353f, 0.500000f, + 0.517647f, 0.533333f, 0.549020f, 0.564706f, 0.580392f, 0.596078f, 0.611765f, 0.627451f, 0.643137f, 0.658824f, 0.674510f, 0.690196f, 0.705882f, 0.721569f, 0.737255f, 0.754902f, + 0.772549f, 0.788235f, 0.803922f, 0.819608f, 0.835294f, 0.850980f, 0.866667f, 0.882353f, 0.898039f, 0.913725f, 0.929412f, 0.945098f, 0.960784f, 0.976471f, 0.992157f, 1.0f +}; + +/*void init_tables() { + for (int i = 0; i < 31; i++) { + float f0 = float(((i+0) << 3) | ((i+0) >> 2)) / 255.0f; + float f1 = float(((i+1) << 3) | ((i+1) >> 2)) / 255.0f; + midpoints5[i] = (f0 + f1) * 0.5; + } + midpoints5[31] = 1.0f; + + for (int i = 0; i < 63; i++) { + float f0 = float(((i+0) << 2) | ((i+0) >> 4)) / 255.0f; + float f1 = float(((i+1) << 2) | ((i+1) >> 4)) / 255.0f; + midpoints6[i] = (f0 + f1) * 0.5; + } + midpoints6[63] = 1.0f; +}*/ + +static Color16 vector3_to_color16(const Vector3 & v) { + // Truncate. + uint r = ftoi_trunc(clamp(v.x * 31.0f, 0.0f, 31.0f)); + uint g = ftoi_trunc(clamp(v.y * 63.0f, 0.0f, 63.0f)); + uint b = ftoi_trunc(clamp(v.z * 31.0f, 0.0f, 31.0f)); + + // Round exactly according to 565 bit-expansion. + r += (v.x > midpoints5[r]); + g += (v.y > midpoints6[g]); + b += (v.z > midpoints5[b]); + + return Color16((r << 11) | (g << 5) | b); +} + + +static Color32 bitexpand_color16_to_color32(Color16 c16) { + Color32 c32; + //c32.b = (c16.b << 3) | (c16.b >> 2); + //c32.g = (c16.g << 2) | (c16.g >> 4); + //c32.r = (c16.r << 3) | (c16.r >> 2); + //c32.a = 0xFF; + + c32.u = ((c16.u << 3) & 0xf8) | ((c16.u << 5) & 0xfc00) | ((c16.u << 8) & 0xf80000); + c32.u |= (c32.u >> 5) & 0x070007; + c32.u |= (c32.u >> 6) & 0x000300; + + return c32; +} + +/*static Color32 bitexpand_color16_to_color32(int r, int g, int b) { + Color32 c32; + c32.b = (b << 3) | (b >> 2); + c32.g = (g << 2) | (g >> 4); + c32.r = (r << 3) | (r >> 2); + c32.a = 0xFF; + return c32; +}*/ + +static Color16 truncate_color32_to_color16(Color32 c32) { + Color16 c16; + c16.b = (c32.b >> 3); + c16.g = (c32.g >> 2); + c16.r = (c32.r >> 3); + return c16; +} + +/*inline Vector3 r5g6b5_to_vector3(int r, int g, int b) +{ + Vector3 c; + c.x = float((r << 3) | (r >> 2)); + c.y = float((g << 2) | (g >> 4)); + c.z = float((b << 3) | (b >> 2)); + return c; +}*/ + +inline Vector3 color_to_vector3(Color32 c) +{ + const float scale = 1.0f / 255.0f; + return Vector3(c.r * scale, c.g * scale, c.b * scale); +} + +inline Color32 vector3_to_color(Vector3 v) +{ + Color32 color; + color.r = U8(ftoi_round(saturate(v.x) * 255)); + color.g = U8(ftoi_round(saturate(v.y) * 255)); + color.b = U8(ftoi_round(saturate(v.z) * 255)); + color.a = 255; + return color; +} + + +/////////////////////////////////////////////////////////////////////////////////////////////////// +// Input block processing. 
+ +inline static void color_block_to_vector_block(const ColorBlock & rgba, Vector3 block[16]) +{ + for (int i = 0; i < 16; i++) + { + const Color32 c = rgba.color(i); + block[i] = Vector3(c.r, c.g, c.b); + } +} + +// Find first valid color. +static bool find_valid_color_rgb(const Vector3 * colors, const float * weights, int count, Vector3 * valid_color) +{ + for (int i = 0; i < count; i++) { + if (weights[i] > 0.0f) { + *valid_color = colors[i]; + return true; + } + } + + // No valid colors. + return false; +} + +static bool is_single_color_rgb(const Vector3 * colors, const float * weights, int count, Vector3 color) +{ + for (int i = 0; i < count; i++) { + if (weights[i] > 0.0f) { + if (colors[i] != color) return false; + } + } + + return true; +} + +// Find similar colors and combine them together. +static int reduce_colors(const Vector4 * input_colors, const float * input_weights, Vector3 * colors, float * weights) +{ + int n = 0; + for (int i = 0; i < 16; i++) + { + Vector3 ci = input_colors[i].xyz(); + float wi = input_weights[i]; + + if (wi > 0) { + // Find matching color. + int j; + for (j = 0; j < n; j++) { + if (equal(colors[j].x, ci.x) && equal(colors[j].y, ci.y) && equal(colors[j].z, ci.z)) { + weights[j] += wi; + break; + } + } + + // No match found. Add new color. + if (j == n) { + colors[n] = ci; + weights[n] = wi; + n++; + } + } + } + + nvDebugCheck(n <= 16); + + return n; +} + + +/////////////////////////////////////////////////////////////////////////////////////////////////// +// Error evaluation. + +// Different ways of estimating the error. +/*static float evaluate_mse(const Vector3 & p, const Vector3 & c) { + //return (square(p.x-c.x) * w2.x + square(p.y-c.y) * w2.y + square(p.z-c.z) * w2.z); + Vector3 d = (p - c); + return dot(d, d); +}*/ + +static float evaluate_mse(const Vector3 & p, const Vector3 & c, const Vector3 & w) { + //return (square(p.x-c.x) * w2.x + square(p.y-c.y) * w2.y + square(p.z-c.z) * w2.z); + Vector3 d = (p - c) * w; + return dot(d, d); +} + +/*static float evaluate_mse(const Vector3 & p, const Vector3 & c, const Vector3 & w) { + return ww.x * square(p.x-c.x) + ww.y * square(p.y-c.y) + ww.z * square(p.z-c.z); +}*/ + +static int evaluate_mse(const Color32 & p, const Color32 & c) { + return (square(int(p.r)-c.r) + square(int(p.g)-c.g) + square(int(p.b)-c.b)); +} + +static float evaluate_mse(const Vector3 palette[4], const Vector3 & c, const Vector3 & w) { + float e0 = evaluate_mse(palette[0], c, w); + float e1 = evaluate_mse(palette[1], c, w); + float e2 = evaluate_mse(palette[2], c, w); + float e3 = evaluate_mse(palette[3], c, w); + return min(min(e0, e1), min(e2, e3)); +} + +static int evaluate_mse(const Color32 palette[4], const Color32 & c) { + int e0 = evaluate_mse(palette[0], c); + int e1 = evaluate_mse(palette[1], c); + int e2 = evaluate_mse(palette[2], c); + int e3 = evaluate_mse(palette[3], c); + return min(min(e0, e1), min(e2, e3)); +} + +// Returns MSE error in [0-255] range. +static int evaluate_mse(const BlockDXT1 * output, Color32 color, int index) { + Color32 palette[4]; + output->evaluatePalette(palette, /*d3d9=*/false); + + return evaluate_mse(palette[index], color); +} + +// Returns weighted MSE error in [0-255] range. 
+static float evaluate_palette_error(Color32 palette[4], const Color32 * colors, const float * weights, int count) { + + float total = 0.0f; + for (int i = 0; i < count; i++) { + total += weights[i] * evaluate_mse(palette, colors[i]); + } + + return total; +} + +#if 0 +static float evaluate_mse(const BlockDXT1 * output, const Vector3 colors[16]) { + Color32 palette[4]; + output->evaluatePalette(palette, /*d3d9=*/false); + + // convert palette to float. + Vector3 vector_palette[4]; + for (int i = 0; i < 4; i++) { + vector_palette[i] = color_to_vector3(palette[i]); + } + + // evaluate error for each index. + float error = 0.0f; + for (int i = 0; i < 16; i++) { + int index = (output->indices >> (2*i)) & 3; // @@ Is this the right order? + error += evaluate_mse(vector_palette[index], colors[i]); + } + + return error; +} +#endif + +static float evaluate_mse(const Vector4 input_colors[16], const float input_weights[16], const Vector3 & color_weights, const BlockDXT1 * output) { + Color32 palette[4]; + output->evaluatePalette(palette, /*d3d9=*/false); + + // convert palette to float. + Vector3 vector_palette[4]; + for (int i = 0; i < 4; i++) { + vector_palette[i] = color_to_vector3(palette[i]); + } + + // evaluate error for each index. + float error = 0.0f; + for (int i = 0; i < 16; i++) { + int index = (output->indices >> (2 * i)) & 3; + error += input_weights[i] * evaluate_mse(vector_palette[index], input_colors[i].xyz(), color_weights); + } + return error; +} + + + +/////////////////////////////////////////////////////////////////////////////////////////////////// +// Palette evaluation. + +static void evaluate_palette4(Color32 palette[4]) { + palette[2].r = (2 * palette[0].r + palette[1].r) / 3; + palette[2].g = (2 * palette[0].g + palette[1].g) / 3; + palette[2].b = (2 * palette[0].b + palette[1].b) / 3; + palette[3].r = (2 * palette[1].r + palette[0].r) / 3; + palette[3].g = (2 * palette[1].g + palette[0].g) / 3; + palette[3].b = (2 * palette[1].b + palette[0].b) / 3; +} + +static void evaluate_palette3(Color32 palette[4]) { + palette[2].r = (palette[0].r + palette[1].r) / 2; + palette[2].g = (palette[0].g + palette[1].g) / 2; + palette[2].b = (palette[0].b + palette[1].b) / 2; + palette[3].r = 0; + palette[3].g = 0; + palette[3].b = 0; +} + +static void evaluate_palette(Color16 c0, Color16 c1, Color32 palette[4]) { + palette[0] = bitexpand_color16_to_color32(c0); + palette[1] = bitexpand_color16_to_color32(c1); + if (c0.u > c1.u) { + evaluate_palette4(palette); + } + else { + evaluate_palette3(palette); + } +} + +static void evaluate_palette(Color16 c0, Color16 c1, Vector3 palette[4]) { + Color32 palette32[4]; + evaluate_palette(c0, c1, palette32); + + for (int i = 0; i < 4; i++) { + palette[i] = color_to_vector3(palette32[i]); + } +} + +static void evaluate_palette3(Color16 c0, Color16 c1, Vector3 palette[4]) { + nvDebugCheck(c0.u > c1.u); + + Color32 palette32[4]; + evaluate_palette(c0, c1, palette32); + + for (int i = 0; i < 4; i++) { + palette[i] = color_to_vector3(palette32[i]); + } +} + + + + + +static uint compute_indices4(const Vector4 input_colors[16], const Vector3 & color_weights, const Vector3 palette[4]) { + + uint indices = 0; + for (int i = 0; i < 16; i++) { + float d0 = evaluate_mse(palette[0], input_colors[i].xyz(), color_weights); + float d1 = evaluate_mse(palette[1], input_colors[i].xyz(), color_weights); + float d2 = evaluate_mse(palette[2], input_colors[i].xyz(), color_weights); + float d3 = evaluate_mse(palette[3], input_colors[i].xyz(), color_weights); + + uint b0 = 
d0 > d3; + uint b1 = d1 > d2; + uint b2 = d0 > d2; + uint b3 = d1 > d3; + uint b4 = d2 > d3; + + uint x0 = b1 & b2; + uint x1 = b0 & b3; + uint x2 = b0 & b4; + + indices |= (x2 | ((x0 | x1) << 1)) << (2 * i); + } + + return indices; +} + + +static uint compute_indices(const Vector4 input_colors[16], const Vector3 & color_weights, const Vector3 palette[4]) { + + uint indices = 0; + for (int i = 0; i < 16; i++) { + float d0 = evaluate_mse(palette[0], input_colors[i].xyz(), color_weights); + float d1 = evaluate_mse(palette[1], input_colors[i].xyz(), color_weights); + float d2 = evaluate_mse(palette[2], input_colors[i].xyz(), color_weights); + float d3 = evaluate_mse(palette[3], input_colors[i].xyz(), color_weights); + + uint index; + if (d0 < d1 && d0 < d2 && d0 < d3) index = 0; + else if (d1 < d2 && d1 < d3) index = 1; + else if (d2 < d3) index = 2; + else index = 3; + + indices |= index << (2 * i); + } + + return indices; +} + + +static void output_block3(const Vector4 input_colors[16], const Vector3 & color_weights, const Vector3 & v0, const Vector3 & v1, BlockDXT1 * block) +{ + Color16 color0 = vector3_to_color16(v0); + Color16 color1 = vector3_to_color16(v1); + + if (color0.u > color1.u) { + swap(color0, color1); + } + + Vector3 palette[4]; + evaluate_palette(color0, color1, palette); + + block->col0 = color0; + block->col1 = color1; + block->indices = compute_indices(input_colors, color_weights, palette); +} + +static void output_block4(const Vector4 input_colors[16], const Vector3 & color_weights, const Vector3 & v0, const Vector3 & v1, BlockDXT1 * block) +{ + Color16 color0 = vector3_to_color16(v0); + Color16 color1 = vector3_to_color16(v1); + + if (color0.u < color1.u) { + swap(color0, color1); + } + + Vector3 palette[4]; + evaluate_palette(color0, color1, palette); + + block->col0 = color0; + block->col1 = color1; + block->indices = compute_indices4(input_colors, color_weights, palette); +} + + + + + +// Single color compressor, based on: +// https://mollyrocket.com/forums/viewtopic.php?t=392 +static void compress_dxt1_single_color_optimal(Color32 c, BlockDXT1 * output) +{ + output->col0.r = OMatch5[c.r][0]; + output->col0.g = OMatch6[c.g][0]; + output->col0.b = OMatch5[c.b][0]; + output->col1.r = OMatch5[c.r][1]; + output->col1.g = OMatch6[c.g][1]; + output->col1.b = OMatch5[c.b][1]; + output->indices = 0xaaaaaaaa; + + if (output->col0.u < output->col1.u) + { + swap(output->col0.u, output->col1.u); + output->indices ^= 0x55555555; + } +} + + +float nv::compress_dxt1_single_color_optimal(Color32 c, BlockDXT1 * output) +{ + ::compress_dxt1_single_color_optimal(c, output); + + // Multiply by 16^2, the weight associated to a single color. + // Divide by 255*255 to covert error to [0-1] range. + return (256.0f / (255*255)) * evaluate_mse(output, c, output->indices & 3); +} + + +float nv::compress_dxt1_single_color_optimal(const Vector3 & color, BlockDXT1 * output) +{ + return compress_dxt1_single_color_optimal(vector3_to_color(color), output); +} + + +// Compress block using the average color. +float nv::compress_dxt1_single_color(const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, BlockDXT1 * output) +{ + // Compute block average. + Vector3 color_sum(0); + float weight_sum = 0; + + for (int i = 0; i < count; i++) { + color_sum += colors[i] * weights[i]; + weight_sum += weights[i]; + } + + // Compress optimally. + ::compress_dxt1_single_color_optimal(vector3_to_color(color_sum / weight_sum), output); + + // Decompress block color. 
+ Color32 palette[4]; + output->evaluatePalette(palette, /*d3d9=*/false); + + Vector3 block_color = color_to_vector3(palette[output->indices & 0x3]); + + // Evaluate error. + float error = 0; + for (int i = 0; i < count; i++) { + error += weights[i] * evaluate_mse(block_color, colors[i], color_weights); + } + return error; +} + + +/* @@ Not implemented yet. +// Low quality baseline compressor. +float nv::compress_dxt1_least_squares_fit(const Vector3 * input_colors, const Vector3 * colors, const float * weights, int count, BlockDXT1 * output) +{ + // @@ Iterative best end point fit. + + return FLT_MAX; +}*/ + + +float nv::compress_dxt1_bounding_box_exhaustive(const Vector4 input_colors[16], const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, bool three_color_mode, int max_volume, BlockDXT1 * output) +{ + // Compute bounding box. + Vector3 min_color(1.0f); + Vector3 max_color(0.0f); + + for (int i = 0; i < count; i++) { + min_color = min(min_color, colors[i]); + max_color = max(max_color, colors[i]); + } + + // Convert to 5:6:5 + int min_r = ftoi_floor(31 * min_color.x); + int min_g = ftoi_floor(63 * min_color.y); + int min_b = ftoi_floor(31 * min_color.z); + int max_r = ftoi_ceil(31 * max_color.x); + int max_g = ftoi_ceil(63 * max_color.y); + int max_b = ftoi_ceil(31 * max_color.z); + + // Expand the box. + int range_r = max_r - min_r; + int range_g = max_g - min_g; + int range_b = max_b - min_b; + + min_r = max(0, min_r - range_r / 2 - 2); + min_g = max(0, min_g - range_g / 2 - 2); + min_b = max(0, min_b - range_b / 2 - 2); + + max_r = min(31, max_r + range_r / 2 + 2); + max_g = min(63, max_g + range_g / 2 + 2); + max_b = min(31, max_b + range_b / 2 + 2); + + // Estimate size of search space. + int volume = (max_r-min_r+1) * (max_g-min_g+1) * (max_b-min_b+1); + + // if size under search_limit, then proceed. Note that search_volume is sqrt of number of evaluations. + if (volume > max_volume) { + return FLT_MAX; + } + + // @@ Convert to fixed point before building box? + Color32 colors32[16]; + for (int i = 0; i < count; i++) { + colors32[i] = toColor32(Vector4(colors[i], 1)); + } + + float best_error = FLT_MAX; + Color16 best0, best1; // @@ Record endpoints as Color16? + + Color16 c0, c1; + Color32 palette[4]; + + for(int r0 = min_r; r0 <= max_r; r0++) + for(int g0 = min_g; g0 <= max_g; g0++) + for(int b0 = min_b; b0 <= max_b; b0++) + { + c0.r = r0; c0.g = g0; c0.b = b0; + palette[0] = bitexpand_color16_to_color32(c0); + + for(int r1 = min_r; r1 <= max_r; r1++) + for(int g1 = min_g; g1 <= max_g; g1++) + for(int b1 = min_b; b1 <= max_b; b1++) + { + c1.r = r1; c1.g = g1; c1.b = b1; + palette[1] = bitexpand_color16_to_color32(c1); + + if (c0.u > c1.u) { + // Evaluate error in 4 color mode. + evaluate_palette4(palette); + } + else { + if (three_color_mode) { + // Evaluate error in 3 color mode. + evaluate_palette3(palette); + } + else { + // Skip 3 color mode. 
+ continue; + } + } + + float error = evaluate_palette_error(palette, colors32, weights, count); + + if (error < best_error) { + best_error = error; + best0 = c0; + best1 = c1; + } + } + } + + output->col0 = best0; + output->col1 = best1; + + Vector3 vector_palette[4]; + evaluate_palette(output->col0, output->col1, vector_palette); + + output->indices = compute_indices(input_colors, color_weights, vector_palette); + + return best_error / (255 * 255); +} + + +void nv::compress_dxt1_cluster_fit(const Vector4 input_colors[16], const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, bool three_color_mode, BlockDXT1 * output) +{ + ClusterFit fit; + fit.setColorWeights(Vector4(color_weights, 1)); + fit.setColorSet(colors, weights, count); + + // start & end are in [0, 1] range. + Vector3 start, end; + fit.compress4(&start, &end); + + if (three_color_mode && fit.compress3(&start, &end)) { + output_block3(input_colors, color_weights, start, end, output); + } + else { + output_block4(input_colors, color_weights, start, end, output); + } +} + + + + +float nv::compress_dxt1(const Vector4 input_colors[16], const float input_weights[16], const Vector3 & color_weights, bool three_color_mode, BlockDXT1 * output) +{ + Vector3 colors[16]; + float weights[16]; + int count = reduce_colors(input_colors, input_weights, colors, weights); + + if (count == 0) { + // Output trivial block. + output->col0.u = 0; + output->col1.u = 0; + output->indices = 0; + return 0; + } + + + float error = FLT_MAX; + + // Sometimes the single color compressor produces better results than the exhaustive. This introduces discontinuities between blocks that + // use different compressors. For this reason, this is not enabled by default. + if (1) { + error = compress_dxt1_single_color(colors, weights, count, color_weights, output); + + if (error == 0.0f || count == 1) { + // Early out. + return error; + } + } + + // This is too expensive, even with a low threshold. + // If high quality: + if (0) { + BlockDXT1 exhaustive_output; + float exhaustive_error = compress_dxt1_bounding_box_exhaustive(input_colors, colors, weights, count, color_weights, three_color_mode, 1400, &exhaustive_output); + + if (exhaustive_error != FLT_MAX) { + float exhaustive_error2 = evaluate_mse(input_colors, input_weights, color_weights, &exhaustive_output); + + // The exhaustive compressor does not use color_weights, so the results may be different. + //nvCheck(equal(exhaustive_error, exhaustive_error2)); + + if (exhaustive_error2 < error) { + *output = exhaustive_output; + error = exhaustive_error; + } + } + } + + // @@ TODO. + // This is pretty fast and in some cases can produces better quality than cluster fit. + //error = compress_dxt1_least_squares_fit(colors, weigths, error, output); + + // Cluster fit cannot handle single color blocks, so encode them optimally if we haven't encoded them already. + if (error == FLT_MAX && count == 1) { + error = compress_dxt1_single_color_optimal(colors[0], output); + } + + if (count > 1) { + BlockDXT1 cluster_fit_output; + compress_dxt1_cluster_fit(input_colors, colors, weights, count, color_weights, three_color_mode, &cluster_fit_output); + + float cluster_fit_error = evaluate_mse(input_colors, input_weights, color_weights, &cluster_fit_output); + + if (cluster_fit_error < error) { + *output = cluster_fit_output; + error = cluster_fit_error; + } + } + + return error; +} + + +// Once we have an index assignment we have colors grouped in 1-4 clusters. 
+// If 1 clusters -> Use optimal compressor. +// If 2 clusters -> Try: (0, 1), (1, 2), (0, 2), (0, 3) - [0, 1] +// If 3 clusters -> Try: (0, 1, 2), (0, 1, 3), (0, 2, 3) - [0, 1, 2] +// If 4 clusters -> Try: (0, 1, 2, 3) + +// @@ How do we do the initial index/cluster assignment? Use standard cluster fit. + + +// Least squares fitting of color end points for the given indices. @@ Take weights into account. +static bool optimize_end_points4(uint indices, const Vector3 * colors, const Vector3 * weights, int count, Vector3 * a, Vector3 * b) +{ + float alpha2_sum = 0.0f; + float beta2_sum = 0.0f; + float alphabeta_sum = 0.0f; + Vector3 alphax_sum(0.0f); + Vector3 betax_sum(0.0f); + + for (int i = 0; i < count; i++) + { + const uint bits = indices >> (2 * i); + + float beta = float(bits & 1); + if (bits & 2) beta = (1 + beta) / 3.0f; + float alpha = 1.0f - beta; + + alpha2_sum += alpha * alpha; + beta2_sum += beta * beta; + alphabeta_sum += alpha * beta; + alphax_sum += alpha * colors[i]; + betax_sum += beta * colors[i]; + } + + float denom = alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum; + if (equal(denom, 0.0f)) return false; + + float factor = 1.0f / denom; + + *a = saturate((alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor); + *b = saturate((betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor); + + return true; +} + + +// Least squares fitting of color end points for the given indices. @@ This does not support black/transparent index. @@ Take weights into account. +static bool optimize_end_points3(uint indices, const Vector3 * colors, const Vector3 * weights, int count, Vector3 * a, Vector3 * b) +{ + float alpha2_sum = 0.0f; + float beta2_sum = 0.0f; + float alphabeta_sum = 0.0f; + Vector3 alphax_sum(0.0f); + Vector3 betax_sum(0.0f); + + for (int i = 0; i < count; i++) + { + const uint bits = indices >> (2 * i); + + float beta = float(bits & 1); + if (bits & 2) beta = 0.5f; + float alpha = 1.0f - beta; + + alpha2_sum += alpha * alpha; + beta2_sum += beta * beta; + alphabeta_sum += alpha * beta; + alphax_sum += alpha * colors[i]; + betax_sum += beta * colors[i]; + } + + float denom = alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum; + if (equal(denom, 0.0f)) return false; + + float factor = 1.0f / denom; + + *a = saturate((alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor); + *b = saturate((betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor); + + return true; +} + +// @@ After optimization we need to round end points. Round in all possible directions, and pick best. 
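[Editor's note] For orientation, the data these routines emit is the standard DXT1/BC1 block: two 16-bit 565 endpoints (col0, col1) followed by 16 two-bit palette indices. The standalone sketch below is illustrative only and is not part of this patch; it decodes such a block using the same endpoint bit-expansion as bitexpand_color16_to_color32 and the same palette interpolation rules as evaluate_palette4/evaluate_palette3 above (col0 > col1 selects four-color mode, otherwise three-color mode with a black fourth entry).

// Standalone illustration, not part of the NVTT sources or this patch.
#include <cstdint>
#include <cstdio>

struct RGB8 { uint8_t r, g, b; };

static RGB8 expand565(uint16_t c) {
    // Bit-expand 5:6:5 to 8:8:8, matching bitexpand_color16_to_color32.
    RGB8 out;
    out.r = uint8_t(((c >> 11) << 3) | ((c >> 13) & 0x7));
    out.g = uint8_t((((c >> 5) & 0x3F) << 2) | ((c >> 9) & 0x3));
    out.b = uint8_t(((c & 0x1F) << 3) | ((c >> 2) & 0x7));
    return out;
}

static void decodeDXT1(uint16_t c0, uint16_t c1, uint32_t indices, RGB8 out[16]) {
    RGB8 palette[4];
    palette[0] = expand565(c0);
    palette[1] = expand565(c1);
    if (c0 > c1) {
        // Four-color mode: interpolated colors at 1/3 and 2/3 (evaluate_palette4).
        palette[2] = { uint8_t((2 * palette[0].r + palette[1].r) / 3),
                       uint8_t((2 * palette[0].g + palette[1].g) / 3),
                       uint8_t((2 * palette[0].b + palette[1].b) / 3) };
        palette[3] = { uint8_t((2 * palette[1].r + palette[0].r) / 3),
                       uint8_t((2 * palette[1].g + palette[0].g) / 3),
                       uint8_t((2 * palette[1].b + palette[0].b) / 3) };
    } else {
        // Three-color mode: one midpoint plus black (evaluate_palette3).
        palette[2] = { uint8_t((palette[0].r + palette[1].r) / 2),
                       uint8_t((palette[0].g + palette[1].g) / 2),
                       uint8_t((palette[0].b + palette[1].b) / 2) };
        palette[3] = { 0, 0, 0 };
    }
    // Each texel stores a 2-bit index into the palette, LSB-first.
    for (int i = 0; i < 16; i++)
        out[i] = palette[(indices >> (2 * i)) & 3];
}

int main() {
    RGB8 texels[16];
    decodeDXT1(0xF800 /* red */, 0x001F /* blue */, 0xAAAAAAAA /* all index 2 */, texels);
    printf("texel 0: %d %d %d\n", texels[0].r, texels[0].g, texels[0].b);
    return 0;
}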
+ + + + + + Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressorDXT5_RGBM.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressorDXT5_RGBM.h +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressorDXT5_RGBM.h @@ -0,0 +1,9 @@ + +namespace nv { + + struct BlockDXT5; + class Vector4; + + float compress_dxt5_rgbm(const Vector4 input_colors[16], const float input_weights[16], float min_m, BlockDXT5 * output); + +} Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressorDXT5_RGBM.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressorDXT5_RGBM.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressorDXT5_RGBM.cpp @@ -0,0 +1,428 @@ +#include "CompressorDXT5_RGBM.h" +#include "CompressorDXT1.h" + +#include "OptimalCompressDXT.h" +#include "QuickCompressDXT.h" + +#include "nvimage/ColorBlock.h" +#include "nvimage/BlockDXT.h" + +#include "nvmath/Color.inl" +#include "nvmath/Vector.inl" +#include "nvmath/Fitting.h" +#include "nvmath/ftoi.h" + +#include "nvthread/Atomic.h" +#include + +using namespace nv; + +//static uint atomic_counter = 0; + + +float nv::compress_dxt5_rgbm(const Vector4 input_colors[16], const float input_weights[16], float min_m, BlockDXT5 * output) { + + // Convert to RGBM. + Vector4 input_colors_rgbm[16]; // @@ Write over input_colors? + float rgb_weights[16]; + + float weight_sum = 0; + + for (uint i = 0; i < 16; i++) { + const Vector4 & c = input_colors[i]; + + float R = saturate(c.x); + float G = saturate(c.y); + float B = saturate(c.z); + + float M = max(max(R, G), max(B, min_m)); + float r = R / M; + float g = G / M; + float b = B / M; + float a = (M - min_m) / (1 - min_m); + + input_colors_rgbm[i] = Vector4(r, g, b, a); + rgb_weights[i] = input_weights[i] * M; + weight_sum += input_weights[i]; + } + + if (weight_sum == 0) { + for (uint i = 0; i < 16; i++) rgb_weights[i] = 1; + } + + // Compress RGB. + compress_dxt1(input_colors_rgbm, rgb_weights, Vector3(1), /*three_color_mode=*/false, &output->color); + + // Decompress RGB/M block. + nv::ColorBlock RGB; + output->color.decodeBlock(&RGB); + + // Compute M values to compensate for RGB's error. + AlphaBlock4x4 M; + for (int i = 0; i < 16; i++) { + const Vector4 & c = input_colors[i]; + + float R = saturate(c.x); + float G = saturate(c.y); + float B = saturate(c.z); + + float rm = RGB.color(i).r / 255.0f; + float gm = RGB.color(i).g / 255.0f; + float bm = RGB.color(i).b / 255.0f; + + // compute m such that m * (r/M, g/M, b/M) == RGB + + // Three equations, one unknown: + // m * r/M == R + // m * g/M == G + // m * b/M == B + + // Solve in the least squares sense! + + // m (rm gm bm) (rm gm bm)^T == (rm gm bm) (R G B)^T + + // m == dot(rgb, RGB) / dot(rgb, rgb) + + float m = dot(Vector3(rm, gm, bm), Vector3(R, G, B)) / dot(Vector3(rm, gm, bm), Vector3(rm, gm, bm)); + + m = (m - min_m) / (1 - min_m); + +#if 0 + // IC: This does indeed happen. What does that mean? The best choice of m is above the available range. If this happened too often it would make sense to scale m in + // the pixel shader to allow for more accurate reconstruction. However, that scaling would reduce the precision over the [0-1] range. I haven't measured how much + // error is introduced by the clamping vs. how much the error would change with the increased range. 
+ if (m > 1.0f) { + uint counter = atomicIncrement(&atomic_counter); + printf("It happens %u times!", counter); + } +#endif + + M.alpha[i] = U8(ftoi_round(saturate(m) * 255.0f)); + M.weights[i] = input_weights[i]; + } + + // Compress M. + //if (compressionOptions.quality == Quality_Fastest) { + // QuickCompress::compressDXT5A(M, &output->alpha); + /*} + else {*/ + OptimalCompress::compressDXT5A(M, &output->alpha); + //} + + +#if 0 // Multiple iterations do not seem to help. + // Decompress M. + output->alpha.decodeBlock(&M); + + // Feed it back to the input RGB block. + for (uint i = 0; i < 16; i++) { + const Vector4 & c = input_colors[i]; + + float R = saturate(c.x); + float G = saturate(c.y); + float B = saturate(c.z); + + float m = float(M.alpha[i]) / 255.0f * (1 - min_m) + min_m; + + float r = R / m; + float g = G / m; + float b = B / m; + float a = float(M.alpha[i]) / 255.0f; + + input_colors_rgbm[i] = Vector4(r, g, b, a); + rgb_weights[i] = input_weights[i] * m; + } +#endif + + return 0; // @@ +} + + + + +#if 0 + + BlockDXT5 * block = new(output)BlockDXT5; + + // Decompress the color block and find the M values that reproduce the input most closely. This should compensate for some of the DXT errors. + + // Compress the resulting M values optimally. + + // Repeat this several times until compression error does not improve? + + //Vector3 rgb_block[16]; + //float m_block[16]; + + + // Init RGB/M block. +#if 0 + nvsquish::WeightedClusterFit fit; + + ColorBlock rgba; + for (int i = 0; i < 16; i++) { + const Vector4 & c = src.color(i); + float R = saturate(c.x); + float G = saturate(c.y); + float B = saturate(c.z); + + float M = max(max(R, G), max(B, min_m)); + float r = R / M; + float g = G / M; + float b = B / M; + float a = c.w; + + rgba.color(i) = toColor32(Vector4(r, g, b, a)); + } + + if (rgba.isSingleColor()) + { + OptimalCompress::compressDXT1(rgba.color(0), &block->color); + } + else + { + nvsquish::WeightedClusterFit fit; + fit.SetMetric(compressionOptions.colorWeight.x, compressionOptions.colorWeight.y, compressionOptions.colorWeight.z); + + int flags = 0; + if (alphaMode == nvtt::AlphaMode_Transparency) flags |= nvsquish::kWeightColourByAlpha; + + nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags); + fit.SetColourSet(&colours, 0); + fit.Compress(&block->color); + } +#endif +#if 1 + ColorSet rgb; + rgb.allocate(4, 4); + + for (uint i = 0; i < 16; i++) { + const Vector4 & c = colors[i]; + + float R = saturate(c.x); + float G = saturate(c.y); + float B = saturate(c.z); + + float M = max(max(R, G), max(B, min_m)); + float r = R / M; + float g = G / M; + float b = B / M; + float a = c.w; + + rgb.colors[i] = Vector4(r, g, b, a); + rgb.indices[i] = i; + rgb.weights[i] = max(weights[i], 0.001f);// weights[i]; // IC: For some reason 0 weights are causing problems, even if we eliminate the corresponding colors from the set. + } + + rgb.createMinimalSet(/*ignoreTransparent=*/true); + + if (rgb.isSingleColor(/*ignoreAlpha=*/true)) { + OptimalCompress::compressDXT1(toColor32(rgb.color(0)), &block->color); + } + else { + ClusterFit fit; + fit.setColorWeights(compressionOptions.colorWeight); + fit.setColorSet(&rgb); + + Vector3 start, end; + fit.compress4(&start, &end); + + QuickCompress::outputBlock4(rgb, start, end, &block->color); + } +#endif + + // Decompress RGB/M block. 
+ nv::ColorBlock RGB; + block->color.decodeBlock(&RGB); + +#if 1 + AlphaBlock4x4 M; + for (int i = 0; i < 16; i++) { + const Vector4 & c = colors[i]; + float R = saturate(c.x); + float G = saturate(c.y); + float B = saturate(c.z); + + float r = RGB.color(i).r / 255.0f; + float g = RGB.color(i).g / 255.0f; + float b = RGB.color(i).b / 255.0f; + + float m = (R / r + G / g + B / b) / 3.0f; + //float m = max((R / r + G / g + B / b) / 3.0f, min_m); + //float m = max(max(R / r, G / g), max(B / b, min_m)); + //float m = max(max(R, G), max(B, min_m)); + + m = (m - min_m) / (1 - min_m); + + M.alpha[i] = U8(ftoi_round(saturate(m) * 255.0f)); + M.weights[i] = weights[i]; + } + + // Compress M. + if (compressionOptions.quality == Quality_Fastest) { + QuickCompress::compressDXT5A(M, &block->alpha); + } + else { + OptimalCompress::compressDXT5A(M, &block->alpha); + } +#else + OptimalCompress::compressDXT5A_RGBM(src, RGB, &block->alpha); +#endif + +#if 0 + // Decompress M. + block->alpha.decodeBlock(&M); + + rgb.allocate(src.w, src.h); // @@ Handle smaller blocks. + + for (uint i = 0; i < src.colorCount; i++) { + const Vector4 & c = src.color(i); + + float R = saturate(c.x); + float G = saturate(c.y); + float B = saturate(c.z); + + //float m = max(max(R, G), max(B, min_m)); + float m = float(M.alpha[i]) / 255.0f * (1 - min_m) + min_m; + float r = R / m; + float g = G / m; + float b = B / m; + float a = c.w; + + rgb.colors[i] = Vector4(r, g, b, a); + rgb.indices[i] = i; + rgb.weights[i] = max(c.w, 0.001f);// src.weights[i]; // IC: For some reason 0 weights are causing problems, even if we eliminate the corresponding colors from the set. + } + + rgb.createMinimalSet(/*ignoreTransparent=*/true); + + if (rgb.isSingleColor(/*ignoreAlpha=*/true)) { + OptimalCompress::compressDXT1(toColor32(rgb.color(0)), &block->color); + } + else { + ClusterFit fit; + fit.setMetric(compressionOptions.colorWeight); + fit.setColourSet(&rgb); + + Vector3 start, end; + fit.compress4(&start, &end); + + QuickCompress::outputBlock4(rgb, start, end, &block->color); + } +#endif + +#if 0 + block->color.decodeBlock(&RGB); + + //AlphaBlock4x4 M; + //M.initWeights(src); + + for (int i = 0; i < 16; i++) { + const Vector4 & c = src.color(i); + float R = saturate(c.x); + float G = saturate(c.y); + float B = saturate(c.z); + + float r = RGB.color(i).r / 255.0f; + float g = RGB.color(i).g / 255.0f; + float b = RGB.color(i).b / 255.0f; + + float m = (R / r + G / g + B / b) / 3.0f; + //float m = max((R / r + G / g + B / b) / 3.0f, min_m); + //float m = max(max(R / r, G / g), max(B / b, min_m)); + //float m = max(max(R, G), max(B, min_m)); + + m = (m - min_m) / (1 - min_m); + + M.alpha[i] = U8(ftoi_round(saturate(m) * 255.0f)); + M.weights[i] = src.weights[i]; + } + + // Compress M. + if (compressionOptions.quality == Quality_Fastest) { + QuickCompress::compressDXT5A(M, &block->alpha); + } + else { + OptimalCompress::compressDXT5A(M, &block->alpha); + } +#endif + + + +#if 0 + src.fromRGBM(M, min_m); + + src.createMinimalSet(/*ignoreTransparent=*/true); + + if (src.isSingleColor(/*ignoreAlpha=*/true)) { + OptimalCompress::compressDXT1(src.color(0), &block->color); + } + else { + // @@ Use our improved compressor. 
+ ClusterFit fit; + fit.setMetric(compressionOptions.colorWeight); + fit.setColourSet(&src); + + Vector3 start, end; + fit.compress4(&start, &end); + + if (fit.compress3(&start, &end)) { + QuickCompress::outputBlock3(src, start, end, block->color); + } + else { + QuickCompress::outputBlock4(src, start, end, block->color); + } + } +#endif // 0 + + // @@ Decompress color and compute M that best approximates src with these colors? Then compress M again? + + + + // RGBM encoding. + // Maximize precision. + // - Number of possible grey levels: + // - Naive: 2^3 = 8 + // - Better: 2^3 + 2^2 = 12 + // - How to choose min_m? + // - Ideal = Adaptive per block, don't know where to store. + // - Adaptive per lightmap. How to compute optimal? + // - Fixed: 0.25 in our case. Lightmaps scaled to a fixed [0, 1] range. + + // - Optimal compressor: Interpolation artifacts. + + // - Color transform. + // - Measure error in post-tone-mapping color space. + // - Assume a simple tone mapping operator. We know minimum and maximum exposure, but don't know exact exposure in game. + // - Guess based on average lighmap color? Use fixed exposure, in scaled lightmap space. + + // - Enhanced DXT compressor. + // - Typical RGBM encoding as follows: + // rgb -> M = max(rgb), RGB=rgb/M -> RGBM + // - If we add a compression step (M' = M) and M' < M, then rgb may be greater than 1. + // - We could ensure that M' >= M during compression. + // - We could clamp RGB anyway. + // - We could add a fixed scale value to take into account compression errors and avoid clamping. + + + + + + // Compress color. + /*if (rgba.isSingleColor()) + { + OptimalCompress::compressDXT1(rgba.color(0), &block->color); + } + else + { + nvsquish::WeightedClusterFit fit; + fit.SetMetric(compressionOptions.colorWeight.x, compressionOptions.colorWeight.y, compressionOptions.colorWeight.z); + + int flags = 0; + if (alphaMode == nvtt::AlphaMode_Transparency) flags |= nvsquish::kWeightColourByAlpha; + + nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags); + fit.SetColourSet(&colours, 0); + fit.Compress(&block->color); + }*/ + +#endif // 0 \ No newline at end of file Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressorRGB.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressorRGB.h +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressorRGB.h @@ -0,0 +1,40 @@ +// Copyright (c) 2009-2011 Ignacio Castano +// Copyright (c) 2007-2009 NVIDIA Corporation -- Ignacio Castano +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +#ifndef NVTT_COMPRESSORRGB_H +#define NVTT_COMPRESSORRGB_H + +#include "Compressor.h" + +namespace nv +{ + struct PixelFormatConverter : public CompressorInterface + { + virtual void compress(nvtt::AlphaMode alphaMode, uint w, uint h, uint d, const float * data, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); + }; + +} // nv namespace + + +#endif // NVTT_COMPRESSORRGB_H Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressorRGB.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressorRGB.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/CompressorRGB.cpp @@ -0,0 +1,568 @@ +// Copyright (c) 2009-2011 Ignacio Castano +// Copyright (c) 2007-2009 NVIDIA Corporation -- Ignacio Castano +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +#include "CompressorRGB.h" +#include "CompressionOptions.h" +#include "OutputOptions.h" + +#include "nvimage/Image.h" +#include "nvimage/FloatImage.h" +#include "nvimage/PixelFormat.h" + +#include "nvmath/Color.h" +#include "nvmath/Half.h" +#include "nvmath/ftoi.h" +#include "nvmath/Vector.inl" + +#include "nvcore/Debug.h" + +using namespace nv; +using namespace nvtt; + +namespace +{ + /* 11 and 10 bit floating point numbers according to the OpenGL packed float extension: + http://www.opengl.org/registry/specs/EXT/packed_float.txt + + 2.1.A Unsigned 11-Bit Floating-Point Numbers + + An unsigned 11-bit floating-point number has no sign bit, a 5-bit + exponent (E), and a 6-bit mantissa (M). The value of an unsigned + 11-bit floating-point number (represented as an 11-bit unsigned + integer N) is determined by the following: + + 0.0, if E == 0 and M == 0, + 2^-14 * (M / 64), if E == 0 and M != 0, + 2^(E-15) * (1 + M/64), if 0 < E < 31, + INF, if E == 31 and M == 0, or + NaN, if E == 31 and M != 0, + + where + + E = floor(N / 64), and + M = N mod 64. 
+ + Implementations are also allowed to use any of the following + alternative encodings: + + 0.0, if E == 0 and M != 0 + 2^(E-15) * (1 + M/64) if E == 31 and M == 0 + 2^(E-15) * (1 + M/64) if E == 31 and M != 0 + + When a floating-point value is converted to an unsigned 11-bit + floating-point representation, finite values are rounded to the closet + representable finite value. While less accurate, implementations + are allowed to always round in the direction of zero. This means + negative values are converted to zero. Likewise, finite positive + values greater than 65024 (the maximum finite representable unsigned + 11-bit floating-point value) are converted to 65024. Additionally: + negative infinity is converted to zero; positive infinity is converted + to positive infinity; and both positive and negative NaN are converted + to positive NaN. + + Any representable unsigned 11-bit floating-point value is legal + as input to a GL command that accepts 11-bit floating-point data. + The result of providing a value that is not a floating-point number + (such as infinity or NaN) to such a command is unspecified, but must + not lead to GL interruption or termination. Providing a denormalized + number or negative zero to GL must yield predictable results. + + 2.1.B Unsigned 10-Bit Floating-Point Numbers + + An unsigned 10-bit floating-point number has no sign bit, a 5-bit + exponent (E), and a 5-bit mantissa (M). The value of an unsigned + 10-bit floating-point number (represented as an 10-bit unsigned + integer N) is determined by the following: + + 0.0, if E == 0 and M == 0, + 2^-14 * (M / 32), if E == 0 and M != 0, + 2^(E-15) * (1 + M/32), if 0 < E < 31, + INF, if E == 31 and M == 0, or + NaN, if E == 31 and M != 0, + + where + + E = floor(N / 32), and + M = N mod 32. + + When a floating-point value is converted to an unsigned 10-bit + floating-point representation, finite values are rounded to the closet + representable finite value. While less accurate, implementations + are allowed to always round in the direction of zero. This means + negative values are converted to zero. Likewise, finite positive + values greater than 64512 (the maximum finite representable unsigned + 10-bit floating-point value) are converted to 64512. Additionally: + negative infinity is converted to zero; positive infinity is converted + to positive infinity; and both positive and negative NaN are converted + to positive NaN. + + Any representable unsigned 10-bit floating-point value is legal + as input to a GL command that accepts 10-bit floating-point data. + The result of providing a value that is not a floating-point number + (such as infinity or NaN) to such a command is unspecified, but must + not lead to GL interruption or termination. Providing a denormalized + number or negative zero to GL must yield predictable results. + */ + + // @@ Is this correct? Not tested! + // 6 bits of mantissa, 5 bits of exponent. + static uint toFloat11(float f) { + if (f < 0) f = 0; // Flush to 0 or to epsilon? + if (f > 65024) f = 65024; // Flush to infinity or max? + + Float754 F; + F.value = f; + + uint E = F.field.biasedexponent - 127 + 15; + nvDebugCheck(E < 32); + + uint M = F.field.mantissa >> (23 - 6); + + return (E << 6) | M; + } + + // @@ Is this correct? Not tested! + // 5 bits of mantissa, 5 bits of exponent. + static uint toFloat10(float f) { + if (f < 0) f = 0; // Flush to 0 or to epsilon? + if (f > 64512) f = 64512; // Flush to infinity or max? 
+ + Float754 F; + F.value = f; + + uint E = F.field.biasedexponent - 127 + 15; + nvDebugCheck(E < 32); + + uint M = F.field.mantissa >> (23 - 5); + + return (E << 5) | M; + } + + + // IC: Inf/NaN and denormal handling based on DirectXMath. + static float fromFloat11(uint u) { + // 5 bit exponent + // 6 bit mantissa + + uint E = (u >> 6) & 0x1F; + uint M = u & 0x3F; + + Float754 F; + F.field.negative = 0; + + if (E == 0x1f) { // INF or NAN. + E = 0xFF; + } + else { + if (E != 0) { + F.field.biasedexponent = E + 127 - 15; + F.field.mantissa = M << (23 - 6); + } + else if (M != 0) { + E = 1; + do { + E--; + M <<= 1; + } while((M & 0x40) == 0); + + M &= 0x3F; + } + } + + F.field.biasedexponent = 0xFF; + F.field.mantissa = M << (23 - 6); + + return F.value; +#if 0 + // X Channel (6-bit mantissa) + Mantissa = pSource->xm; + + if ( pSource->xe == 0x1f ) // INF or NAN + { + Result[0] = 0x7f800000 | (pSource->xm << 17); + } + else + { + if ( pSource->xe != 0 ) // The value is normalized + { + Exponent = pSource->xe; + } + else if (Mantissa != 0) // The value is denormalized + { + // Normalize the value in the resulting float + Exponent = 1; + + do + { + Exponent--; + Mantissa <<= 1; + } while ((Mantissa & 0x40) == 0); + + Mantissa &= 0x3F; + } + else // The value is zero + { + Exponent = (uint32_t)-112; + } + + Result[0] = ((Exponent + 112) << 23) | (Mantissa << 17); + } +#endif + } + + // https://www.opengl.org/registry/specs/EXT/texture_shared_exponent.txt + Float3SE toFloat3SE(float r, float g, float b) + { + const int N = 9; // Mantissa bits. + const int E = 5; // Exponent bits. + const int Emax = (1 << E) - 1; // 31 + const int B = (1 << (E-1)) - 1; // 15 + const float sharedexp_max = float((1 << N) - 1) / (1 << N) * (1 << (Emax-B)); // 65408 + + // Clamp color components. + r = max(0.0f, min(sharedexp_max, r)); + g = max(0.0f, min(sharedexp_max, g)); + b = max(0.0f, min(sharedexp_max, b)); + + // Get max component. + float max_c = max3(r, g, b); + + // Compute shared exponent. + int exp_shared_p = max(-B-1, ftoi_floor(log2f(max_c))) + 1 + B; + + int max_s = ftoi_round(max_c / (1 << (exp_shared_p - B - N))); + + int exp_shared = exp_shared_p; + if (max_s == (1 << N)) exp_shared++; + + Float3SE v; + v.e = exp_shared; + + // Compute mantissas. + v.xm = ftoi_round(r / (1 << (exp_shared - B - N))); + v.ym = ftoi_round(g / (1 << (exp_shared - B - N))); + v.zm = ftoi_round(b / (1 << (exp_shared - B - N))); + + return v; + } + + Vector3 fromFloat3SE(Float3SE v) { + Float754 f; + f.raw = 0x33800000 + (v.e << 23); + float scale = f.value; + return scale * Vector3(float(v.xm), float(v.ym), float(v.zm)); + } + + // These are based on: http://www.graphics.cornell.edu/~bjw/rgbe/rgbe.c + uint toRGBE(float r, float g, float b) + { + float v = max3(r, g, b); + + uint rgbe; + + if (v < 1e-32) { + rgbe = 0; + } + else { + int e; + float scale = frexpf(v, &e) * 256.0f / v; + //Float754 f; + //f.value = v; + //float scale = f.field.biasedexponent * 256.0f / v; + //e = f.field.biasedexponent - 127 + + rgbe |= U8(ftoi_round(r * scale)) << 0; + rgbe |= U8(ftoi_round(g * scale)) << 8; + rgbe |= U8(ftoi_round(b * scale)) << 16; + rgbe |= U8(e + 128) << 24; + } + + return rgbe; + } + + Vector3 fromRGBE(uint rgbe) { + uint r = (rgbe >> 0) & 0xFF; + uint g = (rgbe >> 8) & 0xFF; + uint b = (rgbe >> 16) & 0xFF; + uint e = (rgbe >> 24); + + if (e != 0) { + float scale = ldexpf(1.0f, e-(int)(128+8)); // +8 to divide by 256. @@ Shouldn't we divide by 255 instead? 
+ return scale * Vector3(float(r), float(g), float(b)); + } + + return Vector3(0); + } + + + struct BitStream + { + BitStream(uint8 * ptr) : ptr(ptr), buffer(0), bits(0) { + } + + void putBits(uint p, int bitCount) + { + nvDebugCheck(bits < 8); + nvDebugCheck(bitCount <= 32); + + uint64 buffer = (this->buffer << bitCount) | p; + uint bits = this->bits + bitCount; + + while (bits >= 8) + { + *ptr++ = (buffer & 0xFF); + + buffer >>= 8; + bits -= 8; + } + + this->buffer = (uint8)buffer; + this->bits = bits; + } + + void putFloat(float f) + { + nvDebugCheck(bits == 0); // @@ Do not require alignment. + *((float *)ptr) = f; + ptr += 4; + } + + void putHalf(float f) + { + nvDebugCheck(bits == 0); // @@ Do not require alignment. + *((uint16 *)ptr) = to_half(f); + ptr += 2; + } + + void putFloat11(float f) + { + putBits(toFloat11(f), 11); + } + + void putFloat10(float f) + { + putBits(toFloat10(f), 10); + } + + void flush() + { + nvDebugCheck(bits < 8); + if (bits) { + *ptr++ = buffer; + buffer = 0; + bits = 0; + } + } + + void align(int alignment) + { + nvDebugCheck(alignment >= 1); + flush(); + int remainder = (int)((uintptr_t)ptr % alignment); + if (remainder != 0) { + putBits(0, (alignment - remainder) * 8); + } + } + + uint8 * ptr; + uint8 buffer; + uint8 bits; + }; + +} // namespace + + + +void PixelFormatConverter::compress(nvtt::AlphaMode /*alphaMode*/, uint w, uint h, uint d, const float * data, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) +{ + nvDebugCheck (compressionOptions.format == nvtt::Format_RGBA); + + uint bitCount; + uint rmask, rshift, rsize; + uint gmask, gshift, gsize; + uint bmask, bshift, bsize; + uint amask, ashift, asize; + + if (compressionOptions.pixelType == nvtt::PixelType_Float) + { + rsize = compressionOptions.rsize; + gsize = compressionOptions.gsize; + bsize = compressionOptions.bsize; + asize = compressionOptions.asize; + + // Other float sizes are not supported and will be zero-padded. 
+ nvDebugCheck(rsize == 0 || rsize == 10 || rsize == 11 || rsize == 16 || rsize == 32); + nvDebugCheck(gsize == 0 || gsize == 10 || gsize == 11 || gsize == 16 || gsize == 32); + nvDebugCheck(bsize == 0 || bsize == 10 || bsize == 11 || bsize == 16 || bsize == 32); + nvDebugCheck(asize == 0 || asize == 10 || asize == 11 || asize == 16 || asize == 32); + + bitCount = rsize + gsize + bsize + asize; + } + else + { + if (compressionOptions.bitcount != 0) + { + bitCount = compressionOptions.bitcount; + nvCheck(bitCount <= 32); + + rmask = compressionOptions.rmask; + gmask = compressionOptions.gmask; + bmask = compressionOptions.bmask; + amask = compressionOptions.amask; + + PixelFormat::maskShiftAndSize(rmask, &rshift, &rsize); + PixelFormat::maskShiftAndSize(gmask, &gshift, &gsize); + PixelFormat::maskShiftAndSize(bmask, &bshift, &bsize); + PixelFormat::maskShiftAndSize(amask, &ashift, &asize); + } + else + { + rsize = compressionOptions.rsize; + gsize = compressionOptions.gsize; + bsize = compressionOptions.bsize; + asize = compressionOptions.asize; + + bitCount = rsize + gsize + bsize + asize; + nvCheck(bitCount <= 32); + + ashift = 0; + bshift = ashift + asize; + gshift = bshift + bsize; + rshift = gshift + gsize; + + rmask = ((1 << rsize) - 1) << rshift; + gmask = ((1 << gsize) - 1) << gshift; + bmask = ((1 << bsize) - 1) << bshift; + amask = ((1 << asize) - 1) << ashift; + } + } + + const uint pitch = computeBytePitch(w, bitCount, compressionOptions.pitchAlignment); + const uint whd = w * h * d; + + // Allocate output scanline. + uint8 * const dst = malloc(pitch); + + for (uint z = 0; z < d; z++) + { + for (uint y = 0; y < h; y++) + { + const float * src = (const float *)data + (z * h + y) * w; + + BitStream stream(dst); + + for (uint x = 0; x < w; x++) + { + float r = src[x + 0 * whd]; + float g = src[x + 1 * whd]; + float b = src[x + 2 * whd]; + float a = src[x + 3 * whd]; + + if (compressionOptions.pixelType == nvtt::PixelType_Float) + { + if (rsize == 32) stream.putFloat(r); + else if (rsize == 16) stream.putHalf(r); + else if (rsize == 11) stream.putFloat11(r); + else if (rsize == 10) stream.putFloat10(r); + else stream.putBits(0, rsize); + + if (gsize == 32) stream.putFloat(g); + else if (gsize == 16) stream.putHalf(g); + else if (gsize == 11) stream.putFloat11(g); + else if (gsize == 10) stream.putFloat10(g); + else stream.putBits(0, gsize); + + if (bsize == 32) stream.putFloat(b); + else if (bsize == 16) stream.putHalf(b); + else if (bsize == 11) stream.putFloat11(b); + else if (bsize == 10) stream.putFloat10(b); + else stream.putBits(0, bsize); + + if (asize == 32) stream.putFloat(a); + else if (asize == 16) stream.putHalf(a); + else if (asize == 11) stream.putFloat11(a); + else if (asize == 10) stream.putFloat10(a); + else stream.putBits(0, asize); + } + else if (compressionOptions.pixelType == nvtt::PixelType_SharedExp) + { + if (rsize == 9 && gsize == 9 && bsize == 9 && asize == 5) { + Float3SE v = toFloat3SE(r, g, b); + stream.putBits(v.v, 32); + } + else if (rsize == 8 && gsize == 8 && bsize == 8 && asize == 8) { + // @@ + } + else { + // @@ Not supported. Filling with zeros. + stream.putBits(0, bitCount); + } + } + else + { + // We first convert to 16 bits, then to the target size. @@ If greater than 16 bits, this will truncate and bitexpand. 
+ + // @@ Add support for nvtt::PixelType_SignedInt, nvtt::PixelType_SignedNorm, nvtt::PixelType_UnsignedInt + + int ir, ig, ib, ia; + if (compressionOptions.pixelType == nvtt::PixelType_UnsignedNorm) { + ir = iround(clamp(r * 65535.0f, 0.0f, 65535.0f)); + ig = iround(clamp(g * 65535.0f, 0.0f, 65535.0f)); + ib = iround(clamp(b * 65535.0f, 0.0f, 65535.0f)); + ia = iround(clamp(a * 65535.0f, 0.0f, 65535.0f)); + } + else if (compressionOptions.pixelType == nvtt::PixelType_SignedNorm) { + // @@ + } + else if (compressionOptions.pixelType == nvtt::PixelType_UnsignedInt) { + ir = iround(clamp(r, 0.0f, 65535.0f)); + ig = iround(clamp(g, 0.0f, 65535.0f)); + ib = iround(clamp(b, 0.0f, 65535.0f)); + ia = iround(clamp(a, 0.0f, 65535.0f)); + } + else if (compressionOptions.pixelType == nvtt::PixelType_SignedInt) { + // @@ + } + + uint p = 0; + p |= PixelFormat::convert(ir, 16, rsize) << rshift; + p |= PixelFormat::convert(ig, 16, gsize) << gshift; + p |= PixelFormat::convert(ib, 16, bsize) << bshift; + p |= PixelFormat::convert(ia, 16, asize) << ashift; + + stream.putBits(p, bitCount); + } + } + + // Zero padding. + stream.align(compressionOptions.pitchAlignment); + nvDebugCheck(stream.ptr == dst + pitch); + + // Scanlines are always byte-aligned. + outputOptions.writeData(dst, pitch); + } + } + + free(dst); +} Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/Context.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/Context.h +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/Context.h @@ -0,0 +1,73 @@ +// Copyright (c) 2009-2011 Ignacio Castano +// Copyright (c) 2007-2009 NVIDIA Corporation -- Ignacio Castano +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. 
+ +#ifndef NV_TT_CONTEXT_H +#define NV_TT_CONTEXT_H + +#include "nvcore/Ptr.h" + +#include "nvtt/Compressor.h" +#include "nvtt/cuda/CudaCompressorDXT.h" +#include "nvtt.h" +#include "TaskDispatcher.h" + +namespace nv +{ + class Image; +} + +namespace nvtt +{ + struct Mipmap; + + struct Compressor::Private + { + Private() {} + + bool compress(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const; + bool compress(const Surface & tex, int face, int mipmap, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const; + bool compress(AlphaMode alphaMode, int w, int h, int d, int face, int mipmap, const float * data, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const; + + void quantize(Surface & tex, const CompressionOptions::Private & compressionOptions) const; + + bool outputHeader(nvtt::TextureType textureType, int w, int h, int d, int faceCount, int mipmapCount, bool isNormalMap, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const; + + nv::CompressorInterface * chooseCpuCompressor(const CompressionOptions::Private & compressionOptions) const; + nv::CompressorInterface * chooseGpuCompressor(const CompressionOptions::Private & compressionOptions) const; + + + bool cudaSupported; + bool cudaEnabled; + + nv::AutoPtr cuda; + + TaskDispatcher * dispatcher; + //SequentialTaskDispatcher defaultDispatcher; + ConcurrentTaskDispatcher defaultDispatcher; + }; + +} // nvtt namespace + + +#endif // NV_TT_CONTEXT_H Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/Context.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/Context.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/Context.cpp @@ -0,0 +1,862 @@ +// Copyright (c) 2009-2011 Ignacio Castano +// Copyright (c) 2008-2009 NVIDIA Corporation -- Ignacio Castano +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. 
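[Editor's note] The Context.cpp implementation that follows is the body of the public nvtt::Compressor object: process() drives the InputOptions pipeline, the Surface/CubeSurface and raw overloads of outputHeader()/compress() serve the newer API, and estimateSize() computes output sizes. For orientation, a minimal caller of that public API could look like the sketch below; it is not part of this patch, and the include path and the 4x4 dummy BGRA image are assumptions for illustration.

// Standalone usage sketch, not part of the NVTT sources or this patch.
#include <nvtt/nvtt.h>

int main() {
    unsigned char bgra[4 * 4 * 4] = { 0 }; // 4x4 texture, BGRA8 (the default input format), all black

    nvtt::InputOptions inputOptions;
    inputOptions.setTextureLayout(nvtt::TextureType_2D, 4, 4);
    inputOptions.setMipmapData(bgra, 4, 4);

    nvtt::CompressionOptions compressionOptions;
    compressionOptions.setFormat(nvtt::Format_DXT1);

    nvtt::OutputOptions outputOptions;
    outputOptions.setFileName("output.dds");

    nvtt::Compressor compressor;
    compressor.enableCudaAcceleration(false); // CPU path; see Compressor::enableCudaAcceleration below
    return compressor.process(inputOptions, compressionOptions, outputOptions) ? 0 : 1;
}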
+ +#include "Context.h" + +#include "nvtt.h" + +#include "InputOptions.h" +#include "CompressionOptions.h" +#include "OutputOptions.h" +#include "Surface.h" + +#include "CompressorDX9.h" +#include "CompressorDX10.h" +#include "CompressorDX11.h" +#include "CompressorRGB.h" +#include "cuda/CudaUtils.h" +#include "cuda/CudaCompressorDXT.h" + +#include "nvimage/DirectDrawSurface.h" +#include "nvimage/ColorBlock.h" +#include "nvimage/BlockDXT.h" +#include "nvimage/Image.h" +#include "nvimage/FloatImage.h" +#include "nvimage/Filter.h" +#include "nvimage/Quantize.h" +#include "nvimage/NormalMap.h" +#include "nvimage/PixelFormat.h" +#include "nvimage/ColorSpace.h" + +#include "nvcore/Memory.h" +#include "nvcore/Ptr.h" + +using namespace nv; +using namespace nvtt; + +Compressor::Compressor() : m(*new Compressor::Private()) +{ + // CUDA initialization. + m.cudaSupported = cuda::isHardwarePresent(); + m.cudaEnabled = false; + m.cuda = NULL; + + enableCudaAcceleration(m.cudaSupported); + + m.dispatcher = &m.defaultDispatcher; +} + +Compressor::~Compressor() +{ + delete &m; +} + + +void Compressor::enableCudaAcceleration(bool enable) +{ + if (m.cudaSupported) + { + m.cudaEnabled = enable; + } + + if (m.cudaEnabled && m.cuda == NULL) + { + m.cuda = new CudaContext(); + + if (!m.cuda->isValid()) + { + m.cudaEnabled = false; + m.cuda = NULL; + } + } +} + +bool Compressor::isCudaAccelerationEnabled() const +{ + return m.cudaEnabled; +} + +void Compressor::setTaskDispatcher(TaskDispatcher * disp) +{ + if (disp == NULL) { + m.dispatcher = &m.defaultDispatcher; + } + else { + m.dispatcher = disp; + } +} + + +// Input Options API. +bool Compressor::process(const InputOptions & inputOptions, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const +{ + return m.compress(inputOptions.m, compressionOptions.m, outputOptions.m); +} + +int Compressor::estimateSize(const InputOptions & inputOptions, const CompressionOptions & compressionOptions) const +{ + int w = inputOptions.m.width; + int h = inputOptions.m.height; + int d = inputOptions.m.depth; + + getTargetExtent(&w, &h, &d, inputOptions.m.maxExtent, inputOptions.m.roundMode, inputOptions.m.textureType); + + int mipmapCount = 1; + if (inputOptions.m.generateMipmaps) { + mipmapCount = countMipmaps(w, h, d); + if (inputOptions.m.maxLevel > 0) mipmapCount = min(mipmapCount, inputOptions.m.maxLevel); + } + + return inputOptions.m.faceCount * estimateSize(w, h, d, mipmapCount, compressionOptions); +} + + +// Surface API. 
+bool Compressor::outputHeader(const Surface & tex, int mipmapCount, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const +{ + return m.outputHeader(tex.type(), tex.width(), tex.height(), tex.depth(), 1, mipmapCount, tex.isNormalMap(), compressionOptions.m, outputOptions.m); +} + +bool Compressor::compress(const Surface & tex, int face, int mipmap, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const +{ + return m.compress(tex, face, mipmap, compressionOptions.m, outputOptions.m); +} + +int Compressor::estimateSize(const Surface & tex, int mipmapCount, const CompressionOptions & compressionOptions) const +{ + const int w = tex.width(); + const int h = tex.height(); + const int d = tex.depth(); + + return estimateSize(w, h, d, mipmapCount, compressionOptions); +} + +bool Compressor::outputHeader(const CubeSurface & cube, int mipmapCount, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const +{ + return m.outputHeader(TextureType_Cube, cube.edgeLength(), cube.edgeLength(), 1, 1, mipmapCount, false, compressionOptions.m, outputOptions.m); +} + +bool Compressor::compress(const CubeSurface & cube, int mipmap, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const +{ + for (int i = 0; i < 6; i++) { + if(!m.compress(cube.face(i), i, mipmap, compressionOptions.m, outputOptions.m)) { + return false; + } + } + return true; +} + +int Compressor::estimateSize(const CubeSurface & cube, int mipmapCount, const CompressionOptions & compressionOptions) const +{ + return 6 * estimateSize(cube.edgeLength(), cube.edgeLength(), 1, mipmapCount, compressionOptions); +} + + +// Raw API. +bool Compressor::outputHeader(TextureType type, int w, int h, int d, int arraySize, int mipmapCount, bool isNormalMap, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const +{ + return m.outputHeader(type, w, h, d, arraySize, mipmapCount, isNormalMap, compressionOptions.m, outputOptions.m); +} + +bool Compressor::compress(int w, int h, int d, int face, int mipmap, const float * rgba, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const +{ + return m.compress(AlphaMode_None, w, h, d, face, mipmap, rgba, compressionOptions.m, outputOptions.m); +} + +int Compressor::estimateSize(int w, int h, int d, int mipmapCount, const CompressionOptions & compressionOptions) const +{ + const Format format = compressionOptions.m.format; + + const uint bitCount = compressionOptions.m.getBitCount(); + const uint pitchAlignment = compressionOptions.m.pitchAlignment; + + int size = 0; + for (int m = 0; m < mipmapCount; m++) + { + size += computeImageSize(w, h, d, bitCount, pitchAlignment, format); + + // Compute extents of next mipmap: + w = max(1, w / 2); + h = max(1, h / 2); + d = max(1, d / 2); + } + + return size; +} + + + + + +bool Compressor::Private::compress(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const +{ + // Make sure enums match. + nvStaticCheck(FloatImage::WrapMode_Clamp == (FloatImage::WrapMode)WrapMode_Clamp); + nvStaticCheck(FloatImage::WrapMode_Mirror == (FloatImage::WrapMode)WrapMode_Mirror); + nvStaticCheck(FloatImage::WrapMode_Repeat == (FloatImage::WrapMode)WrapMode_Repeat); + + // Get output handler. 
+ if (!outputOptions.hasValidOutputHandler()) { + outputOptions.error(Error_FileOpen); + return false; + } + + nvtt::Surface img; + img.setWrapMode(inputOptions.wrapMode); + img.setAlphaMode(inputOptions.alphaMode); + img.setNormalMap(inputOptions.isNormalMap); + + const int faceCount = inputOptions.faceCount; + int width = inputOptions.width; + int height = inputOptions.height; + int depth = inputOptions.depth; + int arraySize = inputOptions.textureType == TextureType_Array ? faceCount : 1; + + nv::getTargetExtent(&width, &height, &depth, inputOptions.maxExtent, inputOptions.roundMode, inputOptions.textureType); + + // If the extents have not changed, then we can use source images for all mipmaps. + bool canUseSourceImages = (inputOptions.width == width && inputOptions.height == height && inputOptions.depth == depth); + + int mipmapCount = 1; + if (inputOptions.generateMipmaps) { + mipmapCount = countMipmaps(width, height, depth); + if (inputOptions.maxLevel > 0) mipmapCount = min(mipmapCount, inputOptions.maxLevel); + } + + if (!outputHeader(inputOptions.textureType, width, height, depth, arraySize, mipmapCount, img.isNormalMap(), compressionOptions, outputOptions)) { + return false; + } + + + // Output images. + for (int f = 0; f < faceCount; f++) + { + int w = width; + int h = height; + int d = depth; + bool canUseSourceImagesForThisFace = canUseSourceImages; + + img.setImage(inputOptions.inputFormat, inputOptions.width, inputOptions.height, inputOptions.depth, inputOptions.images[f]); + + // To normal map. + if (inputOptions.convertToNormalMap) { + img.toGreyScale(inputOptions.heightFactors.x, inputOptions.heightFactors.y, inputOptions.heightFactors.z, inputOptions.heightFactors.w); + img.toNormalMap(inputOptions.bumpFrequencyScale.x, inputOptions.bumpFrequencyScale.y, inputOptions.bumpFrequencyScale.z, inputOptions.bumpFrequencyScale.w); + img.packNormals(); + } + + // To linear space. + if (!img.isNormalMap()) { + img.toLinear(inputOptions.inputGamma); + } + + // Resize input. + img.resize(w, h, d, ResizeFilter_Box); + + nvtt::Surface tmp = img; + if (!img.isNormalMap()) { + tmp.toGamma(inputOptions.outputGamma); + } + + quantize(tmp, compressionOptions); + compress(tmp, f, 0, compressionOptions, outputOptions); + + for (int m = 1; m < mipmapCount; m++) { + w = max(1, w/2); + h = max(1, h/2); + d = max(1, d/2); + + int idx = m * faceCount + f; + + bool useSourceImages = false; + if (canUseSourceImagesForThisFace) { + if (inputOptions.images[idx] == NULL) { // One face is missing in this mipmap level. + canUseSourceImagesForThisFace = false; // If one level is missing, ignore the following source images. + } + else { + useSourceImages = true; + } + } + + if (useSourceImages) { + img.setImage(inputOptions.inputFormat, w, h, d, inputOptions.images[idx]); + + // For already generated mipmaps, we need to convert to linear. 
+ if (!img.isNormalMap()) { + img.toLinear(inputOptions.inputGamma); + } + } + else { + if (inputOptions.mipmapFilter == MipmapFilter_Kaiser) { + float params[2] = { inputOptions.kaiserAlpha, inputOptions.kaiserStretch }; + img.buildNextMipmap(MipmapFilter_Kaiser, inputOptions.kaiserWidth, params); + } + else { + img.buildNextMipmap(inputOptions.mipmapFilter); + } + } + nvDebugCheck(img.width() == w); + nvDebugCheck(img.height() == h); + nvDebugCheck(img.depth() == d); + + if (img.isNormalMap()) { + if (inputOptions.normalizeMipmaps) { + img.expandNormals(); + img.normalizeNormalMap(); + img.packNormals(); + } + tmp = img; + } + else { + tmp = img; + tmp.toGamma(inputOptions.outputGamma); + } + + quantize(tmp, compressionOptions); + compress(tmp, f, m, compressionOptions, outputOptions); + } + } + + return true; +} + +bool Compressor::Private::compress(const Surface & tex, int face, int mipmap, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const +{ + if (!compress(tex.alphaMode(), tex.width(), tex.height(), tex.depth(), face, mipmap, tex.data(), compressionOptions, outputOptions)) { + return false; + } + + return true; +} + +bool Compressor::Private::compress(AlphaMode alphaMode, int w, int h, int d, int face, int mipmap, const float * rgba, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const +{ + int size = computeImageSize(w, h, d, compressionOptions.getBitCount(), compressionOptions.pitchAlignment, compressionOptions.format); + outputOptions.beginImage(size, w, h, d, face, mipmap); + + // Decide what compressor to use. + AutoPtr compressor; +#if defined HAVE_CUDA + if (cudaEnabled && w * h >= 512) + { + compressor = chooseGpuCompressor(compressionOptions); + } +#endif + if (compressor == NULL) + { + compressor = chooseCpuCompressor(compressionOptions); + } + + if (compressor == NULL) + { + outputOptions.error(Error_UnsupportedFeature); + } + else + { + compressor->compress(alphaMode, w, h, d, rgba, dispatcher, compressionOptions, outputOptions); + } + + outputOptions.endImage(); + + return true; +} + + +void Compressor::Private::quantize(Surface & img, const CompressionOptions::Private & compressionOptions) const +{ + if (compressionOptions.enableColorDithering) { + if (compressionOptions.format >= Format_BC1 && compressionOptions.format <= Format_BC3) { + img.quantize(0, 5, true, true); + img.quantize(1, 6, true, true); + img.quantize(2, 5, true, true); + } + else if (compressionOptions.format == Format_RGB) { + img.quantize(0, compressionOptions.rsize, true, true); + img.quantize(1, compressionOptions.gsize, true, true); + img.quantize(2, compressionOptions.bsize, true, true); + } + } + if (compressionOptions.enableAlphaDithering) { + if (compressionOptions.format == Format_RGB) { + img.quantize(3, compressionOptions.asize, true, true); + } + } + else if (compressionOptions.binaryAlpha) { + img.binarize(3, float(compressionOptions.alphaThreshold)/255.0f, compressionOptions.enableAlphaDithering); + } +} + +bool Compressor::Private::outputHeader(nvtt::TextureType textureType, int w, int h, int d, int arraySize, int mipmapCount, bool isNormalMap, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const +{ + if (w <= 0 || h <= 0 || d <= 0 || arraySize <= 0 || mipmapCount <= 0) + { + outputOptions.error(Error_InvalidInput); + return false; + } + + if (!outputOptions.outputHeader) + { + return true; + } + + // Output DDS header. 
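+    // [Editor's note] Layout reminder, derived from the checks further down in this
+    // function: the legacy DDS header is 128 bytes, and choosing Container_DDS10 adds
+    // the 20-byte DX10 extension header, giving the 148-byte total asserted below.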
+ if (outputOptions.container == Container_DDS || outputOptions.container == Container_DDS10) + { + DDSHeader header; + + header.setUserVersion(outputOptions.version); + + if (textureType == TextureType_2D) { + nvCheck(arraySize == 1); + header.setTexture2D(); + } + else if (textureType == TextureType_Cube) { + nvCheck(arraySize == 1); + header.setTextureCube(); + } + else if (textureType == TextureType_3D) { + nvCheck(arraySize == 1); + header.setTexture3D(); + header.setDepth(d); + } + else if (textureType == TextureType_Array) { + header.setTextureArray(arraySize); + } + + header.setWidth(w); + header.setHeight(h); + header.setMipmapCount(mipmapCount); + + bool supported = true; + + if (outputOptions.container == Container_DDS10) + { + if (compressionOptions.format == Format_RGBA) + { + const uint bitcount = compressionOptions.getBitCount(); + + if (compressionOptions.pixelType == PixelType_Float) { + if (compressionOptions.rsize == 16 && compressionOptions.gsize == 16 && compressionOptions.bsize == 16 && compressionOptions.asize == 16) { + header.setDX10Format(DXGI_FORMAT_R16G16B16A16_FLOAT); + } + else if (compressionOptions.rsize == 11 && compressionOptions.gsize == 11 && compressionOptions.bsize == 10 && compressionOptions.asize == 0) { + header.setDX10Format(DXGI_FORMAT_R11G11B10_FLOAT); + } + else { + supported = false; + } + } + else { + if (bitcount == 16 && compressionOptions.rsize == 16) { + header.setDX10Format(DXGI_FORMAT_R16_UNORM); + } + else { + uint format = findDXGIFormat(compressionOptions.bitcount, + compressionOptions.rmask, + compressionOptions.gmask, + compressionOptions.bmask, + compressionOptions.amask); + + if (format != DXGI_FORMAT_UNKNOWN) { + header.setDX10Format(format); + } + else { + supported = false; + } + } + } + } + else + { + if (compressionOptions.format == Format_DXT1 || compressionOptions.format == Format_DXT1a || compressionOptions.format == Format_DXT1n) { + header.setDX10Format(outputOptions.srgb ? DXGI_FORMAT_BC1_UNORM_SRGB : DXGI_FORMAT_BC1_UNORM); + if (compressionOptions.format == Format_DXT1a) header.setHasAlphaFlag(true); + if (isNormalMap) header.setNormalFlag(true); + } + else if (compressionOptions.format == Format_DXT3) { + header.setDX10Format(outputOptions.srgb ? DXGI_FORMAT_BC2_UNORM_SRGB : DXGI_FORMAT_BC2_UNORM); + } + else if (compressionOptions.format == Format_DXT5 || compressionOptions.format == Format_BC3_RGBM) { + header.setDX10Format(outputOptions.srgb ? DXGI_FORMAT_BC3_UNORM_SRGB : DXGI_FORMAT_BC3_UNORM); + } + else if (compressionOptions.format == Format_DXT5n) { + header.setDX10Format(DXGI_FORMAT_BC3_UNORM); + if (isNormalMap) header.setNormalFlag(true); + } + else if (compressionOptions.format == Format_BC4) { + header.setDX10Format(DXGI_FORMAT_BC4_UNORM); // DXGI_FORMAT_BC4_SNORM ? + } + else if (compressionOptions.format == Format_BC5 /*|| compressionOptions.format == Format_BC5_Luma*/) { + header.setDX10Format(DXGI_FORMAT_BC5_UNORM); // DXGI_FORMAT_BC5_SNORM ? + if (isNormalMap) header.setNormalFlag(true); + } + else if (compressionOptions.format == Format_BC6) { + if (compressionOptions.pixelType == PixelType_Float) header.setDX10Format(DXGI_FORMAT_BC6H_SF16); + /*if (compressionOptions.pixelType == PixelType_UnsignedFloat)*/ header.setDX10Format(DXGI_FORMAT_BC6H_UF16); // By default we assume unsigned. + } + else if (compressionOptions.format == Format_BC7) { + header.setDX10Format(outputOptions.srgb ? 
DXGI_FORMAT_BC7_UNORM_SRGB : DXGI_FORMAT_BC7_UNORM); + if (isNormalMap) header.setNormalFlag(true); + } + else if (compressionOptions.format == Format_CTX1) { + supported = false; + } + else { + supported = false; + } + } + } + else + { + if (compressionOptions.format == Format_RGBA) + { + // Get output bit count. + header.setPitch(computeBytePitch(w, compressionOptions.getBitCount(), compressionOptions.pitchAlignment)); + + if (compressionOptions.pixelType == PixelType_Float) + { + if (compressionOptions.rsize == 16 && compressionOptions.gsize == 0 && compressionOptions.bsize == 0 && compressionOptions.asize == 0) + { + header.setFormatCode(111); // D3DFMT_R16F + } + else if (compressionOptions.rsize == 16 && compressionOptions.gsize == 16 && compressionOptions.bsize == 0 && compressionOptions.asize == 0) + { + header.setFormatCode(112); // D3DFMT_G16R16F + } + else if (compressionOptions.rsize == 16 && compressionOptions.gsize == 16 && compressionOptions.bsize == 16 && compressionOptions.asize == 16) + { + header.setFormatCode(113); // D3DFMT_A16B16G16R16F + } + else if (compressionOptions.rsize == 32 && compressionOptions.gsize == 0 && compressionOptions.bsize == 0 && compressionOptions.asize == 0) + { + header.setFormatCode(114); // D3DFMT_R32F + } + else if (compressionOptions.rsize == 32 && compressionOptions.gsize == 32 && compressionOptions.bsize == 0 && compressionOptions.asize == 0) + { + header.setFormatCode(115); // D3DFMT_G32R32F + } + else if (compressionOptions.rsize == 32 && compressionOptions.gsize == 32 && compressionOptions.bsize == 32 && compressionOptions.asize == 32) + { + header.setFormatCode(116); // D3DFMT_A32B32G32R32F + } + else + { + supported = false; + } + } + else // Fixed point + { + const uint bitcount = compressionOptions.getBitCount(); + + if (compressionOptions.bitcount != 0) + { + // Masks already computed. + header.setPixelFormat(compressionOptions.bitcount, compressionOptions.rmask, compressionOptions.gmask, compressionOptions.bmask, compressionOptions.amask); + } + else if (bitcount <= 32) + { + // Compute pixel format masks. 
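+                    // [Editor's note] Worked example: for a 16-bit R5G6B5 layout
+                    // (rsize=5, gsize=6, bsize=5, asize=0) the shifts come out as
+                    // ashift=0, bshift=0, gshift=5, rshift=11, giving the familiar
+                    // masks rmask=0xF800, gmask=0x07E0, bmask=0x001F, amask=0x0000.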
+ const uint ashift = 0; + const uint bshift = ashift + compressionOptions.asize; + const uint gshift = bshift + compressionOptions.bsize; + const uint rshift = gshift + compressionOptions.gsize; + + const uint rmask = ((1 << compressionOptions.rsize) - 1) << rshift; + const uint gmask = ((1 << compressionOptions.gsize) - 1) << gshift; + const uint bmask = ((1 << compressionOptions.bsize) - 1) << bshift; + const uint amask = ((1 << compressionOptions.asize) - 1) << ashift; + + header.setPixelFormat(bitcount, rmask, gmask, bmask, amask); + } + else + { + supported = false; + } + } + } + else + { + header.setLinearSize(computeImageSize(w, h, d, compressionOptions.bitcount, compressionOptions.pitchAlignment, compressionOptions.format)); + + if (compressionOptions.format == Format_DXT1 || compressionOptions.format == Format_DXT1a || compressionOptions.format == Format_DXT1n) { + header.setFourCC('D', 'X', 'T', '1'); + if (isNormalMap) header.setNormalFlag(true); + } + else if (compressionOptions.format == Format_DXT3) { + header.setFourCC('D', 'X', 'T', '3'); + } + else if (compressionOptions.format == Format_DXT5 || compressionOptions.format == Format_BC3_RGBM) { + header.setFourCC('D', 'X', 'T', '5'); + } + else if (compressionOptions.format == Format_DXT5n) { + header.setFourCC('D', 'X', 'T', '5'); + if (isNormalMap) { + header.setNormalFlag(true); + header.setSwizzleCode('A', '2', 'D', '5'); + //header.setSwizzleCode('x', 'G', 'x', 'R'); + } + } + else if (compressionOptions.format == Format_BC4) { + header.setFourCC('A', 'T', 'I', '1'); + } + else if (compressionOptions.format == Format_BC5 /*|| compressionOptions.format == Format_BC5_Luma*/) { + header.setFourCC('A', 'T', 'I', '2'); + if (isNormalMap) { + header.setNormalFlag(true); + header.setSwizzleCode('A', '2', 'X', 'Y'); + } + } + else if (compressionOptions.format == Format_BC6) { + header.setFourCC('Z', 'O', 'H', ' '); // This is not supported by D3DX. Always use DX10 header with BC6-7 formats. + supported = false; + } + else if (compressionOptions.format == Format_BC7) { + header.setFourCC('Z', 'O', 'L', 'A'); // This is not supported by D3DX. Always use DX10 header with BC6-7 formats. + if (isNormalMap) header.setNormalFlag(true); + supported = false; + } + else if (compressionOptions.format == Format_CTX1) { + header.setFourCC('C', 'T', 'X', '1'); + if (isNormalMap) header.setNormalFlag(true); + } + else { + supported = false; + } + } + + if (outputOptions.srgb) header.setSrgbFlag(true); + } + + if (!supported) + { + // This container does not support the requested format. + outputOptions.error(Error_UnsupportedOutputFormat); + return false; + } + + uint headerSize = 128; + if (header.hasDX10Header()) + { + nvStaticCheck(sizeof(DDSHeader) == 128 + 20); + headerSize = 128 + 20; + } + + // Swap bytes if necessary. 
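+    // [Editor's note] DDS headers are stored little-endian on disk, so this swap is
+    // presumably a no-op on little-endian hosts and only matters when writing from a
+    // big-endian machine.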
+ header.swapBytes(); + + bool writeSucceed = outputOptions.writeData(&header, headerSize); + if (!writeSucceed) + { + outputOptions.error(Error_FileWrite); + } + + return writeSucceed; + } + + return true; +} + + +CompressorInterface * Compressor::Private::chooseCpuCompressor(const CompressionOptions::Private & compressionOptions) const +{ + if (compressionOptions.format == Format_RGB) + { + return new PixelFormatConverter; + } + else if (compressionOptions.format == Format_DXT1) + { +#if defined(HAVE_ATITC) + if (compressionOptions.externalCompressor == "ati") return new AtiCompressorDXT1; + else +#endif + +#if defined(HAVE_SQUISH) + if (compressionOptions.externalCompressor == "squish") return new SquishCompressorDXT1; + else +#endif + +#if defined(HAVE_D3DX) + if (compressionOptions.externalCompressor == "d3dx") return new D3DXCompressorDXT1; + else +#endif + +#if defined(HAVE_D3DX) + if (compressionOptions.externalCompressor == "stb") return new StbCompressorDXT1; + else +#endif + + if (compressionOptions.quality == Quality_Fastest) + { + return new FastCompressorDXT1; + } + + return new CompressorDXT1; + } + else if (compressionOptions.format == Format_DXT1a) + { + if (compressionOptions.quality == Quality_Fastest) + { + return new FastCompressorDXT1a; + } + + return new CompressorDXT1a; + } + else if (compressionOptions.format == Format_DXT1n) + { + // Not supported. + } + else if (compressionOptions.format == Format_DXT3) + { + if (compressionOptions.quality == Quality_Fastest) + { + return new FastCompressorDXT3; + } + + return new CompressorDXT3; + } + else if (compressionOptions.format == Format_DXT5) + { +#if defined(HAVE_ATITC) + if (compressionOptions.externalCompressor == "ati") return new AtiCompressorDXT5; + else +#endif + + if (compressionOptions.quality == Quality_Fastest) + { + return new FastCompressorDXT5; + } + + return new CompressorDXT5; + } + else if (compressionOptions.format == Format_DXT5n) + { + if (compressionOptions.quality == Quality_Fastest) + { + return new FastCompressorDXT5n; + } + + return new CompressorDXT5n; + } + else if (compressionOptions.format == Format_BC4) + { + if (compressionOptions.quality == Quality_Fastest || compressionOptions.quality == Quality_Normal) + { + return new FastCompressorBC4; + } + + return new ProductionCompressorBC4; + } + else if (compressionOptions.format == Format_BC5) + { + if (compressionOptions.quality == Quality_Fastest || compressionOptions.quality == Quality_Normal) + { + return new FastCompressorBC5; + } + + return new ProductionCompressorBC5; + } + else if (compressionOptions.format == Format_CTX1) + { + // Not supported. + } + else if (compressionOptions.format == Format_BC6) + { + return new CompressorBC6; + } + else if (compressionOptions.format == Format_BC7) + { + return new CompressorBC7; + } + /*else if (compressionOptions.format == Format_BC5_Luma) + { + return new ProductionCompressorBC5_Luma; + }*/ + else if (compressionOptions.format == Format_BC3_RGBM) + { + return new CompressorBC3_RGBM; + } + + return NULL; +} + + +CompressorInterface * Compressor::Private::chooseGpuCompressor(const CompressionOptions::Private & compressionOptions) const +{ + nvDebugCheck(cudaSupported); + + if (compressionOptions.quality == Quality_Fastest) + { + // Do not use CUDA compressors in fastest quality mode. 
+ return NULL; + } + +#if defined HAVE_CUDA + if (compressionOptions.format == Format_DXT1) + { + return new CudaCompressorDXT1(*cuda); + } + else if (compressionOptions.format == Format_DXT1a) + { + //#pragma NV_MESSAGE("TODO: Implement CUDA DXT1a compressor.") + } + else if (compressionOptions.format == Format_DXT1n) + { + // Not supported. + } + else if (compressionOptions.format == Format_DXT3) + { + //return new CudaCompressorDXT3(*cuda); + } + else if (compressionOptions.format == Format_DXT5) + { + //return new CudaCompressorDXT5(*cuda); + } + else if (compressionOptions.format == Format_DXT5n) + { + // @@ Return CUDA compressor. + } + else if (compressionOptions.format == Format_BC4) + { + // Not supported. + } + else if (compressionOptions.format == Format_BC5) + { + // Not supported. + } + else if (compressionOptions.format == Format_CTX1) + { + // @@ Return CUDA compressor. + } + else if (compressionOptions.format == Format_BC6) + { + // Not supported. + } + else if (compressionOptions.format == Format_BC7) + { + // Not supported. + } +#endif // defined HAVE_CUDA + + return NULL; +} Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/CubeSurface.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/CubeSurface.h +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/CubeSurface.h @@ -0,0 +1,110 @@ +// Copyright (c) 2009-2011 Ignacio Castano +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +#ifndef NVTT_CUBEIMAGE_H +#define NVTT_CUBEIMAGE_H + +#include "nvtt.h" +#include "Surface.h" + +#include "nvimage/FloatImage.h" + +#include "nvmath/Vector.h" + +#include "nvcore/RefCounted.h" +#include "nvcore/Ptr.h" +#include "nvcore/Array.h" + + +namespace nvtt +{ + struct TexelTable { + TexelTable(uint edgeLength); + + float solidAngle(uint f, uint x, uint y) const; + const nv::Vector3 & direction(uint f, uint x, uint y) const; + + uint size; + nv::Array solidAngleArray; + nv::Array directionArray; + }; + + + struct CubeSurface::Private : public nv::RefCounted + { + void operator=(const Private &); + public: + Private() + { + nvDebugCheck( refCount() == 0 ); + + edgeLength = 0; + texelTable = NULL; + } + Private(const Private & p) : RefCounted() // Copy ctor. inits refcount to 0. + { + nvDebugCheck( refCount() == 0 ); + + edgeLength = p.edgeLength; + for (uint i = 0; i < 6; i++) { + face[i] = p.face[i]; + } + texelTable = NULL; // @@ Transfer tables. Needs refcounting? 
+ } + ~Private() + { + delete texelTable; + } + + void allocate(uint edgeLength) + { + this->edgeLength = edgeLength; + for (uint i = 0; i < 6; i++) { + face[i].detach(); + face[i].m->image = new nv::FloatImage; + face[i].m->image->allocate(4, edgeLength, edgeLength, 1); + } + } + + void allocateTexelTable() + { + if (texelTable == NULL) { + texelTable = new TexelTable(edgeLength); + } + } + + // Filtering helpers: + nv::Vector3 applyAngularFilter(const nv::Vector3 & dir, float coneAngle, float * filterTable, int tableSize); + nv::Vector3 applyCosinePowerFilter(const nv::Vector3 & dir, float coneAngle, float cosinePower); + + nv::Vector3 sample(const nv::Vector3 & dir); + + uint edgeLength; + Surface face[6]; + TexelTable * texelTable; + }; + +} // nvtt namespace + + +#endif // NVTT_CUBEIMAGE_H Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/CubeSurface.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/CubeSurface.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/CubeSurface.cpp @@ -0,0 +1,1042 @@ +// Copyright (c) 2009-2011 Ignacio Castano +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +#include "CubeSurface.h" +#include "Surface.h" + +#include "nvimage/DirectDrawSurface.h" + +#include "nvmath/Vector.inl" + +#include "nvcore/Array.inl" +#include "nvcore/StrLib.h" + +using namespace nv; +using namespace nvtt; + + + +// Solid angle of an axis aligned quad from (0,0,1) to (x,y,1) +// See: http://www.fizzmoll11.com/thesis/ for a derivation of this formula. +static float areaElement(float x, float y) { + return atan2(x*y, sqrtf(x*x + y*y + 1)); +} + +// Solid angle of a hemicube texel. +static float solidAngleTerm(uint x, uint y, float inverseEdgeLength) { + // Transform x,y to [-1, 1] range, offset by 0.5 to point to texel center. + float u = (float(x) + 0.5f) * (2 * inverseEdgeLength) - 1.0f; + float v = (float(y) + 0.5f) * (2 * inverseEdgeLength) - 1.0f; + nvDebugCheck(u >= -1.0f && u <= 1.0f); + nvDebugCheck(v >= -1.0f && v <= 1.0f); + +#if 1 + // Exact solid angle: + float x0 = u - inverseEdgeLength; + float y0 = v - inverseEdgeLength; + float x1 = u + inverseEdgeLength; + float y1 = v + inverseEdgeLength; + float solidAngle = areaElement(x0, y0) - areaElement(x0, y1) - areaElement(x1, y0) + areaElement(x1, y1); + nvDebugCheck(solidAngle > 0.0f); + + return solidAngle; +#else + // This formula is equivalent, but not as precise. 
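+    // [Editor's note] The approximation below treats the texel as a flat differential
+    // area dA = (2/edgeLength)^2 on the z = 1 plane. With r^2 = 1 + u^2 + v^2 and
+    // cos(theta) = 1/r, the subtended solid angle is dA * cos(theta) / r^2
+    // = dA / (1 + u^2 + v^2)^(3/2), which is exactly what this branch computes.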
+ float pixel_area = nv::square(2.0f * inverseEdgeLength); + float dist_square = 1.0f + nv::square(u) + nv::square(v); + float cos_theta = 1.0f / sqrt(dist_square); + float cos_theta_d2 = cos_theta / dist_square; // Funny this is just 1/dist^3 or cos(tetha)^3 + + return pixel_area * cos_theta_d2; +#endif +} + + +static Vector3 texelDirection(uint face, uint x, uint y, int edgeLength, EdgeFixup fixupMethod) +{ + float u, v; + if (fixupMethod == EdgeFixup_Stretch) { + // Transform x,y to [-1, 1] range, match up edges exactly. + u = float(x) * 2.0f / (edgeLength - 1) - 1.0f; + v = float(y) * 2.0f / (edgeLength - 1) - 1.0f; + } + else { + // Transform x,y to [-1, 1] range, offset by 0.5 to point to texel center. + u = (float(x) + 0.5f) * (2.0f / edgeLength) - 1.0f; + v = (float(y) + 0.5f) * (2.0f / edgeLength) - 1.0f; + } + + if (fixupMethod == EdgeFixup_Warp) { + // Warp texel centers in the proximity of the edges. + float a = powf(float(edgeLength), 2.0f) / powf(float(edgeLength - 1), 3.0f); + u = a * powf(u, 3) + u; + v = a * powf(v, 3) + v; + } + + nvDebugCheck(u >= -1.0f && u <= 1.0f); + nvDebugCheck(v >= -1.0f && v <= 1.0f); + + Vector3 n; + + if (face == 0) { + n.x = 1; + n.y = -v; + n.z = -u; + } + if (face == 1) { + n.x = -1; + n.y = -v; + n.z = u; + } + + if (face == 2) { + n.x = u; + n.y = 1; + n.z = v; + } + if (face == 3) { + n.x = u; + n.y = -1; + n.z = -v; + } + + if (face == 4) { + n.x = u; + n.y = -v; + n.z = 1; + } + if (face == 5) { + n.x = -u; + n.y = -v; + n.z = -1; + } + + return normalizeFast(n); +} + + +TexelTable::TexelTable(uint edgeLength) : size(edgeLength) { + + uint hsize = size/2; + + // Allocate a small solid angle table that takes into account cube map symmetry. + solidAngleArray.resize(hsize * hsize); + + for (uint y = 0; y < hsize; y++) { + for (uint x = 0; x < hsize; x++) { + solidAngleArray[y * hsize + x] = solidAngleTerm(hsize+x, hsize+y, 1.0f/edgeLength); + } + } + + + directionArray.resize(size*size*6); + + for (uint f = 0; f < 6; f++) { + for (uint y = 0; y < size; y++) { + for (uint x = 0; x < size; x++) { + directionArray[(f * size + y) * size + x] = texelDirection(f, x, y, edgeLength, EdgeFixup_None); + } + } + } +} + +const Vector3 & TexelTable::direction(uint f, uint x, uint y) const { + nvDebugCheck(f < 6 && x < size && y < size); + return directionArray[(f * size + y) * size + x]; +} + +float TexelTable::solidAngle(uint f, uint x, uint y) const { + uint hsize = size/2; + if (x >= hsize) x -= hsize; + else if (x < hsize) x = hsize - x - 1; + if (y >= hsize) y -= hsize; + else if (y < hsize) y = hsize - y - 1; + + return solidAngleArray[y * hsize + x]; +} + + +static const Vector3 faceNormals[6] = { + Vector3(1, 0, 0), + Vector3(-1, 0, 0), + Vector3(0, 1, 0), + Vector3(0, -1, 0), + Vector3(0, 0, 1), + Vector3(0, 0, -1), +}; + +static const Vector3 faceU[6] = { + Vector3(0, 0, -1), + Vector3(0, 0, 1), + Vector3(1, 0, 0), + Vector3(1, 0, 0), + Vector3(1, 0, 0), + Vector3(-1, 0, 0), +}; + +static const Vector3 faceV[6] = { + Vector3(0, -1, 0), + Vector3(0, -1, 0), + Vector3(0, 0, 1), + Vector3(0, 0, -1), + Vector3(0, -1, 0), + Vector3(0, -1, 0), +}; + + +static Vector2 toPolar(Vector3::Arg v) { + Vector2 p; + p.x = atan2(v.x, v.y); // theta + p.y = acosf(v.z); // phi + return p; +} + +static Vector2 toPlane(float theta, float phi) { + float x = sin(phi) * cos(theta); + float y = sin(phi) * sin(theta); + float z = cos(phi); + + Vector2 p; + p.x = x / fabs(z); + p.y = y / fabs(z); + //p.x = tan(phi) * cos(theta); + //p.y = tan(phi) * sin(theta); + + 
return p; +} + +static Vector2 toPlane(Vector3::Arg v) { + Vector2 p; + p.x = v.x / fabs(v.z); + p.y = v.y / fabs(v.z); + return p; +} + + + + + +CubeSurface::CubeSurface() : m(new CubeSurface::Private()) +{ + m->addRef(); +} + +CubeSurface::CubeSurface(const CubeSurface & cube) : m(cube.m) +{ + if (m != NULL) m->addRef(); +} + +CubeSurface::~CubeSurface() +{ + if (m != NULL) m->release(); + m = NULL; +} + +void CubeSurface::operator=(const CubeSurface & cube) +{ + if (cube.m != NULL) cube.m->addRef(); + if (m != NULL) m->release(); + m = cube.m; +} + +void CubeSurface::detach() +{ + if (m->refCount() > 1) + { + m->release(); + m = new CubeSurface::Private(*m); + m->addRef(); + nvDebugCheck(m->refCount() == 1); + } +} + + + +bool CubeSurface::isNull() const +{ + return m->edgeLength == 0; +} + +int CubeSurface::edgeLength() const +{ + return m->edgeLength; +} + +int CubeSurface::countMipmaps() const +{ + return nv::countMipmaps(m->edgeLength); +} + +Surface & CubeSurface::face(int f) +{ + nvDebugCheck(f >= 0 && f < 6); + return m->face[f]; +} + +const Surface & CubeSurface::face(int f) const +{ + nvDebugCheck(f >= 0 && f < 6); + return m->face[f]; +} + + +bool CubeSurface::load(const char * fileName, int mipmap) +{ + if (strEqual(Path::extension(fileName), ".dds")) { + nv::DirectDrawSurface dds(fileName); + + if (!dds.isValid()/* || !dds.isSupported()*/) { + return false; + } + + if (!dds.isTextureCube()) { + return false; + } + + // Make sure it's a valid cube. + if (dds.header.width != dds.header.height) return false; + //if ((dds.header.caps.caps2 & DDSCAPS2_CUBEMAP_ALL_FACES) != DDSCAPS2_CUBEMAP_ALL_FACES) return false; + + if (mipmap < 0) { + mipmap = dds.mipmapCount() - 1 - mipmap; + } + if (mipmap < 0 || mipmap > I32(dds.mipmapCount())) return false; + + + nvtt::InputFormat inputFormat = nvtt::InputFormat_RGBA_16F; + + if (dds.header.hasDX10Header()) { + if (dds.header.header10.dxgiFormat == DXGI_FORMAT_R16G16B16A16_FLOAT) inputFormat = nvtt::InputFormat_RGBA_16F; + else if (dds.header.header10.dxgiFormat == DXGI_FORMAT_R32G32B32A32_FLOAT) inputFormat = nvtt::InputFormat_RGBA_32F; + else if (dds.header.header10.dxgiFormat == DXGI_FORMAT_R32_FLOAT) inputFormat = nvtt::InputFormat_R_32F; + else return false; + } + else { + if ((dds.header.pf.flags & DDPF_FOURCC) != 0) { + if (dds.header.pf.fourcc == D3DFMT_A16B16G16R16F) inputFormat = nvtt::InputFormat_RGBA_16F; + else if (dds.header.pf.fourcc == D3DFMT_A32B32G32R32F) inputFormat = nvtt::InputFormat_RGBA_32F; + else if (dds.header.pf.fourcc == D3DFMT_R32F) inputFormat = nvtt::InputFormat_R_32F; + else return false; + } + else { + if (dds.header.pf.bitcount == 32 /*&& ...*/) inputFormat = nvtt::InputFormat_BGRA_8UB; + else return false; // @@ Do pixel format conversions! 
+ } + } + + uint edgeLength = dds.surfaceWidth(mipmap); + uint size = dds.surfaceSize(mipmap); + + void * data = malloc(size); + + for (int f = 0; f < 6; f++) { + dds.readSurface(f, mipmap, data, size); + m->face[f].setImage(inputFormat, edgeLength, edgeLength, 1, data); + } + + m->edgeLength = edgeLength; + + free(data); + + return true; + } + + return false; +} + +bool CubeSurface::save(const char * fileName) const +{ + // @@ TODO + return false; +} + +struct ivec2 { + uint x; + uint y; +}; +// posx negx posy negy posz negz +static const ivec2 foldOffsetVerticalCross[6] = { {2, 1}, {0, 1}, {1, 0}, {1, 2}, {1, 1}, {1, 3} }; +static const ivec2 foldOffsetHorizontalCross[6] = { {2, 1}, {0, 1}, {1, 0}, {1, 2}, {1, 1}, {3, 1} }; +static const ivec2 foldOffsetColumn[6] = { {0, 0}, {0, 1}, {0, 2}, {0, 3}, {0, 4}, {0, 5} }; +static const ivec2 foldOffsetRow[6] = { {0, 0}, {1, 0}, {2, 0}, {3, 0}, {4, 0}, {5, 0} }; + +void CubeSurface::fold(const Surface & tex, CubeLayout layout) +{ + ivec2 const* offsets = 0; + uint edgeLength; + + switch(layout) { + case CubeLayout_LatitudeLongitude: + case CubeLayout_VerticalCross: + edgeLength = tex.height() / 4; + offsets = foldOffsetVerticalCross; + break; + case CubeLayout_HorizontalCross: + edgeLength = tex.width() / 4; + offsets = foldOffsetHorizontalCross; + break; + case CubeLayout_Column: + edgeLength = tex.width(); + offsets = foldOffsetColumn; + break; + case CubeLayout_Row: + edgeLength = tex.height(); + offsets = foldOffsetRow; + break; + } + + m->edgeLength = edgeLength; + for(uint f = 0; f < 6; f++) { + uint x = offsets[f].x * edgeLength; + uint y = offsets[f].y * edgeLength; + m->face[f] = tex.createSubImage(x, x + edgeLength - 1, y, y + edgeLength - 1, 0, 0); + } + + if(layout == CubeLayout_VerticalCross || layout == CubeLayout_LatitudeLongitude) { + // Back face needs to be rotated 180 degrees + m->face[5].flipX(); + m->face[5].flipY(); + } +} + +Surface CubeSurface::unfold(CubeLayout layout) const +{ + ivec2 const* offsets = 0; + uint edgeLength = m->edgeLength; + uint width; + uint height; + + switch(layout) { + case CubeLayout_LatitudeLongitude: + case CubeLayout_VerticalCross: + offsets = foldOffsetVerticalCross; + width = 3 * edgeLength; + height = 4 * edgeLength; + // Back face needs to be rotated 180 degrees + m->face[5].flipX(); + m->face[5].flipY(); + break; + case CubeLayout_HorizontalCross: + offsets = foldOffsetHorizontalCross; + width = 4 * edgeLength; + height = 3 * edgeLength; + break; + case CubeLayout_Column: + offsets = foldOffsetColumn; + width = edgeLength; + height = 6 * edgeLength; + break; + case CubeLayout_Row: + offsets = foldOffsetRow; + width = 6 * edgeLength; + height = edgeLength; + break; + } + + Surface surface; + surface.setImage(width, height, 1); + for(uint f = 0; f < 6; f++) { + uint x = offsets[f].x * edgeLength; + uint y = offsets[f].y * edgeLength; + surface.copy(m->face[f], 0, 0, 0, edgeLength, edgeLength, 1, x, y, 0); + } + + if(layout == CubeLayout_VerticalCross || layout == CubeLayout_LatitudeLongitude) { + // Undo back face rotation + m->face[5].flipY(); + m->face[5].flipX(); + } + return surface; +} + +float CubeSurface::average(int channel) const +{ + const uint edgeLength = m->edgeLength; + m->allocateTexelTable(); + + float total = 0.0f; + float sum = 0.0f; + + for (int f = 0; f < 6; f++) { + float * c = m->face[f].m->image->channel(channel); + + for (uint y = 0; y < edgeLength; y++) { + for (uint x = 0; x < edgeLength; x++) { + float solidAngle = m->texelTable->solidAngle(f, x, y); + + total += 
solidAngle; + sum += c[y * edgeLength + x] * solidAngle; + } + } + } + + return sum / total; +} + +void CubeSurface::range(int channel, float * minimum_ptr, float * maximum_ptr) const +{ + const uint edgeLength = m->edgeLength; + m->allocateTexelTable(); + + float minimum = NV_FLOAT_MAX; + float maximum = 0.0f; + + for (int f = 0; f < 6; f++) { + float * c = m->face[f].m->image->channel(channel); + + for (uint y = 0; y < edgeLength; y++) { + for (uint x = 0; x < edgeLength; x++) { + + minimum = nv::min(minimum, c[y * edgeLength + x]); + maximum = nv::max(maximum, c[y * edgeLength + x]); + } + } + } + + *minimum_ptr = minimum; + *maximum_ptr = maximum; +} + +void CubeSurface::clamp(int channel, float low/*= 0.0f*/, float high/*= 1.0f*/) { + for (int f = 0; f < 6; f++) { + m->face[f].clamp(channel, low, high); + } +} + + + +#include "nvmath/SphericalHarmonic.h" + +CubeSurface CubeSurface::irradianceFilter(int size, EdgeFixup fixupMethod) const +{ + m->allocateTexelTable(); + + // Transform this cube to spherical harmonic basis + Sh2 sh; + + // For each texel of the input cube. + const uint edgeLength = m->edgeLength; + for (uint f = 0; f < 6; f++) { + for (uint y = 0; y < edgeLength; y++) { + for (uint x = 0; x < edgeLength; x++) { + + Vector3 dir = m->texelTable->direction(f, x, y); + float solidAngle = m->texelTable->solidAngle(f, x, y); + + Sh2 shDir; + shDir.eval(dir); + + sh.addScaled(sh, solidAngle); + } + } + } + + + // Evaluate spherical harmonic for each output texel. + CubeSurface output; + output.m->allocate(size); + + + + + // @@ TODO + return CubeSurface(); +} + + + + +// Convolve filter against this cube. +Vector3 CubeSurface::Private::applyAngularFilter(const Vector3 & filterDir, float coneAngle, float * filterTable, int tableSize) +{ + const float cosineConeAngle = cos(coneAngle); + nvDebugCheck(cosineConeAngle >= 0); + + Vector3 color(0); + float sum = 0; + + // Things I have tried to speed this up: + // - Compute accurate bounds assuming cone axis aligned to plane, result was too small elsewhere. + // - Compute ellipse that results in the cone/plane intersection and compute its bounds. Sometimes intersection is a parabolla, hard to handle that case. + // - Compute the 6 axis aligned planes that bound the cone, clip faces against planes. Resulting plane equations are way too complex. + + // What AMD CubeMapGen does: + // - Compute conservative bounds on the primary face, wrap around the adjacent faces. + + + // For each texel of the input cube. + for (uint f = 0; f < 6; f++) { + + // Test face cone agains filter cone. + float cosineFaceAngle = dot(filterDir, faceNormals[f]); + float faceAngle = acosf(cosineFaceAngle); + + if (faceAngle > coneAngle + atanf(sqrtf(2))) { + // Skip face. + continue; + } + + const int L = I32(edgeLength-1); + int x0 = 0, x1 = L; + int y0 = 0, y1 = L; + +#if 0 + float u0 = -1; + float u1 = 1; + float v0 = -1; + float v1 = 1; + + // @@ Compute uvs. 
+ + // Expand uv coordinates from [-1,1] to [0, edgeLength) + u0 = (u0 + 1) * edgeLength * 0.5f - 0.5f; + v0 = (v0 + 1) * edgeLength * 0.5f - 0.5f; + u1 = (u1 + 1) * edgeLength * 0.5f - 0.5f; + v1 = (v1 + 1) * edgeLength * 0.5f - 0.5f; + nvDebugCheck(u0 >= -0.5f && u0 <= edgeLength - 0.5f); + nvDebugCheck(v0 >= -0.5f && v0 <= edgeLength - 0.5f); + nvDebugCheck(u1 >= -0.5f && u1 <= edgeLength - 0.5f); + nvDebugCheck(v1 >= -0.5f && v1 <= edgeLength - 0.5f); + + x0 = clamp(ifloor(u0), 0, L); + y0 = clamp(ifloor(v0), 0, L); + x1 = clamp(iceil(u1), 0, L); + y1 = clamp(iceil(v1), 0, L); +#endif + + nvDebugCheck(x1 >= x0); + nvDebugCheck(y1 >= y0); + + if (x1 == x0 || y1 == y0) { + // Skip this face. + continue; + } + + + const Surface & inputFace = face[f]; + const FloatImage * inputImage = inputFace.m->image; + + for (int y = y0; y <= y1; y++) { + bool inside = false; + for (int x = x0; x <= x1; x++) { + + Vector3 dir = texelTable->direction(f, x, y); + float cosineAngle = dot(dir, filterDir); + + if (cosineAngle > cosineConeAngle) { + float solidAngle = texelTable->solidAngle(f, x, y); + //float scale = powf(saturate(cosineAngle), cosinePower); + + int idx = int(saturate(cosineAngle) * (tableSize - 1)); + float scale = filterTable[idx]; // @@ Do bilinear interpolation? + + float contribution = solidAngle * scale; + + sum += contribution; + color.x += contribution * inputImage->pixel(0, x, y, 0); + color.y += contribution * inputImage->pixel(1, x, y, 0); + color.z += contribution * inputImage->pixel(2, x, y, 0); + + inside = true; + } + else if (inside) { + // Filter scale is monotonic, if we have been inside once and we just exit, then we can skip the rest of the row. + // We could do the same thing for the columns and skip entire rows. + break; + } + } + } + } + + color *= (1.0f / sum); + + return color; +} + +// We want to find the alpha such that: +// cos(alpha)^cosinePower = epsilon +// That's: acos(epsilon^(1/cosinePower)) + +// We can cull texels in two different ways: +// - culling faces that do not touch the cone. +// - computing one rectangle per face, find intersection between cone and face. +// - + +// Other speedups: +// - parallelize. Done. +// - use ISPC? + + +// Convolve filter against this cube. +Vector3 CubeSurface::Private::applyCosinePowerFilter(const Vector3 & filterDir, float coneAngle, float cosinePower) +{ + const float cosineConeAngle = cos(coneAngle); + nvDebugCheck(cosineConeAngle >= 0); + + Vector3 color(0); + float sum = 0; + + // Things I have tried to speed this up: + // - Compute accurate bounds assuming cone axis aligned to plane, result was too small elsewhere. + // - Compute ellipse that results in the cone/plane intersection and compute its bounds. Sometimes intersection is a parabolla, hard to handle that case. + // - Compute the 6 axis aligned planes that bound the cone, clip faces against planes. Resulting plane equations are way too complex. + + // What AMD CubeMapGen does: + // - Compute conservative bounds on the primary face, wrap around the adjacent faces. + + + // For each texel of the input cube. + for (uint f = 0; f < 6; f++) { + + // Test face cone agains filter cone. + float cosineFaceAngle = dot(filterDir, faceNormals[f]); + float faceAngle = acosf(cosineFaceAngle); + + if (faceAngle > coneAngle + atanf(sqrtf(2))) { + // Skip face. + continue; + } + + const int L = I32(edgeLength-1); + int x0 = 0, x1 = L; + int y0 = 0, y1 = L; + +#if 0 + float u0 = -1; + float u1 = 1; + float v0 = -1; + float v1 = 1; + + // @@ Compute uvs. 
+ + // Expand uv coordinates from [-1,1] to [0, edgeLength) + u0 = (u0 + 1) * edgeLength * 0.5f - 0.5f; + v0 = (v0 + 1) * edgeLength * 0.5f - 0.5f; + u1 = (u1 + 1) * edgeLength * 0.5f - 0.5f; + v1 = (v1 + 1) * edgeLength * 0.5f - 0.5f; + nvDebugCheck(u0 >= -0.5f && u0 <= edgeLength - 0.5f); + nvDebugCheck(v0 >= -0.5f && v0 <= edgeLength - 0.5f); + nvDebugCheck(u1 >= -0.5f && u1 <= edgeLength - 0.5f); + nvDebugCheck(v1 >= -0.5f && v1 <= edgeLength - 0.5f); + + x0 = clamp(ifloor(u0), 0, L); + y0 = clamp(ifloor(v0), 0, L); + x1 = clamp(iceil(u1), 0, L); + y1 = clamp(iceil(v1), 0, L); +#endif + + nvDebugCheck(x1 >= x0); + nvDebugCheck(y1 >= y0); + + if (x1 == x0 || y1 == y0) { + // Skip this face. + continue; + } + + + const Surface & inputFace = face[f]; + const FloatImage * inputImage = inputFace.m->image; + + for (int y = y0; y <= y1; y++) { + bool inside = false; + for (int x = x0; x <= x1; x++) { + + Vector3 dir = texelTable->direction(f, x, y); + float cosineAngle = dot(dir, filterDir); + + if (cosineAngle > cosineConeAngle) { + float solidAngle = texelTable->solidAngle(f, x, y); + float scale = powf(saturate(cosineAngle), cosinePower); + float contribution = solidAngle * scale; + + sum += contribution; + color.x += contribution * inputImage->pixel(0, x, y, 0); + color.y += contribution * inputImage->pixel(1, x, y, 0); + color.z += contribution * inputImage->pixel(2, x, y, 0); + + inside = true; + } + else if (inside) { + // Filter scale is monotonic, if we have been inside once and we just exit, then we can skip the rest of the row. + // We could do the same thing for the columns and skip entire rows. + break; + } + } + } + } + + color *= (1.0f / sum); + + return color; +} + +#include "nvthread/ParallelFor.h" + +struct ApplyAngularFilterContext { + CubeSurface::Private * inputCube; + CubeSurface::Private * filteredCube; + float coneAngle; + float * filterTable; + int tableSize; + EdgeFixup fixupMethod; +}; + +void ApplyAngularFilterTask(void * context, int id) +{ + ApplyAngularFilterContext * ctx = (ApplyAngularFilterContext *)context; + + int size = ctx->filteredCube->edgeLength; + + int f = id / (size * size); + int idx = id % (size * size); + int y = idx / size; + int x = idx % size; + + nvtt::Surface & filteredFace = ctx->filteredCube->face[f]; + FloatImage * filteredImage = filteredFace.m->image; + + const Vector3 filterDir = texelDirection(f, x, y, size, ctx->fixupMethod); + + // Convolve filter against cube. + Vector3 color = ctx->inputCube->applyAngularFilter(filterDir, ctx->coneAngle, ctx->filterTable, ctx->tableSize); + + filteredImage->pixel(0, idx) = color.x; + filteredImage->pixel(1, idx) = color.y; + filteredImage->pixel(2, idx) = color.z; +} + + +CubeSurface CubeSurface::cosinePowerFilter(int size, float cosinePower, EdgeFixup fixupMethod) const +{ + // Allocate output cube. + CubeSurface filteredCube; + filteredCube.m->allocate(size); + + // Texel table is stored along with the surface so that it's compute only once. + m->allocateTexelTable(); + + const float threshold = 0.001f; + const float coneAngle = acosf(powf(threshold, 1.0f/cosinePower)); + + + // For each texel of the output cube. + /*for (uint f = 0; f < 6; f++) { + nvtt::Surface filteredFace = filteredCube.m->face[f]; + FloatImage * filteredImage = filteredFace.m->image; + + for (uint y = 0; y < uint(size); y++) { + for (uint x = 0; x < uint(size); x++) { + + const Vector3 filterDir = texelDirection(f, x, y, size, fixupMethod); + + // Convolve filter against cube. 
+ Vector3 color = m->applyCosinePowerFilter(filterDir, coneAngle, cosinePower); + + filteredImage->pixel(0, x, y, 0) = color.x; + filteredImage->pixel(1, x, y, 0) = color.y; + filteredImage->pixel(2, x, y, 0) = color.z; + } + } + }*/ + + ApplyAngularFilterContext context; + context.inputCube = m; + context.filteredCube = filteredCube.m; + context.coneAngle = coneAngle; + context.fixupMethod = fixupMethod; + + context.tableSize = 512; + context.filterTable = new float[context.tableSize]; + + // @@ Instead of looking up table between [0 - 1] we should probably use [cos(coneAngle), 1] + + for (int i = 0; i < context.tableSize; i++) { + float f = float(i) / (context.tableSize - 1); + context.filterTable[i] = powf(f, cosinePower); + } + + + nv::ParallelFor parallelFor(ApplyAngularFilterTask, &context); + parallelFor.run(6 * size * size); + + // @@ Implement edge averaging. + if (fixupMethod == EdgeFixup_Average) { + for (uint f = 0; f < 6; f++) { + nvtt::Surface filteredFace = filteredCube.m->face[f]; + FloatImage * filteredImage = filteredFace.m->image; + + // For each component. + for (uint c = 0; c < 3; c++) { + // @@ For each corner, sample the two adjacent faces. + filteredImage->pixel(c, 0, 0, 0); + filteredImage->pixel(c, size-1, 0, 0); + filteredImage->pixel(c, 0, size-1, 0); + filteredImage->pixel(c, size-1, size-1, 0); + + // @@ For each edge, sample the adjacent face. + + } + } + } + + return filteredCube; +} + + +// Sample cubemap in the given direction. +Vector3 CubeSurface::Private::sample(const Vector3 & dir) +{ + int f = -1; + if (fabs(dir.x) > fabs(dir.y) && fabs(dir.x) > fabs(dir.z)) { + if (dir.x > 0) f = 0; + else f = 1; + } + else if (fabs(dir.y) > fabs(dir.z)) { + if (dir.y > 0) f = 2; + else f = 3; + } + else { + if (dir.z > 0) f = 4; + else f = 5; + } + nvDebugCheck(f != -1); + + // uv coordinates corresponding to filterDir. + float u = dot(dir, faceU[f]); + float v = dot(dir, faceV[f]); + + FloatImage * img = face[f].m->image; + + Vector3 color; + color.x = img->sampleLinearClamp(0, u, v); + color.y = img->sampleLinearClamp(1, u, v); + color.z = img->sampleLinearClamp(2, u, v); + + return color; +} + +// @@ Not tested! +CubeSurface CubeSurface::fastResample(int size, EdgeFixup fixupMethod) const +{ + // Allocate output cube. + CubeSurface resampledCube; + resampledCube.m->allocate(size); + + // For each texel of the output cube. + for (uint f = 0; f < 6; f++) { + nvtt::Surface resampledFace = resampledCube.m->face[f]; + FloatImage * resampledImage = resampledFace.m->image; + + for (uint y = 0; y < uint(size); y++) { + for (uint x = 0; x < uint(size); x++) { + + const Vector3 filterDir = texelDirection(f, x, y, size, fixupMethod); + + Vector3 color = m->sample(filterDir); + + resampledImage->pixel(0, x, y, 0) = color.x; + resampledImage->pixel(1, x, y, 0) = color.y; + resampledImage->pixel(2, x, y, 0) = color.z; + } + } + } + + // @@ Implement edge averaging. Share this code with cosinePowerFilter + if (fixupMethod == EdgeFixup_Average) { + } + + return resampledCube; +} + + +void CubeSurface::toLinear(float gamma) +{ + if (isNull()) return; + + detach(); + + for (int i = 0; i < 6; i++) { + m->face[i].toLinear(gamma); + } +} + +void CubeSurface::toGamma(float gamma) +{ + if (isNull()) return; + + detach(); + + for (int i = 0; i < 6; i++) { + m->face[i].toGamma(gamma); + } +} + + +#if 0 +// @@ Provide solar azimuth. 
+#include "ArHoseSkyModel.h" +void CubeSurface::sky(float turbidity, float albedo[3], float solarElevation) { + + ArHosekSkyModelState * skymodel_state[3]; + + for (int i = 0; i < num_channels; i++) { + skymodel_state[i] = arhosekskymodelstate_alloc_init(turbidity, albedo[i], solarElevation); + } + + // 700 nm (red), 546.1 nm (green) and 435.8 nm (blue). + float channel_center[3] = { + 700, // Red 620–740, + 546.1, // Green 520–570, + 435.8, // Blue 450–490, + }; + + // @@ For each pixel: + // What's the channel center for the RGB model? + double skydome_result[3]; + for (unsigned int i = 0; i < num_channels; i++) { + skydome_result[i] = arhosekskymodel_radiance(skymodel_state[i], theta, gamma, channel_center[i]); + } + + for (int i = 0; i < num_channels; i++) { + arhosek_skymodelstate_free(skymodel_state[i]); + } + + /* + ArHosekXYZSkyModelState * skymodel_state[3]; + + for (int i = 0; i < num_channels; i++) { + skymodel_state[i] = arhosek_xyz_skymodelstate_alloc_init(turbidity, albedo[i], solarElevation); + } + + // @@ For each pixel. + double skydome_result[3]; + for (unsigned int i = 0; i < num_channels; i++) { + skydome_result[i] = arhosek_xyz_skymodel_radiance(skymodel_state[i], theta, gamma, i); + } + + for (int i = 0; i < num_channels; i++) { + arhosek_xyz_skymodelstate_free(skymodel_state[i]); + } + */ +} +#endif \ No newline at end of file Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/InputOptions.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/InputOptions.h +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/InputOptions.h @@ -1,4 +1,5 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano +// Copyright (c) 2009-2011 Ignacio Castano +// Copyright (c) 2007-2009 NVIDIA Corporation -- Ignacio Castano // // Permission is hereby granted, free of charge, to any person // obtaining a copy of this software and associated documentation @@ -21,93 +22,61 @@ // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR // OTHER DEALINGS IN THE SOFTWARE. -#ifndef NV_TT_INPUTOPTIONS_H -#define NV_TT_INPUTOPTIONS_H +#ifndef NVTT_INPUTOPTIONS_H +#define NVTT_INPUTOPTIONS_H -#include -#include -#include -#include #include "nvtt.h" +#include "nvmath/Vector.h" + + namespace nvtt { - struct InputOptions::Private - { - Private() : images(NULL) {} - - WrapMode wrapMode; - TextureType textureType; - InputFormat inputFormat; - AlphaMode alphaMode; - - uint faceCount; - uint mipmapCount; - uint imageCount; - - struct InputImage; - InputImage * images; - - // Gamma conversion. - float inputGamma; - float outputGamma; - - // Color transform. - ColorTransform colorTransform; - nv::Matrix linearTransform; - - // Mipmap generation options. - bool generateMipmaps; - int maxLevel; - MipmapFilter mipmapFilter; - - // Kaiser filter parameters. - float kaiserWidth; - float kaiserAlpha; - float kaiserStretch; - - // Normal map options. - bool isNormalMap; - bool normalizeMipmaps; - bool convertToNormalMap; - nv::Vector4 heightFactors; - nv::Vector4 bumpFrequencyScale; - - // Adjust extents. - uint maxExtent; - RoundMode roundMode; - - // @@ These are computed in nvtt::compress, so they should be mutable or stored elsewhere... 
- mutable uint targetWidth; - mutable uint targetHeight; - mutable uint targetDepth; - mutable uint targetMipmapCount; - - void computeTargetExtents() const; - - int realMipmapCount() const; - - const nv::Image * image(uint face, uint mipmap) const; - const nv::Image * image(uint idx) const; - - }; - - // Internal image structure. - struct InputOptions::Private::InputImage - { - InputImage() {} - - int mipLevel; - int face; - - int width; - int height; - int depth; - - nv::AutoPtr data; - }; + struct InputOptions::Private + { + Private() : images(NULL) {} + + WrapMode wrapMode; + TextureType textureType; + InputFormat inputFormat; + AlphaMode alphaMode; + + uint width; + uint height; + uint depth; + uint faceCount; + uint mipmapCount; + uint imageCount; + + void ** images; + + // Gamma conversion. + float inputGamma; + float outputGamma; + + // Mipmap generation options. + bool generateMipmaps; + int maxLevel; + MipmapFilter mipmapFilter; + + // Kaiser filter parameters. + float kaiserWidth; + float kaiserAlpha; + float kaiserStretch; + + // Normal map options. + bool isNormalMap; + bool normalizeMipmaps; + bool convertToNormalMap; + nv::Vector4 heightFactors; + nv::Vector4 bumpFrequencyScale; + + // Adjust extents. + uint maxExtent; + RoundMode roundMode; + }; } // nvtt namespace -#endif // NV_TT_INPUTOPTIONS_H +#endif // NVTT_INPUTOPTIONS_H Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/InputOptions.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/InputOptions.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/InputOptions.cpp @@ -1,408 +1,342 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. - -#include // memcpy - -#include - -#include "nvtt.h" -#include "InputOptions.h" - -using namespace nv; -using namespace nvtt; - -namespace -{ - - static uint countMipmaps(int w, int h, int d) - { - uint mipmap = 0; - - while (w != 1 || h != 1 || d != 1) { - w = max(1, w / 2); - h = max(1, h / 2); - d = max(1, d / 2); - mipmap++; - } - - return mipmap + 1; - } - - // 1 -> 1, 2 -> 2, 3 -> 2, 4 -> 4, 5 -> 4, ... 
- static uint previousPowerOfTwo(const uint v) - { - return nextPowerOfTwo(v + 1) / 2; - } - - static uint nearestPowerOfTwo(const uint v) - { - const uint np2 = nextPowerOfTwo(v); - const uint pp2 = previousPowerOfTwo(v); - - if (np2 - v <= v - pp2) - { - return np2; - } - else - { - return pp2; - } - } - -} // namespace - - -/// Constructor. -InputOptions::InputOptions() : m(*new InputOptions::Private()) -{ - reset(); -} - -// Delete images. -InputOptions::~InputOptions() -{ - resetTextureLayout(); - - delete &m; -} - - -// Reset input options. -void InputOptions::reset() -{ - m.wrapMode = WrapMode_Mirror; - m.textureType = TextureType_2D; - m.inputFormat = InputFormat_BGRA_8UB; - - m.alphaMode = AlphaMode_None; - - m.inputGamma = 2.2f; - m.outputGamma = 2.2f; - - m.colorTransform = ColorTransform_None; - m.linearTransform = Matrix(identity); - - m.generateMipmaps = true; - m.maxLevel = -1; - m.mipmapFilter = MipmapFilter_Box; - - m.kaiserWidth = 3; - m.kaiserAlpha = 4.0f; - m.kaiserStretch = 1.0f; - - m.isNormalMap = false; - m.normalizeMipmaps = true; - m.convertToNormalMap = false; - m.heightFactors.set(0.0f, 0.0f, 0.0f, 1.0f); - m.bumpFrequencyScale = Vector4(1.0f, 0.5f, 0.25f, 0.125f) / (1.0f + 0.5f + 0.25f + 0.125f); - - m.maxExtent = 0; - m.roundMode = RoundMode_None; -} - - -// Setup the input image. -void InputOptions::setTextureLayout(TextureType type, int width, int height, int depth /*= 1*/) -{ - // Validate arguments. - nvCheck(width >= 0); - nvCheck(height >= 0); - nvCheck(depth >= 0); - - // Correct arguments. - if (width == 0) width = 1; - if (height == 0) height = 1; - if (depth == 0) depth = 1; - - // Delete previous images. - resetTextureLayout(); - - m.textureType = type; - - // Allocate images. - m.mipmapCount = countMipmaps(width, height, depth); - m.faceCount = (type == TextureType_Cube) ? 6 : 1; - m.imageCount = m.mipmapCount * m.faceCount; - - m.images = new Private::InputImage[m.imageCount]; - - for(uint f = 0; f < m.faceCount; f++) - { - uint w = width; - uint h = height; - uint d = depth; - - for (uint mipLevel = 0; mipLevel < m.mipmapCount; mipLevel++) - { - Private::InputImage & img = m.images[f * m.mipmapCount + mipLevel]; - img.width = w; - img.height = h; - img.depth = d; - img.mipLevel = mipLevel; - img.face = f; - - img.data = NULL; - - w = max(1U, w / 2); - h = max(1U, h / 2); - d = max(1U, d / 2); - } - } -} - - -void InputOptions::resetTextureLayout() -{ - if (m.images != NULL) - { - // Delete image array. - delete [] m.images; - m.images = NULL; - - m.faceCount = 0; - m.mipmapCount = 0; - m.imageCount = 0; - } -} - - -// Copies the data to our internal structures. -bool InputOptions::setMipmapData(const void * data, int width, int height, int depth /*= 1*/, int face /*= 0*/, int mipLevel /*= 0*/) -{ - nvCheck(depth == 1); - - const int idx = face * m.mipmapCount + mipLevel; - - if (m.images[idx].width != width || m.images[idx].height != height || m.images[idx].depth != depth || m.images[idx].mipLevel != mipLevel || m.images[idx].face != face) - { - // Invalid dimension or index. - return false; - } - - m.images[idx].data = new nv::Image(); - m.images[idx].data->allocate(width, height); - memcpy(m.images[idx].data->pixels(), data, width * height * 4); - - return true; -} - - -/// Describe the format of the input. -void InputOptions::setFormat(InputFormat format) -{ - m.inputFormat = format; -} - - -/// Set the way the input alpha channel is interpreted. 
-void InputOptions::setAlphaMode(AlphaMode alphaMode) -{ - m.alphaMode = alphaMode; -} - - -/// Set gamma settings. -void InputOptions::setGamma(float inputGamma, float outputGamma) -{ - m.inputGamma = inputGamma; - m.outputGamma = outputGamma; -} - - -/// Set texture wrappign mode. -void InputOptions::setWrapMode(WrapMode mode) -{ - m.wrapMode = mode; -} - - -/// Set mipmap filter. -void InputOptions::setMipmapFilter(MipmapFilter filter) -{ - m.mipmapFilter = filter; -} - -/// Set mipmap generation. -void InputOptions::setMipmapGeneration(bool enabled, int maxLevel/*= -1*/) -{ - m.generateMipmaps = enabled; - m.maxLevel = maxLevel; -} - -/// Set Kaiser filter parameters. -void InputOptions::setKaiserParameters(float width, float alpha, float stretch) -{ - m.kaiserWidth = width; - m.kaiserAlpha = alpha; - m.kaiserStretch = stretch; -} - -/// Indicate whether input is a normal map or not. -void InputOptions::setNormalMap(bool b) -{ - m.isNormalMap = b; -} - -/// Enable normal map conversion. -void InputOptions::setConvertToNormalMap(bool convert) -{ - m.convertToNormalMap = convert; -} - -/// Set height evaluation factors. -void InputOptions::setHeightEvaluation(float redScale, float greenScale, float blueScale, float alphaScale) -{ - // Do not normalize height factors. -// float total = redScale + greenScale + blueScale + alphaScale; - m.heightFactors = Vector4(redScale, greenScale, blueScale, alphaScale); -} - -/// Set normal map conversion filter. -void InputOptions::setNormalFilter(float small, float medium, float big, float large) -{ - float total = small + medium + big + large; - m.bumpFrequencyScale = Vector4(small, medium, big, large) / total; -} - -/// Enable mipmap normalization. -void InputOptions::setNormalizeMipmaps(bool normalize) -{ - m.normalizeMipmaps = normalize; -} - -/// Set color transform. -void InputOptions::setColorTransform(ColorTransform t) -{ - m.colorTransform = t; -} - -// Set linear transform for the given channel. -void InputOptions::setLinearTransform(int channel, float w0, float w1, float w2, float w3) -{ - nvCheck(channel >= 0 && channel < 4); - - Vector4 w(w0, w1, w2, w3); - //m.linearTransform.setRow(channel, w); -} - -void InputOptions::setMaxExtents(int e) -{ - nvDebugCheck(e > 0); - m.maxExtent = e; -} - -void InputOptions::setRoundMode(RoundMode mode) -{ - m.roundMode = mode; -} - - -void InputOptions::Private::computeTargetExtents() const -{ - nvCheck(images != NULL); - - uint maxExtent = this->maxExtent; - if (roundMode != RoundMode_None) - { - // rounded max extent should never be higher than original max extent. - maxExtent = previousPowerOfTwo(maxExtent); - } - - uint w = images->width; - uint h = images->height; - uint d = images->depth; - - nvDebugCheck(w > 0); - nvDebugCheck(h > 0); - nvDebugCheck(d > 0); - - // Scale extents without changing aspect ratio. - uint maxwhd = max(max(w, h), d); - if (maxExtent != 0 && maxwhd > maxExtent) - { - w = max((w * maxExtent) / maxwhd, 1U); - h = max((h * maxExtent) / maxwhd, 1U); - d = max((d * maxExtent) / maxwhd, 1U); - } - - // Round to power of two. 
- if (roundMode == RoundMode_ToNextPowerOfTwo) - { - w = nextPowerOfTwo(w); - h = nextPowerOfTwo(h); - d = nextPowerOfTwo(d); - } - else if (roundMode == RoundMode_ToNearestPowerOfTwo) - { - w = nearestPowerOfTwo(w); - h = nearestPowerOfTwo(h); - d = nearestPowerOfTwo(d); - } - else if (roundMode == RoundMode_ToPreviousPowerOfTwo) - { - w = previousPowerOfTwo(w); - h = previousPowerOfTwo(h); - d = previousPowerOfTwo(d); - } - - this->targetWidth = w; - this->targetHeight = h; - this->targetDepth = d; - - this->targetMipmapCount = countMipmaps(w, h, d); -} - - -// Return real number of mipmaps, including first level. -// computeTargetExtents should have been called before. -int InputOptions::Private::realMipmapCount() const -{ - int mipmapCount = targetMipmapCount; - - if (!generateMipmaps) mipmapCount = 1; - else if (maxLevel != -1 && maxLevel < mipmapCount - 1) mipmapCount = maxLevel + 1; - - return mipmapCount; -} - - -const Image * InputOptions::Private::image(uint face, uint mipmap) const -{ - nvDebugCheck(face < faceCount); - nvDebugCheck(mipmap < mipmapCount); - - const InputImage & image = this->images[face * mipmapCount + mipmap]; - nvDebugCheck(image.face == face); - nvDebugCheck(image.mipLevel == mipmap); - - return image.data.ptr(); -} - -const Image * InputOptions::Private::image(uint idx) const -{ - nvDebugCheck(idx < faceCount * mipmapCount); - - const InputImage & image = this->images[idx]; - - return image.data.ptr(); -} +// Copyright (c) 2009-2011 Ignacio Castano +// Copyright (c) 2007-2009 NVIDIA Corporation -- Ignacio Castano +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +#include "InputOptions.h" + +#include "nvmath/Vector.inl" + +#include "nvcore/Utils.h" // nextPowerOfTwo +#include "nvcore/Memory.h" + +#include // memcpy, memset + + + +using namespace nv; +using namespace nvtt; + +namespace +{ + + static uint countMipmaps(int w, int h, int d) + { + uint mipmap = 0; + + while (w != 1 || h != 1 || d != 1) { + w = max(1, w / 2); + h = max(1, h / 2); + d = max(1, d / 2); + mipmap++; + } + + return mipmap + 1; + } + + // 1 -> 1, 2 -> 2, 3 -> 2, 4 -> 4, 5 -> 4, ... + static uint previousPowerOfTwo(const uint v) + { + return nextPowerOfTwo(v + 1) / 2; + } + + static uint nearestPowerOfTwo(const uint v) + { + const uint np2 = nextPowerOfTwo(v); + const uint pp2 = previousPowerOfTwo(v); + + if (np2 - v <= v - pp2) + { + return np2; + } + else + { + return pp2; + } + } + +} // namespace + + +/// Constructor. 
+InputOptions::InputOptions() : m(*new InputOptions::Private()) +{ + reset(); +} + +// Delete images. +InputOptions::~InputOptions() +{ + resetTextureLayout(); + + delete &m; +} + + +// Reset input options. +void InputOptions::reset() +{ + m.wrapMode = WrapMode_Mirror; + m.textureType = TextureType_2D; + m.inputFormat = InputFormat_BGRA_8UB; + + m.alphaMode = AlphaMode_None; + + m.inputGamma = 2.2f; + m.outputGamma = 2.2f; + + m.generateMipmaps = true; + m.maxLevel = -1; + m.mipmapFilter = MipmapFilter_Box; + + m.kaiserWidth = 3; + m.kaiserAlpha = 4.0f; + m.kaiserStretch = 1.0f; + + m.isNormalMap = false; + m.normalizeMipmaps = true; + m.convertToNormalMap = false; + m.heightFactors.set(0.0f, 0.0f, 0.0f, 1.0f); + m.bumpFrequencyScale = Vector4(1.0f, 0.5f, 0.25f, 0.125f) / (1.0f + 0.5f + 0.25f + 0.125f); + + m.maxExtent = 0; + m.roundMode = RoundMode_None; +} + + +// Setup the input image. +void InputOptions::setTextureLayout(TextureType type, int width, int height, int depth /*= 1*/, int arraySize /*= 1*/) +{ + // Validate arguments. + nvCheck(width >= 0); + nvCheck(height >= 0); + nvCheck(depth >= 0); + nvCheck(arraySize >= 0); + + // Correct arguments. + if (width == 0) width = 1; + if (height == 0) height = 1; + if (depth == 0) depth = 1; + if (arraySize == 0) arraySize = 1; + + // Delete previous images. + resetTextureLayout(); + + m.textureType = type; + m.width = width; + m.height = height; + m.depth = depth; + + // Allocate images. + if (type == TextureType_Cube) { + nvCheck(arraySize == 1); + m.faceCount = 6; + } + else if (type == TextureType_Array) { + m.faceCount = arraySize; + } else { + nvCheck(arraySize == 1); + m.faceCount = 1; + } + m.mipmapCount = countMipmaps(width, height, depth); + m.imageCount = m.mipmapCount * m.faceCount; + m.images = new void *[m.imageCount]; + + memset(m.images, 0, sizeof(void *) * m.imageCount); +} + + +void InputOptions::resetTextureLayout() +{ + if (m.images != NULL) + { + // Delete images. + for (uint i = 0; i < m.imageCount; i++) { + free(m.images[i]); + } + + // Delete image array. + delete [] m.images; + m.images = NULL; + + m.faceCount = 0; + m.mipmapCount = 0; + m.imageCount = 0; + } +} + + +// Copies the data to our internal structures. +bool InputOptions::setMipmapData(const void * data, int width, int height, int depth /*= 1*/, int face /*= 0*/, int mipLevel /*= 0*/) +{ + if (uint(face) >= m.faceCount) { + return false; + } + if (uint(mipLevel) >= m.mipmapCount) { + return false; + } + + const uint idx = mipLevel * m.faceCount + face; + if (idx >= m.imageCount) { + return false; + } + + // Compute expected width, height and depth for this mipLevel. Return false if it doesn't match. + int w = m.width; + int h = m.height; + int d = m.depth; + for (int i = 0; i < mipLevel; i++) { + w = max(1, w/2); + h = max(1, h/2); + d = max(1, d/2); + } + if (w != width || h != height || d != depth) { + return false; + } + + int imageSize = width * height * depth; + if (m.inputFormat == InputFormat_BGRA_8UB) + { + imageSize *= 4 * sizeof(uint8); + } + else if (m.inputFormat == InputFormat_RGBA_16F) + { + imageSize *= 4 * sizeof(uint16); + } + else if (m.inputFormat == InputFormat_RGBA_32F) + { + imageSize *= 4 * sizeof(float); + } + else if (m.inputFormat == InputFormat_R_32F) + { + imageSize *= 1 * sizeof(float); + } + else + { + return false; + } + + m.images[idx] = realloc(m.images[idx], imageSize); + if (m.images[idx] == NULL) { + // Out of memory. 
+ return false; + } + + memcpy(m.images[idx], data, imageSize); + + return true; +} + + +/// Describe the format of the input. +void InputOptions::setFormat(InputFormat format) +{ + m.inputFormat = format; +} + + +/// Set the way the input alpha channel is interpreted. +void InputOptions::setAlphaMode(AlphaMode alphaMode) +{ + m.alphaMode = alphaMode; +} + + +/// Set gamma settings. +void InputOptions::setGamma(float inputGamma, float outputGamma) +{ + m.inputGamma = inputGamma; + m.outputGamma = outputGamma; +} + + +/// Set texture wrappign mode. +void InputOptions::setWrapMode(WrapMode mode) +{ + m.wrapMode = mode; +} + + +/// Set mipmap filter. +void InputOptions::setMipmapFilter(MipmapFilter filter) +{ + m.mipmapFilter = filter; +} + +/// Set mipmap generation. +void InputOptions::setMipmapGeneration(bool enabled, int maxLevel/*= -1*/) +{ + m.generateMipmaps = enabled; + m.maxLevel = maxLevel; +} + +/// Set Kaiser filter parameters. +void InputOptions::setKaiserParameters(float width, float alpha, float stretch) +{ + m.kaiserWidth = width; + m.kaiserAlpha = alpha; + m.kaiserStretch = stretch; +} + +/// Indicate whether input is a normal map or not. +void InputOptions::setNormalMap(bool b) +{ + m.isNormalMap = b; +} + +/// Enable normal map conversion. +void InputOptions::setConvertToNormalMap(bool convert) +{ + m.convertToNormalMap = convert; +} + +/// Set height evaluation factors. +void InputOptions::setHeightEvaluation(float redScale, float greenScale, float blueScale, float alphaScale) +{ + // Do not normalize height factors. +// float total = redScale + greenScale + blueScale + alphaScale; + m.heightFactors = Vector4(redScale, greenScale, blueScale, alphaScale); +} + +/// Set normal map conversion filter. +void InputOptions::setNormalFilter(float small, float medium, float big, float large) +{ + float total = small + medium + big + large; + m.bumpFrequencyScale = Vector4(small, medium, big, large) / total; +} + +/// Enable mipmap normalization. +void InputOptions::setNormalizeMipmaps(bool normalize) +{ + m.normalizeMipmaps = normalize; +} + +void InputOptions::setMaxExtents(int e) +{ + nvDebugCheck(e > 0); + m.maxExtent = e; +} + +void InputOptions::setRoundMode(RoundMode mode) +{ + m.roundMode = mode; +} Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/OptimalCompressDXT.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/OptimalCompressDXT.h +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/OptimalCompressDXT.h @@ -1,49 +1,63 @@ -// Copyright NVIDIA Corporation 2008 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. - -#ifndef NV_TT_OPTIMALCOMPRESSDXT_H -#define NV_TT_OPTIMALCOMPRESSDXT_H - -#include - -namespace nv -{ - struct ColorBlock; - struct BlockDXT1; - struct BlockDXT3; - struct BlockDXT5; - struct AlphaBlockDXT3; - struct AlphaBlockDXT5; - - namespace OptimalCompress - { - void compressDXT1(Color32 rgba, BlockDXT1 * dxtBlock); - void compressDXT1a(Color32 rgba, BlockDXT1 * dxtBlock); - - void compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block); - void compressDXT3A(const ColorBlock & rgba, AlphaBlockDXT3 * dxtBlock); - void compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock); - } -} // nv namespace - -#endif // NV_TT_OPTIMALCOMPRESSDXT_H +// Copyright (c) 2009-2011 Ignacio Castano +// Copyright (c) 2007-2009 NVIDIA Corporation -- Ignacio Castano +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. 
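// Illustration (a hedged sketch, not part of the upstream patch): the NVTT 2.1
// declarations below change compressDXT1a to take an explicit per-texel
// alphaMask and add AlphaBlock4x4 overloads for the DXT3/DXT5 alpha paths.
// Minimal usage sketch for the single-colour path; the "nvimage/BlockDXT.h"
// location of nv::BlockDXT1 and the helper name encodeSolidBlock are
// assumptions, everything else follows the declarations in this header.

#include "nvimage/BlockDXT.h"      // nv::BlockDXT1 (assumed location)
#include "OptimalCompressDXT.h"

static void encodeSolidBlock(nv::BlockDXT1 * block)
{
    nv::Color32 c;                 // nv::Color32 comes from nvmath/Color.h, included by this header
    c.r = 255; c.g = 64; c.b = 0; c.a = 255;

    // Table-driven optimal endpoints for a uniform 4x4 block.
    nv::OptimalCompress::compressDXT1(c, block);

    // For blocks containing transparent texels, the DXT1a variant additionally
    // takes the transparency index mask:
    // nv::OptimalCompress::compressDXT1a(c, alphaMask, block);
}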
+ +#ifndef NV_TT_OPTIMALCOMPRESSDXT_H +#define NV_TT_OPTIMALCOMPRESSDXT_H + +//#include "nvimage/nvimage.h" + +#include "nvmath/Color.h" + +namespace nv +{ + struct ColorSet; + struct ColorBlock; + struct BlockDXT1; + struct BlockDXT3; + struct BlockDXT5; + struct AlphaBlockDXT3; + struct AlphaBlockDXT5; + struct AlphaBlock4x4; + + namespace OptimalCompress + { + // Single color compressors: + void compressDXT1(Color32 rgba, BlockDXT1 * dxtBlock); + void compressDXT1a(Color32 rgba, uint alphaMask, BlockDXT1 * dxtBlock); + void compressDXT1G(uint8 g, BlockDXT1 * dxtBlock); + + void compressDXT3A(const AlphaBlock4x4 & src, AlphaBlockDXT3 * dst); + void compressDXT5A(const AlphaBlock4x4 & src, AlphaBlockDXT5 * dst); + + void compressDXT1G(const ColorBlock & src, BlockDXT1 * dst); + void compressDXT3A(const ColorBlock & src, AlphaBlockDXT3 * dst); + void compressDXT5A(const ColorBlock & src, AlphaBlockDXT5 * dst); + + void compressDXT1_Luma(const ColorBlock & src, BlockDXT1 * dst); + + void compressDXT5A_RGBM(const ColorSet & src, const ColorBlock & RGB, AlphaBlockDXT5 * dst); + } +} // nv namespace + +#endif // NV_TT_OPTIMALCOMPRESSDXT_H Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/OptimalCompressDXT.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/OptimalCompressDXT.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/OptimalCompressDXT.cpp @@ -1,368 +1,812 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. 
- -#include // swap - -#include - -#include -#include - -#include "OptimalCompressDXT.h" -#include "SingleColorLookup.h" - - -using namespace nv; -using namespace OptimalCompress; - - - -namespace -{ - static int computeGreenError(const ColorBlock & rgba, const BlockDXT1 * block) - { - nvDebugCheck(block != NULL); - - int palette[4]; - palette[0] = (block->col0.g << 2) | (block->col0.g >> 4); - palette[1] = (block->col1.g << 2) | (block->col1.g >> 4); - palette[2] = (2 * palette[0] + palette[1]) / 3; - palette[3] = (2 * palette[1] + palette[0]) / 3; - - int totalError = 0; - - for (int i = 0; i < 16; i++) - { - const int green = rgba.color(i).g; - - int error = abs(green - palette[0]); - error = min(error, abs(green - palette[1])); - error = min(error, abs(green - palette[2])); - error = min(error, abs(green - palette[3])); - - totalError += error; - } - - return totalError; - } - - static uint computeGreenIndices(const ColorBlock & rgba, const Color32 palette[4]) - { - const int color0 = palette[0].g; - const int color1 = palette[1].g; - const int color2 = palette[2].g; - const int color3 = palette[3].g; - - uint indices = 0; - for (int i = 0; i < 16; i++) - { - const int color = rgba.color(i).g; - - uint d0 = abs(color0 - color); - uint d1 = abs(color1 - color); - uint d2 = abs(color2 - color); - uint d3 = abs(color3 - color); - - uint b0 = d0 > d3; - uint b1 = d1 > d2; - uint b2 = d0 > d2; - uint b3 = d1 > d3; - uint b4 = d2 > d3; - - uint x0 = b1 & b2; - uint x1 = b0 & b3; - uint x2 = b0 & b4; - - indices |= (x2 | ((x0 | x1) << 1)) << (2 * i); - } - - return indices; - } - - // Choose quantized color that produces less error. Used by DXT3 compressor. - inline static uint quantize4(uint8 a) - { - int q0 = (a >> 4) - 1; - int q1 = (a >> 4); - int q2 = (a >> 4) + 1; - - q0 = (q0 << 4) | q0; - q1 = (q1 << 4) | q1; - q2 = (q2 << 4) | q2; - - int d0 = abs(q0 - a); - int d1 = abs(q1 - a); - int d2 = abs(q2 - a); - - if (d0 < d1 && d0 < d2) return q0 >> 4; - if (d1 < d2) return q1 >> 4; - return q2 >> 4; - } - - static uint computeAlphaError(const ColorBlock & rgba, const AlphaBlockDXT5 * block) - { - uint8 alphas[8]; - block->evaluatePalette(alphas); - - uint totalError = 0; - - for (uint i = 0; i < 16; i++) - { - uint8 alpha = rgba.color(i).a; - - uint besterror = 256*256; - uint best; - for (uint p = 0; p < 8; p++) - { - int d = alphas[p] - alpha; - uint error = d * d; - - if (error < besterror) - { - besterror = error; - best = p; - } - } - - totalError += besterror; - } - - return totalError; - } - - static void computeAlphaIndices(const ColorBlock & rgba, AlphaBlockDXT5 * block) - { - uint8 alphas[8]; - block->evaluatePalette(alphas); - - for (uint i = 0; i < 16; i++) - { - uint8 alpha = rgba.color(i).a; - - uint besterror = 256*256; - uint best = 8; - for(uint p = 0; p < 8; p++) - { - int d = alphas[p] - alpha; - uint error = d * d; - - if (error < besterror) - { - besterror = error; - best = p; - } - } - nvDebugCheck(best < 8); - - block->setIndex(i, best); - } - } - -} // namespace - - - - - -// Single color compressor, based on: -// https://mollyrocket.com/forums/viewtopic.php?t=392 -void OptimalCompress::compressDXT1(Color32 c, BlockDXT1 * dxtBlock) -{ - dxtBlock->col0.r = OMatch5[c.r][0]; - dxtBlock->col0.g = OMatch6[c.g][0]; - dxtBlock->col0.b = OMatch5[c.b][0]; - dxtBlock->col1.r = OMatch5[c.r][1]; - dxtBlock->col1.g = OMatch6[c.g][1]; - dxtBlock->col1.b = OMatch5[c.b][1]; - dxtBlock->indices = 0xaaaaaaaa; - - if (dxtBlock->col0.u < dxtBlock->col1.u) - { - swap(dxtBlock->col0.u, 
dxtBlock->col1.u); - dxtBlock->indices ^= 0x55555555; - } -} - -void OptimalCompress::compressDXT1a(Color32 rgba, BlockDXT1 * dxtBlock) -{ - if (rgba.a < 128) - { - dxtBlock->col0.u = 0; - dxtBlock->col1.u = 0; - dxtBlock->indices = 0xFFFFFFFF; - } - else - { - compressDXT1(rgba, dxtBlock); - } -} - - -// Brute force green channel compressor -void OptimalCompress::compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block) -{ - nvDebugCheck(block != NULL); - - uint8 ming = 63; - uint8 maxg = 0; - - // Get min/max green. - for (uint i = 0; i < 16; i++) - { - uint8 green = rgba.color(i).g >> 2; - ming = min(ming, green); - maxg = max(maxg, green); - } - - block->col0.r = 31; - block->col1.r = 31; - block->col0.g = maxg; - block->col1.g = ming; - block->col0.b = 0; - block->col1.b = 0; - - if (maxg - ming > 4) - { - int besterror = computeGreenError(rgba, block); - int bestg0 = maxg; - int bestg1 = ming; - - for (int g0 = ming+5; g0 < maxg; g0++) - { - for (int g1 = ming; g1 < g0-4; g1++) - { - if ((maxg-g0) + (g1-ming) > besterror) - continue; - - block->col0.g = g0; - block->col1.g = g1; - int error = computeGreenError(rgba, block); - - if (error < besterror) - { - besterror = error; - bestg0 = g0; - bestg1 = g1; - } - } - } - - block->col0.g = bestg0; - block->col1.g = bestg1; - } - - Color32 palette[4]; - block->evaluatePalette(palette); - block->indices = computeGreenIndices(rgba, palette); -} - -void OptimalCompress::compressDXT3A(const ColorBlock & rgba, AlphaBlockDXT3 * dxtBlock) -{ - dxtBlock->alpha0 = quantize4(rgba.color(0).a); - dxtBlock->alpha1 = quantize4(rgba.color(1).a); - dxtBlock->alpha2 = quantize4(rgba.color(2).a); - dxtBlock->alpha3 = quantize4(rgba.color(3).a); - dxtBlock->alpha4 = quantize4(rgba.color(4).a); - dxtBlock->alpha5 = quantize4(rgba.color(5).a); - dxtBlock->alpha6 = quantize4(rgba.color(6).a); - dxtBlock->alpha7 = quantize4(rgba.color(7).a); - dxtBlock->alpha8 = quantize4(rgba.color(8).a); - dxtBlock->alpha9 = quantize4(rgba.color(9).a); - dxtBlock->alphaA = quantize4(rgba.color(10).a); - dxtBlock->alphaB = quantize4(rgba.color(11).a); - dxtBlock->alphaC = quantize4(rgba.color(12).a); - dxtBlock->alphaD = quantize4(rgba.color(13).a); - dxtBlock->alphaE = quantize4(rgba.color(14).a); - dxtBlock->alphaF = quantize4(rgba.color(15).a); -} - - -void OptimalCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock) -{ - uint8 mina = 255; - uint8 maxa = 0; - - // Get min/max alpha. - for (uint i = 0; i < 16; i++) - { - uint8 alpha = rgba.color(i).a; - mina = min(mina, alpha); - maxa = max(maxa, alpha); - } - - dxtBlock->alpha0 = maxa; - dxtBlock->alpha1 = mina; - - /*int centroidDist = 256; - int centroid; - - // Get the closest to the centroid. 
- for (uint i = 0; i < 16; i++) - { - uint8 alpha = rgba.color(i).a; - int dist = abs(alpha - (maxa + mina) / 2); - if (dist < centroidDist) - { - centroidDist = dist; - centroid = alpha; - } - }*/ - - if (maxa - mina > 8) - { - int besterror = computeAlphaError(rgba, dxtBlock); - int besta0 = maxa; - int besta1 = mina; - - for (int a0 = mina+9; a0 < maxa; a0++) - { - for (int a1 = mina; a1 < a0-8; a1++) - //for (int a1 = mina; a1 < maxa; a1++) - { - //nvCheck(abs(a1-a0) > 8); - - //if (abs(a0 - a1) < 8) continue; - //if ((maxa-a0) + (a1-mina) + min(abs(centroid-a0), abs(centroid-a1)) > besterror) - if ((maxa-a0) + (a1-mina) > besterror) - continue; - - dxtBlock->alpha0 = a0; - dxtBlock->alpha1 = a1; - int error = computeAlphaError(rgba, dxtBlock); - - if (error < besterror) - { - besterror = error; - besta0 = a0; - besta1 = a1; - } - } - } - - dxtBlock->alpha0 = besta0; - dxtBlock->alpha1 = besta1; - } - - computeAlphaIndices(rgba, dxtBlock); -} - +// Copyright (c) 2009-2011 Ignacio Castano +// Copyright (c) 2007-2009 NVIDIA Corporation -- Ignacio Castano +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. 
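// Illustration (a hedged sketch, not the library code below): compressDXT1G and
// compressDXT5A in this file share one pattern — take the channel's min/max,
// widen that window slightly, then brute-force every endpoint pair, scoring each
// candidate with a squared-distance error that exits early once it exceeds the
// best error found so far. The helper names paletteError4 and searchEndpointPair
// are hypothetical; the real code uses computeGreenError/computeAlphaError.

#include <climits>   // INT_MAX

static int paletteError4(const unsigned char values[16], int hi, int lo, int bestError)
{
    // Two endpoints plus two interpolants, as in the DXT1 green ramp.
    const int palette[4] = { hi, lo, (2 * hi + lo) / 3, (hi + 2 * lo) / 3 };

    int total = 0;
    for (int i = 0; i < 16; i++)
    {
        int best = INT_MAX;
        for (int p = 0; p < 4; p++)
        {
            const int d = values[i] - palette[p];
            if (d * d < best) best = d * d;
        }
        total += best;
        if (total > bestError) return total;   // early out, as in computeGreenError
    }
    return total;
}

static void searchEndpointPair(const unsigned char values[16], int & bestHi, int & bestLo)
{
    int minv = 255, maxv = 0;
    for (int i = 0; i < 16; i++)
    {
        if (values[i] < minv) minv = values[i];
        if (values[i] > maxv) maxv = values[i];
    }

    // Widen the search window slightly, clamped to the representable range.
    const int expand = 4;
    minv = (minv <= expand) ? 0 : minv - expand;
    maxv = (maxv >= 255 - expand) ? 255 : maxv + expand;

    int bestError = INT_MAX;
    bestHi = maxv;
    bestLo = minv;

    for (int hi = minv + 1; hi <= maxv; hi++)
    {
        for (int lo = minv; lo < hi; lo++)
        {
            const int error = paletteError4(values, hi, lo, bestError);
            if (error < bestError)
            {
                bestError = error;
                bestHi = hi;
                bestLo = lo;
            }
        }
    }
}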
+ +#include "OptimalCompressDXT.h" +#include "SingleColorLookup.h" + +#include +#include + +#include + +#include // swap + +#include // INT_MAX +#include // FLT_MAX + +using namespace nv; +using namespace OptimalCompress; + + + +namespace +{ + static int greenDistance(int g0, int g1) + { + //return abs(g0 - g1); + int d = g0 - g1; + return d * d; + } + + static int alphaDistance(int a0, int a1) + { + //return abs(a0 - a1); + int d = a0 - a1; + return d * d; + } + + /*static uint nearestGreen4(uint green, uint maxGreen, uint minGreen) + { + uint bias = maxGreen + (maxGreen - minGreen) / 6; + + uint index = 0; + if (maxGreen - minGreen != 0) index = clamp(3 * (bias - green) / (maxGreen - minGreen), 0U, 3U); + + return (index * minGreen + (3 - index) * maxGreen) / 3; + }*/ + + static int computeGreenError(const ColorBlock & rgba, const BlockDXT1 * block, int bestError = INT_MAX) + { + nvDebugCheck(block != NULL); + + // uint g0 = (block->col0.g << 2) | (block->col0.g >> 4); + // uint g1 = (block->col1.g << 2) | (block->col1.g >> 4); + + int palette[4]; + palette[0] = (block->col0.g << 2) | (block->col0.g >> 4); + palette[1] = (block->col1.g << 2) | (block->col1.g >> 4); + palette[2] = (2 * palette[0] + palette[1]) / 3; + palette[3] = (2 * palette[1] + palette[0]) / 3; + + int totalError = 0; + for (int i = 0; i < 16; i++) + { + const int green = rgba.color(i).g; + + int error = greenDistance(green, palette[0]); + error = min(error, greenDistance(green, palette[1])); + error = min(error, greenDistance(green, palette[2])); + error = min(error, greenDistance(green, palette[3])); + + totalError += error; + + // totalError += nearestGreen4(green, g0, g1); + + if (totalError > bestError) + { + // early out + return totalError; + } + } + + return totalError; + } + + static uint computeGreenIndices(const ColorBlock & rgba, const Color32 palette[4]) + { + const int color0 = palette[0].g; + const int color1 = palette[1].g; + const int color2 = palette[2].g; + const int color3 = palette[3].g; + + uint indices = 0; + for (int i = 0; i < 16; i++) + { + const int color = rgba.color(i).g; + + uint d0 = greenDistance(color0, color); + uint d1 = greenDistance(color1, color); + uint d2 = greenDistance(color2, color); + uint d3 = greenDistance(color3, color); + + uint b0 = d0 > d3; + uint b1 = d1 > d2; + uint b2 = d0 > d2; + uint b3 = d1 > d3; + uint b4 = d2 > d3; + + uint x0 = b1 & b2; + uint x1 = b0 & b3; + uint x2 = b0 & b4; + + indices |= (x2 | ((x0 | x1) << 1)) << (2 * i); + } + + return indices; + } + + // Choose quantized color that produces less error. Used by DXT3 compressor. 
+ inline static uint quantize4(uint8 a) + { + int q0 = max(int(a >> 4) - 1, 0); + int q1 = (a >> 4); + int q2 = min(int(a >> 4) + 1, 0xF); + + q0 = (q0 << 4) | q0; + q1 = (q1 << 4) | q1; + q2 = (q2 << 4) | q2; + + int d0 = alphaDistance(q0, a); + int d1 = alphaDistance(q1, a); + int d2 = alphaDistance(q2, a); + + if (d0 < d1 && d0 < d2) return q0 >> 4; + if (d1 < d2) return q1 >> 4; + return q2 >> 4; + } + + static uint nearestAlpha8(uint alpha, uint maxAlpha, uint minAlpha) + { + float bias = maxAlpha + float(maxAlpha - minAlpha) / (2.0f * 7.0f); + float scale = 7.0f / float(maxAlpha - minAlpha); + + uint index = (uint)clamp((bias - float(alpha)) * scale, 0.0f, 7.0f); + + return (index * minAlpha + (7 - index) * maxAlpha) / 7; + } + + /*static uint computeAlphaError8(const ColorBlock & rgba, const AlphaBlockDXT5 * block, int bestError = INT_MAX) + { + int totalError = 0; + + for (uint i = 0; i < 16; i++) + { + uint8 alpha = rgba.color(i).a; + + totalError += alphaDistance(alpha, nearestAlpha8(alpha, block->alpha0, block->alpha1)); + + if (totalError > bestError) + { + // early out + return totalError; + } + } + + return totalError; + }*/ + + static float computeAlphaError(const AlphaBlock4x4 & src, const AlphaBlockDXT5 * dst, float bestError = FLT_MAX) + { + uint8 alphas[8]; + dst->evaluatePalette(alphas, false); // @@ Use target decoder. + + float totalError = 0; + + for (uint i = 0; i < 16; i++) + { + uint8 alpha = src.alpha[i]; + + int minDist = INT_MAX; + for (uint p = 0; p < 8; p++) + { + int dist = alphaDistance(alpha, alphas[p]); + minDist = min(dist, minDist); + } + + totalError += minDist * src.weights[i]; + + if (totalError > bestError) + { + // early out + return totalError; + } + } + + return totalError; + } + + static void computeAlphaIndices(const AlphaBlock4x4 & src, AlphaBlockDXT5 * dst) + { + uint8 alphas[8]; + dst->evaluatePalette(alphas, /*d3d9=*/false); // @@ Use target decoder. 
+ + for (uint i = 0; i < 16; i++) + { + uint8 alpha = src.alpha[i]; + + int minDist = INT_MAX; + int bestIndex = 8; + for (uint p = 0; p < 8; p++) + { + int dist = alphaDistance(alpha, alphas[p]); + + if (dist < minDist) + { + minDist = dist; + bestIndex = p; + } + } + nvDebugCheck(bestIndex < 8); + + dst->setIndex(i, bestIndex); + } + } + +} // namespace + + + + + +// Single color compressor, based on: +// https://mollyrocket.com/forums/viewtopic.php?t=392 +void OptimalCompress::compressDXT1(Color32 c, BlockDXT1 * dxtBlock) +{ + dxtBlock->col0.r = OMatch5[c.r][0]; + dxtBlock->col0.g = OMatch6[c.g][0]; + dxtBlock->col0.b = OMatch5[c.b][0]; + dxtBlock->col1.r = OMatch5[c.r][1]; + dxtBlock->col1.g = OMatch6[c.g][1]; + dxtBlock->col1.b = OMatch5[c.b][1]; + dxtBlock->indices = 0xaaaaaaaa; + + if (dxtBlock->col0.u < dxtBlock->col1.u) + { + swap(dxtBlock->col0.u, dxtBlock->col1.u); + dxtBlock->indices ^= 0x55555555; + } +} + +void OptimalCompress::compressDXT1a(Color32 c, uint alphaMask, BlockDXT1 * dxtBlock) +{ + if (alphaMask == 0) { + compressDXT1(c, dxtBlock); + } + else { + dxtBlock->col0.r = OMatchAlpha5[c.r][0]; + dxtBlock->col0.g = OMatchAlpha6[c.g][0]; + dxtBlock->col0.b = OMatchAlpha5[c.b][0]; + dxtBlock->col1.r = OMatchAlpha5[c.r][1]; + dxtBlock->col1.g = OMatchAlpha6[c.g][1]; + dxtBlock->col1.b = OMatchAlpha5[c.b][1]; + dxtBlock->indices = 0xaaaaaaaa; // 0b1010..1010 + + if (dxtBlock->col0.u > dxtBlock->col1.u) + { + swap(dxtBlock->col0.u, dxtBlock->col1.u); + } + + dxtBlock->indices |= alphaMask; + } +} + +void OptimalCompress::compressDXT1G(uint8 g, BlockDXT1 * dxtBlock) +{ + dxtBlock->col0.r = 31; + dxtBlock->col0.g = OMatch6[g][0]; + dxtBlock->col0.b = 0; + dxtBlock->col1.r = 31; + dxtBlock->col1.g = OMatch6[g][1]; + dxtBlock->col1.b = 0; + dxtBlock->indices = 0xaaaaaaaa; + + if (dxtBlock->col0.u < dxtBlock->col1.u) + { + swap(dxtBlock->col0.u, dxtBlock->col1.u); + dxtBlock->indices ^= 0x55555555; + } +} + + +// Brute force green channel compressor +void OptimalCompress::compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block) +{ + nvDebugCheck(block != NULL); + + uint8 ming = 63; + uint8 maxg = 0; + + bool isSingleColor = true; + uint8 singleColor = rgba.color(0).g; + + // Get min/max green. + for (uint i = 0; i < 16; i++) + { + uint8 green = (rgba.color(i).g + 1) >> 2; + ming = min(ming, green); + maxg = max(maxg, green); + + if (rgba.color(i).g != singleColor) isSingleColor = false; + } + + if (isSingleColor) + { + compressDXT1G(singleColor, block); + return; + } + + block->col0.r = 31; + block->col1.r = 31; + block->col0.g = maxg; + block->col1.g = ming; + block->col0.b = 0; + block->col1.b = 0; + + int bestError = computeGreenError(rgba, block); + int bestg0 = maxg; + int bestg1 = ming; + + // Expand search space a bit. + const int greenExpand = 4; + ming = (ming <= greenExpand) ? 0 : ming - greenExpand; + maxg = (maxg >= 63-greenExpand) ? 63 : maxg + greenExpand; + + for (int g0 = ming+1; g0 <= maxg; g0++) + { + for (int g1 = ming; g1 < g0; g1++) + { + block->col0.g = g0; + block->col1.g = g1; + int error = computeGreenError(rgba, block, bestError); + + if (error < bestError) + { + bestError = error; + bestg0 = g0; + bestg1 = g1; + } + } + } + + block->col0.g = bestg0; + block->col1.g = bestg1; + + nvDebugCheck(bestg0 == bestg1 || block->isFourColorMode()); + + + Color32 palette[4]; + block->evaluatePalette(palette, false); // @@ Use target decoder. 
+ block->indices = computeGreenIndices(rgba, palette); +} + + +/*void OptimalCompress::initLumaTables() { + + // For all possible color pairs: + for (int c0 = 0; c0 < 65536; c0++) { + for (int c1 = 0; c1 < 65536; c1++) { + + // Compute + + } + } + + + for (int r = 0; r < 1<<5; r++) { + for (int g = 0; g < 1<<6; g++) { + for (int b = 0; b < 1<<5; b++) { + + + } + } + } +}*/ + + +// Brute force Luma compressor +void OptimalCompress::compressDXT1_Luma(const ColorBlock & rgba, BlockDXT1 * block) +{ + nvDebugCheck(block != NULL); + + // F_YR = 19595/65536.0f, F_YG = 38470/65536.0f, F_YB = 7471/65536.0f; + // 195841 + //if ( + + + /* + uint8 ming = 63; + uint8 maxg = 0; + + bool isSingleColor = true; + uint8 singleColor = rgba.color(0).g; + + // Get min/max green. + for (uint i = 0; i < 16; i++) + { + uint8 green = (rgba.color(i).g + 1) >> 2; + ming = min(ming, green); + maxg = max(maxg, green); + + if (rgba.color(i).g != singleColor) isSingleColor = false; + } + + if (isSingleColor) + { + compressDXT1G(singleColor, block); + return; + } + + block->col0.r = 31; + block->col1.r = 31; + block->col0.g = maxg; + block->col1.g = ming; + block->col0.b = 0; + block->col1.b = 0; + + int bestError = computeGreenError(rgba, block); + int bestg0 = maxg; + int bestg1 = ming; + + // Expand search space a bit. + const int greenExpand = 4; + ming = (ming <= greenExpand) ? 0 : ming - greenExpand; + maxg = (maxg >= 63-greenExpand) ? 63 : maxg + greenExpand; + + for (int g0 = ming+1; g0 <= maxg; g0++) + { + for (int g1 = ming; g1 < g0; g1++) + { + block->col0.g = g0; + block->col1.g = g1; + int error = computeGreenError(rgba, block, bestError); + + if (error < bestError) + { + bestError = error; + bestg0 = g0; + bestg1 = g1; + } + } + } + + block->col0.g = bestg0; + block->col1.g = bestg1; + + nvDebugCheck(bestg0 == bestg1 || block->isFourColorMode()); + */ + + Color32 palette[4]; + block->evaluatePalette(palette, false); // @@ Use target decoder. + block->indices = computeGreenIndices(rgba, palette); +} + + +void OptimalCompress::compressDXT3A(const AlphaBlock4x4 & src, AlphaBlockDXT3 * dst) +{ + dst->alpha0 = quantize4(src.alpha[0]); + dst->alpha1 = quantize4(src.alpha[1]); + dst->alpha2 = quantize4(src.alpha[2]); + dst->alpha3 = quantize4(src.alpha[3]); + dst->alpha4 = quantize4(src.alpha[4]); + dst->alpha5 = quantize4(src.alpha[5]); + dst->alpha6 = quantize4(src.alpha[6]); + dst->alpha7 = quantize4(src.alpha[7]); + dst->alpha8 = quantize4(src.alpha[8]); + dst->alpha9 = quantize4(src.alpha[9]); + dst->alphaA = quantize4(src.alpha[10]); + dst->alphaB = quantize4(src.alpha[11]); + dst->alphaC = quantize4(src.alpha[12]); + dst->alphaD = quantize4(src.alpha[13]); + dst->alphaE = quantize4(src.alpha[14]); + dst->alphaF = quantize4(src.alpha[15]); +} + +void OptimalCompress::compressDXT3A(const ColorBlock & src, AlphaBlockDXT3 * dst) +{ + AlphaBlock4x4 tmp; + tmp.init(src, 3); + compressDXT3A(tmp, dst); +} + +void OptimalCompress::compressDXT5A(const AlphaBlock4x4 & src, AlphaBlockDXT5 * dst) +{ + uint8 mina = 255; + uint8 maxa = 0; + + uint8 mina_no01 = 255; + uint8 maxa_no01 = 0; + + // Get min/max alpha. 
+ for (uint i = 0; i < 16; i++) + { + uint8 alpha = src.alpha[i]; + mina = min(mina, alpha); + maxa = max(maxa, alpha); + + if (alpha != 0 && alpha != 255) { + mina_no01 = min(mina_no01, alpha); + maxa_no01 = max(maxa_no01, alpha); + } + } + + if (maxa - mina < 8) { + dst->alpha0 = maxa; + dst->alpha1 = mina; + + nvDebugCheck(computeAlphaError(src, dst) == 0); + } + else if (maxa_no01 - mina_no01 < 6) { + dst->alpha0 = mina_no01; + dst->alpha1 = maxa_no01; + + nvDebugCheck(computeAlphaError(src, dst) == 0); + } + else { + float besterror = computeAlphaError(src, dst); + int besta0 = maxa; + int besta1 = mina; + + // Expand search space a bit. + const int alphaExpand = 8; + mina = (mina <= alphaExpand) ? 0 : mina - alphaExpand; + maxa = (maxa >= 255-alphaExpand) ? 255 : maxa + alphaExpand; + + for (int a0 = mina+9; a0 < maxa; a0++) + { + for (int a1 = mina; a1 < a0-8; a1++) + { + nvDebugCheck(a0 - a1 > 8); + + dst->alpha0 = a0; + dst->alpha1 = a1; + float error = computeAlphaError(src, dst, besterror); + + if (error < besterror) + { + besterror = error; + besta0 = a0; + besta1 = a1; + } + } + } + + // Try using the 6 step encoding. + /*if (mina == 0 || maxa == 255)*/ { + + // Expand search space a bit. + const int alphaExpand = 6; + mina_no01 = (mina_no01 <= alphaExpand) ? 0 : mina_no01 - alphaExpand; + maxa_no01 = (maxa_no01 >= 255 - alphaExpand) ? 255 : maxa_no01 + alphaExpand; + + for (int a0 = mina_no01 + 9; a0 < maxa_no01; a0++) + { + for (int a1 = mina_no01; a1 < a0 - 8; a1++) + { + nvDebugCheck(a0 - a1 > 8); + + dst->alpha0 = a1; + dst->alpha1 = a0; + float error = computeAlphaError(src, dst, besterror); + + if (error < besterror) + { + besterror = error; + besta0 = a1; + besta1 = a0; + } + } + } + } + + dst->alpha0 = besta0; + dst->alpha1 = besta1; + } + + computeAlphaIndices(src, dst); +} + + +void OptimalCompress::compressDXT5A(const ColorBlock & src, AlphaBlockDXT5 * dst) +{ + AlphaBlock4x4 tmp; + tmp.init(src, 3); + compressDXT5A(tmp, dst); +} + +#if 0 +#include "nvmath/Vector.inl" +#include "nvmath/ftoi.h" +const float threshold = 0.15f; + +static float computeAlphaError_RGBM(const ColorSet & src, const ColorBlock & RGB, const AlphaBlockDXT5 * dst, float bestError = FLT_MAX) +{ + uint8 alphas[8]; + dst->evaluatePalette(alphas, /*d3d9=*/false); // @@ Use target decoder. + + float totalError = 0; + + for (uint i = 0; i < 16; i++) + { + float R = src.color(i).x; + float G = src.color(i).y; + float B = src.color(i).z; + + float r = float(RGB.color(i).r) / 255.0f; + float g = float(RGB.color(i).g) / 255.0f; + float b = float(RGB.color(i).b) / 255.0f; + + float minDist = FLT_MAX; + for (uint p = 0; p < 8; p++) + { + // Compute M. + float M = float(alphas[p]) / 255.0f * (1 - threshold) + threshold; + + // Decode color. + float fr = r * M; + float fg = g * M; + float fb = b * M; + + // Measure error. + float error = square(R - fr) + square(G - fg) + square(B - fb); + + minDist = min(error, minDist); + } + + totalError += minDist * src.weights[i]; + + if (totalError > bestError) + { + // early out + return totalError; + } + } + + return totalError; +} + +static void computeAlphaIndices_RGBM(const ColorSet & src, const ColorBlock & RGB, AlphaBlockDXT5 * dst) +{ + uint8 alphas[8]; + dst->evaluatePalette(alphas, /*d3d9=*/false); // @@ Use target decoder. 
+ + for (uint i = 0; i < 16; i++) + { + float R = src.color(i).x; + float G = src.color(i).y; + float B = src.color(i).z; + + float r = float(RGB.color(i).r) / 255.0f; + float g = float(RGB.color(i).g) / 255.0f; + float b = float(RGB.color(i).b) / 255.0f; + + float minDist = FLT_MAX; + int bestIndex = 8; + for (uint p = 0; p < 8; p++) + { + // Compute M. + float M = float(alphas[p]) / 255.0f * (1 - threshold) + threshold; + + // Decode color. + float fr = r * M; + float fg = g * M; + float fb = b * M; + + // Measure error. + float error = square(R - fr) + square(G - fg) + square(B - fb); + + if (error < minDist) + { + minDist = error; + bestIndex = p; + } + } + nvDebugCheck(bestIndex < 8); + + dst->setIndex(i, bestIndex); + } +} + + +void OptimalCompress::compressDXT5A_RGBM(const ColorSet & src, const ColorBlock & RGB, AlphaBlockDXT5 * dst) +{ + uint8 mina = 255; + uint8 maxa = 0; + + uint8 mina_no01 = 255; + uint8 maxa_no01 = 0; + + // Get min/max alpha. + /*for (uint i = 0; i < 16; i++) + { + uint8 alpha = src.alpha[i]; + mina = min(mina, alpha); + maxa = max(maxa, alpha); + + if (alpha != 0 && alpha != 255) { + mina_no01 = min(mina_no01, alpha); + maxa_no01 = max(maxa_no01, alpha); + } + }*/ + mina = 0; + maxa = 255; + mina_no01 = 0; + maxa_no01 = 255; + + /*if (maxa - mina < 8) { + dst->alpha0 = maxa; + dst->alpha1 = mina; + + nvDebugCheck(computeAlphaError(src, dst) == 0); + } + else if (maxa_no01 - mina_no01 < 6) { + dst->alpha0 = mina_no01; + dst->alpha1 = maxa_no01; + + nvDebugCheck(computeAlphaError(src, dst) == 0); + } + else*/ + { + float besterror = computeAlphaError_RGBM(src, RGB, dst); + int besta0 = maxa; + int besta1 = mina; + + // Expand search space a bit. + const int alphaExpand = 8; + mina = (mina <= alphaExpand) ? 0 : mina - alphaExpand; + maxa = (maxa >= 255 - alphaExpand) ? 255 : maxa + alphaExpand; + + for (int a0 = mina + 9; a0 < maxa; a0++) + { + for (int a1 = mina; a1 < a0 - 8; a1++) + { + nvDebugCheck(a0 - a1 > 8); + + dst->alpha0 = a0; + dst->alpha1 = a1; + float error = computeAlphaError_RGBM(src, RGB, dst, besterror); + + if (error < besterror) + { + besterror = error; + besta0 = a0; + besta1 = a1; + } + } + } + + // Try using the 6 step encoding. + /*if (mina == 0 || maxa == 255)*/ { + + // Expand search space a bit. + const int alphaExpand = 6; + mina_no01 = (mina_no01 <= alphaExpand) ? 0 : mina_no01 - alphaExpand; + maxa_no01 = (maxa_no01 >= 255 - alphaExpand) ? 
255 : maxa_no01 + alphaExpand; + + for (int a0 = mina_no01 + 9; a0 < maxa_no01; a0++) + { + for (int a1 = mina_no01; a1 < a0 - 8; a1++) + { + nvDebugCheck(a0 - a1 > 8); + + dst->alpha0 = a1; + dst->alpha1 = a0; + float error = computeAlphaError_RGBM(src, RGB, dst, besterror); + + if (error < besterror) + { + besterror = error; + besta0 = a1; + besta1 = a0; + } + } + } + } + + dst->alpha0 = besta0; + dst->alpha1 = besta1; + } + + computeAlphaIndices_RGBM(src, RGB, dst); +} +#endif // 0 \ No newline at end of file Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/OutputOptions.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/OutputOptions.h +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/OutputOptions.h @@ -1,76 +1,95 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. - -#ifndef NV_TT_OUTPUTOPTIONS_H -#define NV_TT_OUTPUTOPTIONS_H - -#include -#include -#include "nvtt.h" - -namespace nvtt -{ - - struct DefaultOutputHandler : public nvtt::OutputHandler - { - DefaultOutputHandler(const char * fileName) : stream(fileName) {} - - virtual ~DefaultOutputHandler() - { - } - - virtual void beginImage(int size, int width, int height, int depth, int face, int miplevel) - { - // ignore. - } - - // Output data. 
- virtual bool writeData(const void * data, int size) - { - stream.serialize(const_cast(data), size); - - //return !stream.isError(); - return true; - } - - nv::StdOutputStream stream; - }; - - - struct OutputOptions::Private - { - nv::Path fileName; - - mutable OutputHandler * outputHandler; - ErrorHandler * errorHandler; - bool outputHeader; - - bool openFile() const; - void closeFile() const; - }; - - -} // nvtt namespace - - -#endif // NV_TT_OUTPUTOPTIONS_H +// Copyright (c) 2009-2011 Ignacio Castano +// Copyright (c) 2007-2009 NVIDIA Corporation -- Ignacio Castano +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +#ifndef NV_TT_OUTPUTOPTIONS_H +#define NV_TT_OUTPUTOPTIONS_H + +#include "nvtt.h" + +#include "nvcore/StrLib.h" // Path +#include "nvcore/StdStream.h" + + +namespace nvtt +{ + + struct DefaultOutputHandler : public nvtt::OutputHandler + { + DefaultOutputHandler(const char * fileName) : stream(fileName) {} + DefaultOutputHandler(FILE * fp) : stream(fp, false) {} + + virtual ~DefaultOutputHandler() {} + + virtual void beginImage(int size, int width, int height, int depth, int face, int miplevel) + { + // ignore. + } + + // Output data. + virtual bool writeData(const void * data, int size) + { + stream.serialize(const_cast(data), size); + + //return !stream.isError(); + return true; + } + + virtual void endImage() + { + // ignore. + } + + nv::StdOutputStream stream; + }; + + + struct OutputOptions::Private + { + nv::Path fileName; + FILE * fileHandle; + + OutputHandler * outputHandler; + ErrorHandler * errorHandler; + + bool outputHeader; + Container container; + int version; + bool srgb; + bool deleteOutputHandler; + + void * wrapperProxy; // For the C/C# wrapper. 
+ + bool hasValidOutputHandler() const; + + void beginImage(int size, int width, int height, int depth, int face, int miplevel) const; + bool writeData(const void * data, int size) const; + void endImage() const; + void error(Error e) const; + }; + + +} // nvtt namespace + + +#endif // NV_TT_OUTPUTOPTIONS_H Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/OutputOptions.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/OutputOptions.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/OutputOptions.cpp @@ -1,102 +1,177 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. - -#include "OutputOptions.h" - -using namespace nvtt; - - -OutputOptions::OutputOptions() : m(*new OutputOptions::Private()) -{ - reset(); -} - -OutputOptions::~OutputOptions() -{ - delete &m; -} - -/// Set default output options. -void OutputOptions::reset() -{ - m.fileName.reset(); - m.outputHandler = NULL; - m.errorHandler = NULL; - m.outputHeader = true; -} - - -/// Set output file name. -void OutputOptions::setFileName(const char * fileName) -{ - m.fileName = fileName; - m.outputHandler = NULL; -} - -/// Set output handler. -void OutputOptions::setOutputHandler(OutputHandler * outputHandler) -{ - m.fileName.reset(); - m.outputHandler = outputHandler; -} - -/// Set error handler. -void OutputOptions::setErrorHandler(ErrorHandler * errorHandler) -{ - m.errorHandler = errorHandler; -} - -/// Set output header. 
-void OutputOptions::setOutputHeader(bool outputHeader) -{ - m.outputHeader = outputHeader; -} - - -bool OutputOptions::Private::openFile() const -{ - if (!fileName.isNull()) - { - nvCheck(outputHandler == NULL); - - DefaultOutputHandler * oh = new DefaultOutputHandler(fileName.str()); - if (oh->stream.isError()) - { - return false; - } - - outputHandler = oh; - } - - return true; -} - -void OutputOptions::Private::closeFile() const -{ - if (!fileName.isNull()) - { - delete outputHandler; - outputHandler = NULL; - } -} - +// Copyright (c) 2009-2011 Ignacio Castano +// Copyright (c) 2007-2009 NVIDIA Corporation -- Ignacio Castano +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +#include "OutputOptions.h" + +using namespace nvtt; + + +OutputOptions::OutputOptions() : m(*new OutputOptions::Private()) +{ + reset(); +} + +OutputOptions::~OutputOptions() +{ + // Cleanup output handler. + setOutputHandler(NULL); + + delete &m; +} + +/// Set default output options. +void OutputOptions::reset() +{ + m.fileName.reset(); + m.fileHandle = NULL; + + m.outputHandler = NULL; + m.errorHandler = NULL; + + m.outputHeader = true; + m.container = Container_DDS; + m.version = 0; + m.srgb = false; + m.deleteOutputHandler = false; +} + + +/// Set output file name. +void OutputOptions::setFileName(const char * fileName) +{ + if (m.deleteOutputHandler) + { + delete m.outputHandler; + } + + m.fileName = fileName; + m.fileHandle = NULL; + m.outputHandler = NULL; + m.deleteOutputHandler = false; + + DefaultOutputHandler * oh = new DefaultOutputHandler(fileName); + if (oh->stream.isError()) { + delete oh; + } + else { + m.deleteOutputHandler = true; + m.outputHandler = oh; + } +} + +/// Set output file handle. +void OutputOptions::setFileHandle(void * fp) +{ + if (m.deleteOutputHandler) { + delete m.outputHandler; + } + + m.fileName.reset(); + m.fileHandle = (FILE *)fp; + m.outputHandler = NULL; + m.deleteOutputHandler = false; + + DefaultOutputHandler * oh = new DefaultOutputHandler(m.fileHandle); + if (oh->stream.isError()) { + delete oh; + } + else { + m.deleteOutputHandler = true; + m.outputHandler = oh; + } +} + + +/// Set output handler. +void OutputOptions::setOutputHandler(OutputHandler * outputHandler) +{ + if (m.deleteOutputHandler) { + delete m.outputHandler; + } + + m.fileName.reset(); + m.fileHandle = NULL; + m.outputHandler = outputHandler; + m.deleteOutputHandler = false; +} + +/// Set error handler. 
+void OutputOptions::setErrorHandler(ErrorHandler * errorHandler) +{ + m.errorHandler = errorHandler; +} + +/// Set output header. +void OutputOptions::setOutputHeader(bool outputHeader) +{ + m.outputHeader = outputHeader; +} + +/// Set container. +void OutputOptions::setContainer(Container container) +{ + m.container = container; +} + +/// Set user version. +void OutputOptions::setUserVersion(int version) +{ + m.version = version; +} + +/// Set SRGB flag. +void OutputOptions::setSrgbFlag(bool b) +{ + m.srgb = b; +} + +bool OutputOptions::Private::hasValidOutputHandler() const +{ + if (!fileName.isNull() || fileHandle != NULL) + { + return outputHandler != NULL; + } + + return true; +} + +void OutputOptions::Private::beginImage(int size, int width, int height, int depth, int face, int miplevel) const +{ + if (outputHandler != NULL) outputHandler->beginImage(size, width, height, depth, face, miplevel); +} + +bool OutputOptions::Private::writeData(const void * data, int size) const +{ + return outputHandler == NULL || outputHandler->writeData(data, size); +} + +void OutputOptions::Private::endImage() const +{ + if (outputHandler != NULL) outputHandler->endImage(); +} + +void OutputOptions::Private::error(Error e) const +{ + if (errorHandler != NULL) errorHandler->error(e); +} Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/QuickCompressDXT.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/QuickCompressDXT.h +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/QuickCompressDXT.h @@ -1,50 +1,59 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. 
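[Editor's note on the OutputOptions changes above: the new Private helpers (hasValidOutputHandler, beginImage, writeData, endImage, error) funnel all compressor output through a single nvtt::OutputHandler, whether it was created internally by setFileName/setFileHandle or supplied by the caller. As a reference for callers, a minimal sketch of a user-provided handler that collects the compressed stream in memory; the class name and the buffer are illustrative, only the nvtt::OutputHandler interface itself comes from the bundled header.]

#include <nvtt/nvtt.h>
#include <vector>

// Collects everything NVTT writes (header plus compressed blocks) into RAM.
struct MemoryOutputHandler : public nvtt::OutputHandler
{
    std::vector<unsigned char> buffer;

    virtual void beginImage(int /*size*/, int /*width*/, int /*height*/,
                            int /*depth*/, int /*face*/, int /*miplevel*/) {}
    virtual void endImage() {}

    virtual bool writeData(const void * data, int size)
    {
        const unsigned char * bytes = static_cast<const unsigned char *>(data);
        buffer.insert(buffer.end(), bytes, bytes + size);
        return true; // returning false reports a write error and stops compression
    }
};

[Handlers passed in through setOutputHandler() are never deleted by OutputOptions; only the DefaultOutputHandler instances it creates itself are owned, tracked by the new deleteOutputHandler flag.]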
- -#ifndef NV_TT_QUICKCOMPRESSDXT_H -#define NV_TT_QUICKCOMPRESSDXT_H - -#include - -namespace nv -{ - struct ColorBlock; - struct BlockDXT1; - struct BlockDXT3; - struct BlockDXT5; - struct AlphaBlockDXT3; - struct AlphaBlockDXT5; - - namespace QuickCompress - { - void compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock); - void compressDXT1a(const ColorBlock & rgba, BlockDXT1 * dxtBlock); - - void compressDXT3(const ColorBlock & rgba, BlockDXT3 * dxtBlock); - - void compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock, int iterationCount=8); - void compressDXT5(const ColorBlock & rgba, BlockDXT5 * dxtBlock, int iterationCount=8); - } -} // nv namespace - -#endif // NV_TT_QUICKCOMPRESSDXT_H +// Copyright (c) 2009-2011 Ignacio Castano +// Copyright (c) 2007-2009 NVIDIA Corporation -- Ignacio Castano +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. 
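[Editor's note: the compressDXT5A entry points declared just below target the standard BC3/DXT5 alpha block. As background, this is standard format knowledge rather than anything introduced by the patch; the struct and helper are a sketch, not code from the library.]

#include <stdint.h>

// Standard BC3/DXT5 alpha block: two 8-bit endpoints followed by
// sixteen 3-bit palette indices packed into 48 bits.
struct BC3AlphaBlock
{
    uint8_t alpha0;
    uint8_t alpha1;
    uint8_t bits[6]; // 16 * 3-bit indices
};

// Palette reconstruction: 8 interpolated alphas when alpha0 > alpha1,
// otherwise 6 interpolated alphas plus explicit 0 and 255.
static void evaluateAlphaPalette(uint8_t alpha0, uint8_t alpha1, uint8_t palette[8])
{
    palette[0] = alpha0;
    palette[1] = alpha1;
    if (alpha0 > alpha1) {
        for (int i = 1; i < 7; i++)
            palette[1 + i] = (uint8_t)(((7 - i) * alpha0 + i * alpha1) / 7);
    } else {
        for (int i = 1; i < 5; i++)
            palette[1 + i] = (uint8_t)(((5 - i) * alpha0 + i * alpha1) / 5);
        palette[6] = 0;
        palette[7] = 255;
    }
}

[computeAlphaIndices() and optimizeAlpha8() in the rewritten QuickCompressDXT.cpp further down search this palette per texel and then refit alpha0/alpha1 by least squares.]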
+ +#ifndef NV_TT_QUICKCOMPRESSDXT_H +#define NV_TT_QUICKCOMPRESSDXT_H + +#include + +namespace nv +{ + struct ColorBlock; + struct ColorSet; + struct AlphaBlock4x4; + struct BlockDXT1; + struct BlockDXT3; + struct BlockDXT5; + struct AlphaBlockDXT3; + struct AlphaBlockDXT5; + class Vector3; + + namespace QuickCompress + { + void compressDXT1(const ColorBlock & src, BlockDXT1 * dst); + void compressDXT1a(const ColorBlock & src, BlockDXT1 * dst); + + void compressDXT3(const ColorBlock & src, BlockDXT3 * dst); + + void compressDXT5A(const ColorBlock & src, AlphaBlockDXT5 * dst, int iterationCount=8); + void compressDXT5A(const AlphaBlock4x4 & src, AlphaBlockDXT5 * dst, int iterationCount=8); + + void compressDXT5(const ColorBlock & src, BlockDXT5 * dst, int iterationCount=8); + + void outputBlock4(const ColorSet & set, const Vector3 & start, const Vector3 & end, BlockDXT1 * block); + void outputBlock3(const ColorSet & set, const Vector3 & start, const Vector3 & end, BlockDXT1 * block); + } +} // nv namespace + +#endif // NV_TT_QUICKCOMPRESSDXT_H Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/QuickCompressDXT.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/QuickCompressDXT.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/QuickCompressDXT.cpp @@ -1,585 +1,870 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. 
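[Editor's note: both the implementation being removed below and its 2.1.1 replacement emit the same BC1/DXT1 block layout. For orientation, again standard format knowledge rather than anything added by this patch; the struct and packing helper are illustrative.]

#include <stdint.h>

// Standard BC1/DXT1 block: two RGB565 endpoints plus sixteen 2-bit indices.
// col0 > col1 selects 4-color mode; col0 <= col1 selects 3-color mode with a
// transparent fourth palette entry (this is what compressDXT1a relies on when
// it stores the endpoints swapped).
struct BC1Block
{
    uint16_t col0;
    uint16_t col1;
    uint32_t indices; // texel i uses bits 2*i and 2*i+1
};

// Truncating RGB565 packing; roundAndExpand() in this file instead picks the
// 565 value whose expanded 888 form is nearest to the input endpoint.
static inline uint16_t packRGB565(unsigned r, unsigned g, unsigned b)
{
    return (uint16_t)(((r >> 3) << 11) | ((g >> 2) << 5) | (b >> 3));
}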
- -#include - -#include -#include - -#include "QuickCompressDXT.h" -#include "OptimalCompressDXT.h" - - -using namespace nv; -using namespace QuickCompress; - - - -inline static void extractColorBlockRGB(const ColorBlock & rgba, Vector3 block[16]) -{ - for (int i = 0; i < 16; i++) - { - const Color32 c = rgba.color(i); - block[i] = Vector3(c.r, c.g, c.b); - } -} - -inline static uint extractColorBlockRGBA(const ColorBlock & rgba, Vector3 block[16]) -{ - int num = 0; - - for (int i = 0; i < 16; i++) - { - const Color32 c = rgba.color(i); - if (c.a > 127) - { - block[num++] = Vector3(c.r, c.g, c.b); - } - } - - return num; -} - - -// find minimum and maximum colors based on bounding box in color space -inline static void findMinMaxColorsBox(const Vector3 * block, uint num, Vector3 * restrict maxColor, Vector3 * restrict minColor) -{ - *maxColor = Vector3(0, 0, 0); - *minColor = Vector3(255, 255, 255); - - for (uint i = 0; i < num; i++) - { - *maxColor = max(*maxColor, block[i]); - *minColor = min(*minColor, block[i]); - } -} - - -inline static void selectDiagonal(const Vector3 * block, uint num, Vector3 * restrict maxColor, Vector3 * restrict minColor) -{ - Vector3 center = (*maxColor + *minColor) * 0.5; - - Vector2 covariance = Vector2(zero); - for (uint i = 0; i < num; i++) - { - Vector3 t = block[i] - center; - covariance += t.xy() * t.z(); - } - - float x0 = maxColor->x(); - float y0 = maxColor->y(); - float x1 = minColor->x(); - float y1 = minColor->y(); - - if (covariance.x() < 0) { - swap(x0, x1); - } - if (covariance.y() < 0) { - swap(y0, y1); - } - - maxColor->set(x0, y0, maxColor->z()); - minColor->set(x1, y1, minColor->z()); -} - -inline static void insetBBox(Vector3 * restrict maxColor, Vector3 * restrict minColor) -{ - Vector3 inset = (*maxColor - *minColor) / 16.0f - (8.0f / 255.0f) / 16.0f; - *maxColor = clamp(*maxColor - inset, 0.0f, 255.0f); - *minColor = clamp(*minColor + inset, 0.0f, 255.0f); -} - -inline static uint16 roundAndExpand(Vector3 * restrict v) -{ - uint r = uint(clamp(v->x() * (31.0f / 255.0f), 0.0f, 31.0f) + 0.5f); - uint g = uint(clamp(v->y() * (63.0f / 255.0f), 0.0f, 63.0f) + 0.5f); - uint b = uint(clamp(v->z() * (31.0f / 255.0f), 0.0f, 31.0f) + 0.5f); - - uint16 w = (r << 11) | (g << 5) | b; - - r = (r << 3) | (r >> 2); - g = (g << 2) | (g >> 4); - b = (b << 3) | (b >> 2); - *v = Vector3(float(r), float(g), float(b)); - - return w; -} - -inline static float colorDistance(Vector3::Arg c0, Vector3::Arg c1) -{ - return dot(c0-c1, c0-c1); -} - -inline static uint computeIndices4(Vector3 block[16], Vector3::Arg maxColor, Vector3::Arg minColor) -{ - Vector3 palette[4]; - palette[0] = maxColor; - palette[1] = minColor; - palette[2] = lerp(palette[0], palette[1], 1.0f / 3.0f); - palette[3] = lerp(palette[0], palette[1], 2.0f / 3.0f); - - uint indices = 0; - for(int i = 0; i < 16; i++) - { - float d0 = colorDistance(palette[0], block[i]); - float d1 = colorDistance(palette[1], block[i]); - float d2 = colorDistance(palette[2], block[i]); - float d3 = colorDistance(palette[3], block[i]); - - uint b0 = d0 > d3; - uint b1 = d1 > d2; - uint b2 = d0 > d2; - uint b3 = d1 > d3; - uint b4 = d2 > d3; - - uint x0 = b1 & b2; - uint x1 = b0 & b3; - uint x2 = b0 & b4; - - indices |= (x2 | ((x0 | x1) << 1)) << (2 * i); - } - - return indices; -} - -inline static uint computeIndices3(const ColorBlock & rgba, Vector3::Arg maxColor, Vector3::Arg minColor) -{ - Vector3 palette[4]; - palette[0] = minColor; - palette[1] = maxColor; - palette[2] = (palette[0] + palette[1]) * 0.5f; - - 
uint indices = 0; - for(int i = 0; i < 16; i++) - { - Color32 c = rgba.color(i); - Vector3 color = Vector3(c.r, c.g, c.b); - - float d0 = colorDistance(palette[0], color); - float d1 = colorDistance(palette[1], color); - float d2 = colorDistance(palette[2], color); - - uint index; - if (c.a < 128) index = 3; - else if (d0 < d1 && d0 < d2) index = 0; - else if (d1 < d2) index = 1; - else index = 2; - - indices |= index << (2 * i); - } - - return indices; -} - - -static void optimizeEndPoints4(Vector3 block[16], BlockDXT1 * dxtBlock) -{ - float alpha2_sum = 0.0f; - float beta2_sum = 0.0f; - float alphabeta_sum = 0.0f; - Vector3 alphax_sum(zero); - Vector3 betax_sum(zero); - - for( int i = 0; i < 16; ++i ) - { - const uint bits = dxtBlock->indices >> (2 * i); - - float beta = float(bits & 1); - if (bits & 2) beta = (1 + beta) / 3.0f; - float alpha = 1.0f - beta; - - alpha2_sum += alpha * alpha; - beta2_sum += beta * beta; - alphabeta_sum += alpha * beta; - alphax_sum += alpha * block[i]; - betax_sum += beta * block[i]; - } - - float denom = alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum; - if (equal(denom, 0.0f)) return; - - float factor = 1.0f / denom; - - Vector3 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor; - Vector3 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor; - - a = clamp(a, 0, 255); - b = clamp(b, 0, 255); - - uint16 color0 = roundAndExpand(&a); - uint16 color1 = roundAndExpand(&b); - - if (color0 < color1) - { - swap(a, b); - swap(color0, color1); - } - - dxtBlock->col0 = Color16(color0); - dxtBlock->col1 = Color16(color1); - dxtBlock->indices = computeIndices4(block, a, b); -} - -/*static void optimizeEndPoints3(Vector3 block[16], BlockDXT1 * dxtBlock) -{ - float alpha2_sum = 0.0f; - float beta2_sum = 0.0f; - float alphabeta_sum = 0.0f; - Vector3 alphax_sum(zero); - Vector3 betax_sum(zero); - - for( int i = 0; i < 16; ++i ) - { - const uint bits = dxtBlock->indices >> (2 * i); - - float beta = (bits & 1); - if (bits & 2) beta = 0.5f; - float alpha = 1.0f - beta; - - alpha2_sum += alpha * alpha; - beta2_sum += beta * beta; - alphabeta_sum += alpha * beta; - alphax_sum += alpha * block[i]; - betax_sum += beta * block[i]; - } - - float denom = alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum; - if (equal(denom, 0.0f)) return; - - float factor = 1.0f / denom; - - Vector3 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor; - Vector3 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor; - - a = clamp(a, 0, 255); - b = clamp(b, 0, 255); - - uint16 color0 = roundAndExpand(&a); - uint16 color1 = roundAndExpand(&b); - - if (color0 < color1) - { - swap(a, b); - swap(color0, color1); - } - - dxtBlock->col0 = Color16(color1); - dxtBlock->col1 = Color16(color0); - dxtBlock->indices = computeIndices3(block, a, b); -}*/ - -namespace -{ - - static uint computeAlphaIndices(const ColorBlock & rgba, AlphaBlockDXT5 * block) - { - uint8 alphas[8]; - block->evaluatePalette(alphas); - - uint totalError = 0; - - for (uint i = 0; i < 16; i++) - { - uint8 alpha = rgba.color(i).a; - - uint besterror = 256*256; - uint best = 8; - for(uint p = 0; p < 8; p++) - { - int d = alphas[p] - alpha; - uint error = d * d; - - if (error < besterror) - { - besterror = error; - best = p; - } - } - nvDebugCheck(best < 8); - - totalError += besterror; - block->setIndex(i, best); - } - - return totalError; - } - - static void optimizeAlpha8(const ColorBlock & rgba, AlphaBlockDXT5 * block) - { - float alpha2_sum = 0; - float beta2_sum = 0; - 
float alphabeta_sum = 0; - float alphax_sum = 0; - float betax_sum = 0; - - for (int i = 0; i < 16; i++) - { - uint idx = block->index(i); - float alpha; - if (idx < 2) alpha = 1.0f - idx; - else alpha = (8.0f - idx) / 7.0f; - - float beta = 1 - alpha; - - alpha2_sum += alpha * alpha; - beta2_sum += beta * beta; - alphabeta_sum += alpha * beta; - alphax_sum += alpha * rgba.color(i).a; - betax_sum += beta * rgba.color(i).a; - } - - const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); - - float a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor; - float b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor; - - uint alpha0 = uint(min(max(a, 0.0f), 255.0f)); - uint alpha1 = uint(min(max(b, 0.0f), 255.0f)); - - if (alpha0 < alpha1) - { - swap(alpha0, alpha1); - - // Flip indices: - for (int i = 0; i < 16; i++) - { - uint idx = block->index(i); - if (idx < 2) block->setIndex(i, 1 - idx); - else block->setIndex(i, 9 - idx); - } - } - else if (alpha0 == alpha1) - { - for (int i = 0; i < 16; i++) - { - block->setIndex(i, 0); - } - } - - block->alpha0 = alpha0; - block->alpha1 = alpha1; - } - - /* - static void optimizeAlpha6(const ColorBlock & rgba, AlphaBlockDXT5 * block) - { - float alpha2_sum = 0; - float beta2_sum = 0; - float alphabeta_sum = 0; - float alphax_sum = 0; - float betax_sum = 0; - - for (int i = 0; i < 16; i++) - { - uint8 x = rgba.color(i).a; - if (x == 0 || x == 255) continue; - - uint bits = block->index(i); - if (bits == 6 || bits == 7) continue; - - float alpha; - if (bits == 0) alpha = 1.0f; - else if (bits == 1) alpha = 0.0f; - else alpha = (6.0f - block->index(i)) / 5.0f; - - float beta = 1 - alpha; - - alpha2_sum += alpha * alpha; - beta2_sum += beta * beta; - alphabeta_sum += alpha * beta; - alphax_sum += alpha * x; - betax_sum += beta * x; - } - - const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); - - float a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor; - float b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor; - - uint alpha0 = uint(min(max(a, 0.0f), 255.0f)); - uint alpha1 = uint(min(max(b, 0.0f), 255.0f)); - - if (alpha0 > alpha1) - { - swap(alpha0, alpha1); - } - - block->alpha0 = alpha0; - block->alpha1 = alpha1; - } - */ - - static bool sameIndices(const AlphaBlockDXT5 & block0, const AlphaBlockDXT5 & block1) - { - const uint64 mask = ~uint64(0xFFFF); - return (block0.u | mask) == (block1.u | mask); - } - -} // namespace - - - -void QuickCompress::compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock) -{ - if (rgba.isSingleColor()) - { - OptimalCompress::compressDXT1(rgba.color(0), dxtBlock); - } - else - { - // read block - Vector3 block[16]; - extractColorBlockRGB(rgba, block); - - // find min and max colors - Vector3 maxColor, minColor; - findMinMaxColorsBox(block, 16, &maxColor, &minColor); - - selectDiagonal(block, 16, &maxColor, &minColor); - - insetBBox(&maxColor, &minColor); - - uint16 color0 = roundAndExpand(&maxColor); - uint16 color1 = roundAndExpand(&minColor); - - if (color0 < color1) - { - swap(maxColor, minColor); - swap(color0, color1); - } - - dxtBlock->col0 = Color16(color0); - dxtBlock->col1 = Color16(color1); - dxtBlock->indices = computeIndices4(block, maxColor, minColor); - - optimizeEndPoints4(block, dxtBlock); - } -} - - -void QuickCompress::compressDXT1a(const ColorBlock & rgba, BlockDXT1 * dxtBlock) -{ - bool hasAlpha = false; - - for (uint i = 0; i < 16; i++) - { - if (rgba.color(i).a < 128) { - 
hasAlpha = true; - break; - } - } - - if (!hasAlpha) - { - compressDXT1(rgba, dxtBlock); - } - // @@ Handle single RGB, with varying alpha? We need tables for single color compressor in 3 color mode. - //else if (rgba.isSingleColorNoAlpha()) { ... } - else - { - // read block - Vector3 block[16]; - uint num = extractColorBlockRGBA(rgba, block); - - // find min and max colors - Vector3 maxColor, minColor; - findMinMaxColorsBox(block, num, &maxColor, &minColor); - - selectDiagonal(block, num, &maxColor, &minColor); - - insetBBox(&maxColor, &minColor); - - uint16 color0 = roundAndExpand(&maxColor); - uint16 color1 = roundAndExpand(&minColor); - - if (color0 < color1) - { - swap(maxColor, minColor); - swap(color0, color1); - } - - dxtBlock->col0 = Color16(color1); - dxtBlock->col1 = Color16(color0); - dxtBlock->indices = computeIndices3(rgba, maxColor, minColor); - - // optimizeEndPoints(block, dxtBlock); - } -} - - -void QuickCompress::compressDXT3(const ColorBlock & rgba, BlockDXT3 * dxtBlock) -{ - compressDXT1(rgba, &dxtBlock->color); - OptimalCompress::compressDXT3A(rgba, &dxtBlock->alpha); -} - - -void QuickCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock, int iterationCount/*=8*/) -{ - uint8 alpha0 = 0; - uint8 alpha1 = 255; - - // Get min/max alpha. - for (uint i = 0; i < 16; i++) - { - uint8 alpha = rgba.color(i).a; - alpha0 = max(alpha0, alpha); - alpha1 = min(alpha1, alpha); - } - - AlphaBlockDXT5 block; - block.alpha0 = alpha0 - (alpha0 - alpha1) / 34; - block.alpha1 = alpha1 + (alpha0 - alpha1) / 34; - uint besterror = computeAlphaIndices(rgba, &block); - - AlphaBlockDXT5 bestblock = block; - - for (int i = 0; i < iterationCount; i++) - { - optimizeAlpha8(rgba, &block); - uint error = computeAlphaIndices(rgba, &block); - - if (error >= besterror) - { - // No improvement, stop. - break; - } - if (sameIndices(block, bestblock)) - { - bestblock = block; - break; - } - - besterror = error; - bestblock = block; - }; - - // Copy best block to result; - *dxtBlock = bestblock; -} - -void QuickCompress::compressDXT5(const ColorBlock & rgba, BlockDXT5 * dxtBlock, int iterationCount/*=8*/) -{ - compressDXT1(rgba, &dxtBlock->color); - compressDXT5A(rgba, &dxtBlock->alpha, iterationCount); -} +// Copyright (c) 2009-2011 Ignacio Castano +// Copyright (c) 2007-2009 NVIDIA Corporation -- Ignacio Castano +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. 
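[Editor's note: the rewritten QuickCompressDXT.cpp that follows keeps the same overall strategy as the code removed above: extract the 16 texels, take a per-channel bounding box, flip its diagonal along axes with negative covariance (selectDiagonal), shrink it slightly (insetBBox), round the endpoints to RGB565 (roundAndExpand), assign each texel the nearest of the four palette entries (computeIndices4), then refine the endpoints by least squares (optimizeEndPoints4). A simplified, self-contained sketch of the bounding-box step, using plain arrays instead of nv::Vector3.]

#include <algorithm>

// Endpoint selection by color-space bounding box, as in findMinMaxColorsBox(),
// plus the shrink applied by insetBBox() (the real code also subtracts a small
// constant bias and clamps to [0, 255]).
static void boundingBoxEndpoints(const float block[16][3], float minColor[3], float maxColor[3])
{
    for (int c = 0; c < 3; c++) { minColor[c] = 255.0f; maxColor[c] = 0.0f; }

    for (int i = 0; i < 16; i++)
        for (int c = 0; c < 3; c++)
        {
            minColor[c] = std::min(minColor[c], block[i][c]);
            maxColor[c] = std::max(maxColor[c], block[i][c]);
        }

    // Shrink the box by 1/16th of its extent so the interpolated palette
    // sits closer to the bulk of the texels.
    for (int c = 0; c < 3; c++)
    {
        float inset = (maxColor[c] - minColor[c]) / 16.0f;
        maxColor[c] -= inset;
        minColor[c] += inset;
    }
}

[Single-color blocks bypass all of this: compressDXT1() detects them via rgba.isSingleColor() and hands them to OptimalCompress::compressDXT1(), which uses the lookup tables discussed further down.]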
+ +#include "QuickCompressDXT.h" +#include "OptimalCompressDXT.h" + +#include "nvimage/ColorBlock.h" +#include "nvimage/BlockDXT.h" + +#include "nvmath/Color.inl" +#include "nvmath/Vector.inl" +#include "nvmath/Fitting.h" + +#include "nvcore/Utils.h" // swap + +#include // memset +#include // FLT_MAX + +using namespace nv; +using namespace QuickCompress; + + + +inline static void extractColorBlockRGB(const ColorBlock & rgba, Vector3 block[16]) +{ + for (int i = 0; i < 16; i++) + { + const Color32 c = rgba.color(i); + block[i] = Vector3(c.r, c.g, c.b); + } +} + +inline static uint extractColorBlockRGBA(const ColorBlock & rgba, Vector3 block[16]) +{ + int num = 0; + + for (int i = 0; i < 16; i++) + { + const Color32 c = rgba.color(i); + if (c.a > 127) + { + block[num++] = Vector3(c.r, c.g, c.b); + } + } + + return num; +} + + +// find minimum and maximum colors based on bounding box in color space +inline static void findMinMaxColorsBox(const Vector3 * block, uint num, Vector3 * restrict maxColor, Vector3 * restrict minColor) +{ + *maxColor = Vector3(0, 0, 0); + *minColor = Vector3(255, 255, 255); + + for (uint i = 0; i < num; i++) + { + *maxColor = max(*maxColor, block[i]); + *minColor = min(*minColor, block[i]); + } +} + + +inline static void selectDiagonal(const Vector3 * block, uint num, Vector3 * restrict maxColor, Vector3 * restrict minColor) +{ + Vector3 center = (*maxColor + *minColor) * 0.5f; + + Vector2 covariance = Vector2(0.0f); + for (uint i = 0; i < num; i++) + { + Vector3 t = block[i] - center; + covariance += t.xy() * t.z; + } + + float x0 = maxColor->x; + float y0 = maxColor->y; + float x1 = minColor->x; + float y1 = minColor->y; + + if (covariance.x < 0) { + swap(x0, x1); + } + if (covariance.y < 0) { + swap(y0, y1); + } + + maxColor->set(x0, y0, maxColor->z); + minColor->set(x1, y1, minColor->z); +} + +inline static void insetBBox(Vector3 * restrict maxColor, Vector3 * restrict minColor) +{ + Vector3 inset = (*maxColor - *minColor) / 16.0f - (8.0f / 255.0f) / 16.0f; + *maxColor = clamp(*maxColor - inset, 0.0f, 255.0f); + *minColor = clamp(*minColor + inset, 0.0f, 255.0f); +} + +#include "nvmath/ftoi.h" + +// Takes a normalized color in [0, 255] range and returns +inline static uint16 roundAndExpand(Vector3 * restrict v) +{ + uint r = ftoi_floor(clamp(v->x * (31.0f / 255.0f), 0.0f, 31.0f)); + uint g = ftoi_floor(clamp(v->y * (63.0f / 255.0f), 0.0f, 63.0f)); + uint b = ftoi_floor(clamp(v->z * (31.0f / 255.0f), 0.0f, 31.0f)); + + float r0 = float(((r+0) << 3) | ((r+0) >> 2)); + float r1 = float(((r+1) << 3) | ((r+1) >> 2)); + if (fabs(v->x - r1) < fabs(v->x - r0)) r = min(r+1, 31U); + + float g0 = float(((g+0) << 2) | ((g+0) >> 4)); + float g1 = float(((g+1) << 2) | ((g+1) >> 4)); + if (fabs(v->y - g1) < fabs(v->y - g0)) g = min(g+1, 63U); + + float b0 = float(((b+0) << 3) | ((b+0) >> 2)); + float b1 = float(((b+1) << 3) | ((b+1) >> 2)); + if (fabs(v->z - b1) < fabs(v->z - b0)) b = min(b+1, 31U); + + + uint16 w = (r << 11) | (g << 5) | b; + + r = (r << 3) | (r >> 2); + g = (g << 2) | (g >> 4); + b = (b << 3) | (b >> 2); + *v = Vector3(float(r), float(g), float(b)); + + return w; +} + +// Takes a normalized color in [0, 255] range and returns +inline static uint16 roundAndExpand01(Vector3 * restrict v) +{ + uint r = ftoi_floor(clamp(v->x * 31.0f, 0.0f, 31.0f)); + uint g = ftoi_floor(clamp(v->y * 63.0f, 0.0f, 63.0f)); + uint b = ftoi_floor(clamp(v->z * 31.0f, 0.0f, 31.0f)); + + float r0 = float(((r+0) << 3) | ((r+0) >> 2)); + float r1 = float(((r+1) << 3) | ((r+1) >> 2)); + if 
(fabs(v->x - r1) < fabs(v->x - r0)) r = min(r+1, 31U); + + float g0 = float(((g+0) << 2) | ((g+0) >> 4)); + float g1 = float(((g+1) << 2) | ((g+1) >> 4)); + if (fabs(v->y - g1) < fabs(v->y - g0)) g = min(g+1, 63U); + + float b0 = float(((b+0) << 3) | ((b+0) >> 2)); + float b1 = float(((b+1) << 3) | ((b+1) >> 2)); + if (fabs(v->z - b1) < fabs(v->z - b0)) b = min(b+1, 31U); + + + uint16 w = (r << 11) | (g << 5) | b; + + r = (r << 3) | (r >> 2); + g = (g << 2) | (g >> 4); + b = (b << 3) | (b >> 2); + *v = Vector3(float(r) / 255.0f, float(g) / 255.0f, float(b) / 255.0f); + + return w; +} + + + +inline static float colorDistance(Vector3::Arg c0, Vector3::Arg c1) +{ + return dot(c0-c1, c0-c1); +} + +Vector3 round255(const Vector3 & v) { + //return Vector3(ftoi_round(255 * v.x), ftoi_round(255 * v.y), ftoi_round(255 * v.z)) * (1.0f / 255); + //return Vector3(floorf(v.x + 0.5f), floorf(v.y + 0.5f), floorf(v.z + 0.5f)); + return v; +} + + +inline static uint computeIndices4(const Vector3 block[16], Vector3::Arg maxColor, Vector3::Arg minColor) +{ + Vector3 palette[4]; + palette[0] = maxColor; + palette[1] = minColor; + //palette[2] = round255((2 * palette[0] + palette[1]) / 3.0f); + //palette[3] = round255((2 * palette[1] + palette[0]) / 3.0f); + palette[2] = lerp(palette[0], palette[1], 1.0f / 3.0f); + palette[3] = lerp(palette[0], palette[1], 2.0f / 3.0f); + + uint indices = 0; + for(int i = 0; i < 16; i++) + { + float d0 = colorDistance(palette[0], block[i]); + float d1 = colorDistance(palette[1], block[i]); + float d2 = colorDistance(palette[2], block[i]); + float d3 = colorDistance(palette[3], block[i]); + + uint b0 = d0 > d3; + uint b1 = d1 > d2; + uint b2 = d0 > d2; + uint b3 = d1 > d3; + uint b4 = d2 > d3; + + uint x0 = b1 & b2; + uint x1 = b0 & b3; + uint x2 = b0 & b4; + + indices |= (x2 | ((x0 | x1) << 1)) << (2 * i); + } + + return indices; +} + +// maxColor and minColor are expected to be in the same range as the color set. +/* +inline static uint computeIndices4(const ColorSet & set, Vector3::Arg maxColor, Vector3::Arg minColor) +{ + Vector3 palette[4]; + palette[0] = maxColor; + palette[1] = minColor; + palette[2] = lerp(palette[0], palette[1], 1.0f / 3.0f); + palette[3] = lerp(palette[0], palette[1], 2.0f / 3.0f); + + Vector3 mem[(4+2)*2]; + memset(mem, 0, sizeof(mem)); + + Vector3 * row0 = mem; + Vector3 * row1 = mem + (4+2); + + uint indices = 0; + //for(int i = 0; i < 16; i++) + for (uint y = 0; y < 4; y++) { + for (uint x = 0; x < 4; x++) { + int i = y*4+x; + + if (!set.isValidIndex(i)) { + // Skip masked pixels and out of bounds. + continue; + } + + Vector3 color = set.color(i).xyz(); + + // Add error. + color += row0[1+x]; + + float d0 = colorDistance(palette[0], color); + float d1 = colorDistance(palette[1], color); + float d2 = colorDistance(palette[2], color); + float d3 = colorDistance(palette[3], color); + + uint b0 = d0 > d3; + uint b1 = d1 > d2; + uint b2 = d0 > d2; + uint b3 = d1 > d3; + uint b4 = d2 > d3; + + uint x0 = b1 & b2; + uint x1 = b0 & b3; + uint x2 = b0 & b4; + + int index = x2 | ((x0 | x1) << 1); + indices |= index << (2 * i); + + // Compute new error. + Vector3 diff = color - palette[index]; + + // Propagate new error. 
+ //row0[1+x+1] += 7.0f / 16.0f * diff; + //row1[1+x-1] += 3.0f / 16.0f * diff; + //row1[1+x+0] += 5.0f / 16.0f * diff; + //row1[1+x+1] += 1.0f / 16.0f * diff; + } + + swap(row0, row1); + memset(row1, 0, sizeof(Vector3) * (4+2)); + } + + return indices; +}*/ + +inline static float evaluatePaletteError4(const Vector3 block[16], Vector3::Arg maxColor, Vector3::Arg minColor) +{ + Vector3 palette[4]; + palette[0] = maxColor; + palette[1] = minColor; + //palette[2] = round255((2 * palette[0] + palette[1]) / 3.0f); + //palette[3] = round255((2 * palette[1] + palette[0]) / 3.0f); + palette[2] = lerp(palette[0], palette[1], 1.0f / 3.0f); + palette[3] = lerp(palette[0], palette[1], 2.0f / 3.0f); + + float total = 0.0f; + for (int i = 0; i < 16; i++) + { + float d0 = colorDistance(palette[0], block[i]); + float d1 = colorDistance(palette[1], block[i]); + float d2 = colorDistance(palette[2], block[i]); + float d3 = colorDistance(palette[3], block[i]); + + total += min(min(d0, d1), min(d2, d3)); + } + + return total; +} + +inline static float evaluatePaletteError3(const Vector3 block[16], Vector3::Arg maxColor, Vector3::Arg minColor) +{ + Vector3 palette[4]; + palette[0] = minColor; + palette[1] = maxColor; + palette[2] = (palette[0] + palette[1]) * 0.5f; + palette[3] = Vector3(0); + + float total = 0.0f; + for (int i = 0; i < 16; i++) + { + float d0 = colorDistance(palette[0], block[i]); + float d1 = colorDistance(palette[1], block[i]); + float d2 = colorDistance(palette[2], block[i]); + //float d3 = colorDistance(palette[3], block[i]); + + //total += min(min(d0, d1), min(d2, d3)); + total += min(min(d0, d1), d2); + } + + return total; +} + + +// maxColor and minColor are expected to be in the same range as the color set. +/*inline static uint computeIndices3(const ColorSet & set, Vector3::Arg maxColor, Vector3::Arg minColor) +{ + Vector3 palette[4]; + palette[0] = minColor; + palette[1] = maxColor; + palette[2] = (palette[0] + palette[1]) * 0.5f; + + uint indices = 0; + for(int i = 0; i < 16; i++) + { + if (!set.isValidIndex(i)) { + // Skip masked pixels and out of bounds. 
+ indices |= 3 << (2 * i); + continue; + } + + Vector3 color = set.color(i).xyz(); + + float d0 = colorDistance(palette[0], color); + float d1 = colorDistance(palette[1], color); + float d2 = colorDistance(palette[2], color); + + uint index; + if (d0 < d1 && d0 < d2) index = 0; + else if (d1 < d2) index = 1; + else index = 2; + + indices |= index << (2 * i); + } + + return indices; +}*/ + +inline static uint computeIndices3(const Vector3 block[16], Vector3::Arg maxColor, Vector3::Arg minColor) +{ + Vector3 palette[4]; + palette[0] = minColor; + palette[1] = maxColor; + palette[2] = (palette[0] + palette[1]) * 0.5f; + + uint indices = 0; + for(int i = 0; i < 16; i++) + { + float d0 = colorDistance(palette[0], block[i]); + float d1 = colorDistance(palette[1], block[i]); + float d2 = colorDistance(palette[2], block[i]); + + uint index; + if (d0 < d1 && d0 < d2) index = 0; + else if (d1 < d2) index = 1; + else index = 2; + + indices |= index << (2 * i); + } + + return indices; +} + + + + +static void optimizeEndPoints4(Vector3 block[16], BlockDXT1 * dxtBlock) +{ + float alpha2_sum = 0.0f; + float beta2_sum = 0.0f; + float alphabeta_sum = 0.0f; + Vector3 alphax_sum(0.0f); + Vector3 betax_sum(0.0f); + + for( int i = 0; i < 16; ++i ) + { + const uint bits = dxtBlock->indices >> (2 * i); + + float beta = float(bits & 1); + if (bits & 2) beta = (1 + beta) / 3.0f; + float alpha = 1.0f - beta; + + alpha2_sum += alpha * alpha; + beta2_sum += beta * beta; + alphabeta_sum += alpha * beta; + alphax_sum += alpha * block[i]; + betax_sum += beta * block[i]; + } + + float denom = alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum; + if (equal(denom, 0.0f)) return; + + float factor = 1.0f / denom; + + Vector3 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor; + Vector3 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor; + + a = clamp(a, 0, 255); + b = clamp(b, 0, 255); + + uint16 color0 = roundAndExpand(&a); + uint16 color1 = roundAndExpand(&b); + + if (color0 < color1) + { + swap(a, b); + swap(color0, color1); + } + + dxtBlock->col0 = Color16(color0); + dxtBlock->col1 = Color16(color1); + dxtBlock->indices = computeIndices4(block, a, b); +} + +static void optimizeEndPoints3(Vector3 block[16], BlockDXT1 * dxtBlock) +{ + float alpha2_sum = 0.0f; + float beta2_sum = 0.0f; + float alphabeta_sum = 0.0f; + Vector3 alphax_sum(0.0f); + Vector3 betax_sum(0.0f); + + for( int i = 0; i < 16; ++i ) + { + const uint bits = dxtBlock->indices >> (2 * i); + + float beta = float(bits & 1); + if (bits & 2) beta = 0.5f; + float alpha = 1.0f - beta; + + alpha2_sum += alpha * alpha; + beta2_sum += beta * beta; + alphabeta_sum += alpha * beta; + alphax_sum += alpha * block[i]; + betax_sum += beta * block[i]; + } + + float denom = alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum; + if (equal(denom, 0.0f)) return; + + float factor = 1.0f / denom; + + Vector3 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor; + Vector3 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor; + + a = clamp(a, 0, 255); + b = clamp(b, 0, 255); + + uint16 color0 = roundAndExpand(&a); + uint16 color1 = roundAndExpand(&b); + + if (color0 < color1) + { + swap(a, b); + swap(color0, color1); + } + + dxtBlock->col0 = Color16(color1); + dxtBlock->col1 = Color16(color0); + dxtBlock->indices = computeIndices3(block, a, b); +} + +namespace +{ + + static uint computeAlphaIndices(const AlphaBlock4x4 & src, AlphaBlockDXT5 * block) + { + uint8 alphas[8]; + block->evaluatePalette(alphas, false); // @@ 
Use target decoder. + + uint totalError = 0; + + for (uint i = 0; i < 16; i++) + { + uint8 alpha = src.alpha[i]; + + uint besterror = 256*256; + uint best = 8; + for(uint p = 0; p < 8; p++) + { + int d = alphas[p] - alpha; + uint error = d * d; + + if (error < besterror) + { + besterror = error; + best = p; + } + } + nvDebugCheck(best < 8); + + totalError += besterror; + block->setIndex(i, best); + } + + return totalError; + } + + static void optimizeAlpha8(const AlphaBlock4x4 & src, AlphaBlockDXT5 * block) + { + float alpha2_sum = 0; + float beta2_sum = 0; + float alphabeta_sum = 0; + float alphax_sum = 0; + float betax_sum = 0; + + for (int i = 0; i < 16; i++) + { + uint idx = block->index(i); + float alpha; + if (idx < 2) alpha = 1.0f - idx; + else alpha = (8.0f - idx) / 7.0f; + + float beta = 1 - alpha; + + alpha2_sum += alpha * alpha; + beta2_sum += beta * beta; + alphabeta_sum += alpha * beta; + alphax_sum += alpha * src.alpha[i]; + betax_sum += beta * src.alpha[i]; + } + + const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); + + float a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor; + float b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor; + + uint alpha0 = uint(min(max(a, 0.0f), 255.0f)); + uint alpha1 = uint(min(max(b, 0.0f), 255.0f)); + + if (alpha0 < alpha1) + { + swap(alpha0, alpha1); + + // Flip indices: + for (int i = 0; i < 16; i++) + { + uint idx = block->index(i); + if (idx < 2) block->setIndex(i, 1 - idx); + else block->setIndex(i, 9 - idx); + } + } + else if (alpha0 == alpha1) + { + for (int i = 0; i < 16; i++) + { + block->setIndex(i, 0); + } + } + + block->alpha0 = alpha0; + block->alpha1 = alpha1; + } + + /* + static void optimizeAlpha6(const ColorBlock & rgba, AlphaBlockDXT5 * block) + { + float alpha2_sum = 0; + float beta2_sum = 0; + float alphabeta_sum = 0; + float alphax_sum = 0; + float betax_sum = 0; + + for (int i = 0; i < 16; i++) + { + uint8 x = rgba.color(i).a; + if (x == 0 || x == 255) continue; + + uint bits = block->index(i); + if (bits == 6 || bits == 7) continue; + + float alpha; + if (bits == 0) alpha = 1.0f; + else if (bits == 1) alpha = 0.0f; + else alpha = (6.0f - block->index(i)) / 5.0f; + + float beta = 1 - alpha; + + alpha2_sum += alpha * alpha; + beta2_sum += beta * beta; + alphabeta_sum += alpha * beta; + alphax_sum += alpha * x; + betax_sum += beta * x; + } + + const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); + + float a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor; + float b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor; + + uint alpha0 = uint(min(max(a, 0.0f), 255.0f)); + uint alpha1 = uint(min(max(b, 0.0f), 255.0f)); + + if (alpha0 > alpha1) + { + swap(alpha0, alpha1); + } + + block->alpha0 = alpha0; + block->alpha1 = alpha1; + } + */ + + static bool sameIndices(const AlphaBlockDXT5 & block0, const AlphaBlockDXT5 & block1) + { + const uint64 mask = ~uint64(0xFFFF); + return (block0.u | mask) == (block1.u | mask); + } + +} // namespace + + + +void QuickCompress::compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock) +{ + if (rgba.isSingleColor()) + { + OptimalCompress::compressDXT1(rgba.color(0), dxtBlock); + } + else + { + // read block + Vector3 block[16]; + extractColorBlockRGB(rgba, block); + +#if 1 + // find min and max colors + Vector3 maxColor, minColor; + findMinMaxColorsBox(block, 16, &maxColor, &minColor); + + selectDiagonal(block, 16, &maxColor, &minColor); + + insetBBox(&maxColor, 
&minColor); +#else + float weights[16] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + Vector3 cluster[4]; + int count = Compute4Means(16, block, weights, Vector3(1, 1, 1), cluster); + + Vector3 maxColor, minColor; + float bestError = FLT_MAX; + + for (int i = 1; i < 4; i++) + { + for (int j = 0; j < i; j++) + { + uint16 color0 = roundAndExpand(&cluster[i]); + uint16 color1 = roundAndExpand(&cluster[j]); + + float error = evaluatePaletteError4(block, cluster[i], cluster[j]); + if (error < bestError) { + bestError = error; + maxColor = cluster[i]; + minColor = cluster[j]; + } + } + } +#endif + + uint16 color0 = roundAndExpand(&maxColor); + uint16 color1 = roundAndExpand(&minColor); + + if (color0 < color1) + { + swap(maxColor, minColor); + swap(color0, color1); + } + + dxtBlock->col0 = Color16(color0); + dxtBlock->col1 = Color16(color1); + dxtBlock->indices = computeIndices4(block, maxColor, minColor); + + optimizeEndPoints4(block, dxtBlock); + } +} + + +void QuickCompress::compressDXT1a(const ColorBlock & rgba, BlockDXT1 * dxtBlock) +{ + bool hasAlpha = false; + + for (uint i = 0; i < 16; i++) + { + if (rgba.color(i).a == 0) { + hasAlpha = true; + break; + } + } + + if (!hasAlpha) + { + compressDXT1(rgba, dxtBlock); + } + // @@ Handle single RGB, with varying alpha? We need tables for single color compressor in 3 color mode. + //else if (rgba.isSingleColorNoAlpha()) { ... } + else + { + // read block + Vector3 block[16]; + uint num = extractColorBlockRGBA(rgba, block); + + // find min and max colors + Vector3 maxColor, minColor; + findMinMaxColorsBox(block, num, &maxColor, &minColor); + + selectDiagonal(block, num, &maxColor, &minColor); + + insetBBox(&maxColor, &minColor); + + uint16 color0 = roundAndExpand(&maxColor); + uint16 color1 = roundAndExpand(&minColor); + + if (color0 < color1) + { + swap(maxColor, minColor); + swap(color0, color1); + } + + dxtBlock->col0 = Color16(color1); + dxtBlock->col1 = Color16(color0); + dxtBlock->indices = computeIndices3(block, maxColor, minColor); + + // optimizeEndPoints(block, dxtBlock); + } +} + + +void QuickCompress::compressDXT3(const ColorBlock & src, BlockDXT3 * dxtBlock) +{ + compressDXT1(src, &dxtBlock->color); + OptimalCompress::compressDXT3A(src, &dxtBlock->alpha); +} + +void QuickCompress::compressDXT5A(const ColorBlock & src, AlphaBlockDXT5 * dst, int iterationCount/*=8*/) +{ + AlphaBlock4x4 tmp; + tmp.init(src, 3); + compressDXT5A(tmp, dst, iterationCount); +} + +void QuickCompress::compressDXT5A(const AlphaBlock4x4 & src, AlphaBlockDXT5 * dst, int iterationCount/*=8*/) +{ + uint8 alpha0 = 0; + uint8 alpha1 = 255; + + // Get min/max alpha. + for (uint i = 0; i < 16; i++) + { + uint8 alpha = src.alpha[i]; + alpha0 = max(alpha0, alpha); + alpha1 = min(alpha1, alpha); + } + + AlphaBlockDXT5 block; + block.alpha0 = alpha0 - (alpha0 - alpha1) / 34; + block.alpha1 = alpha1 + (alpha0 - alpha1) / 34; + uint besterror = computeAlphaIndices(src, &block); + + AlphaBlockDXT5 bestblock = block; + + for (int i = 0; i < iterationCount; i++) + { + optimizeAlpha8(src, &block); + uint error = computeAlphaIndices(src, &block); + + if (error >= besterror) + { + // No improvement, stop. 
+ break; + } + if (sameIndices(block, bestblock)) + { + bestblock = block; + break; + } + + besterror = error; + bestblock = block; + }; + + // Copy best block to result; + *dst = bestblock; +} + +void QuickCompress::compressDXT5(const ColorBlock & rgba, BlockDXT5 * dxtBlock, int iterationCount/*=8*/) +{ + compressDXT1(rgba, &dxtBlock->color); + compressDXT5A(rgba, &dxtBlock->alpha, iterationCount); +} + + + +/*void QuickCompress::outputBlock4(const ColorSet & set, const Vector3 & start, const Vector3 & end, BlockDXT1 * block) +{ + Vector3 minColor = start * 255.0f; + Vector3 maxColor = end * 255.0f; + uint16 color0 = roundAndExpand(&maxColor); + uint16 color1 = roundAndExpand(&minColor); + + if (color0 < color1) + { + swap(maxColor, minColor); + swap(color0, color1); + } + + block->col0 = Color16(color0); + block->col1 = Color16(color1); + block->indices = computeIndices4(set, maxColor / 255.0f, minColor / 255.0f); + + //optimizeEndPoints4(set, block); +} + +void QuickCompress::outputBlock3(const ColorSet & set, const Vector3 & start, const Vector3 & end, BlockDXT1 * block) +{ + Vector3 minColor = start * 255.0f; + Vector3 maxColor = end * 255.0f; + uint16 color0 = roundAndExpand(&minColor); + uint16 color1 = roundAndExpand(&maxColor); + + if (color0 > color1) + { + swap(maxColor, minColor); + swap(color0, color1); + } + + block->col0 = Color16(color0); + block->col1 = Color16(color1); + block->indices = computeIndices3(set, maxColor / 255.0f, minColor / 255.0f); + + //optimizeEndPoints3(set, block); +} +*/ Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/SingleColorLookup.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/SingleColorLookup.h +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/SingleColorLookup.h @@ -1,588 +1,9 @@ -/* -typedef unsigned char uint8; +#include "nvcore/nvcore.h" // uint8 -static int Mul8Bit(int a, int b) -{ - int t = a * b + 128; - return (t + (t >> 8)) >> 8; -} - -static inline int Lerp13(int fm, int to) -{ - return (fm * 2 + to) / 3; -} - -static void PrepareOptTable(uint8 * Table, const uint8 * expand, int size) -{ - for (int i = 0; i < 256; i++) - { - float bestErr = 256; - - for (int min = 0; min < size; min++) - { - for (int max = 0; max < size; max++) - { - int mine = expand[min]; - int maxe = expand[max]; - float err = abs(maxe + Mul8Bit(mine-maxe, 0x55) - i); - err += 0.03f * abs(max - min); - - if (err < bestErr) - { - Table[i*2+0] = max; - Table[i*2+1] = min; - bestErr = err; - } - } - } - } -} - - -void initTables() -{ - uint8 Expand5[32]; - uint8 Expand6[64]; - - for(sInt i=0;i<32;i++) - Expand5[i] = (i<<3)|(i>>2); - - for(sInt i=0;i<64;i++) - Expand6[i] = (i<<2)|(i>>4); - - PrepareOptTable(OMatch5, Expand5, 32) - PrepareOptTable(OMatch6, Expand6, 64) -}; -*/ - -#if __CUDACC__ -__constant__ unsigned short -#else -const static uint8 -#endif -OMatch5[256][2] = -{ - {0x00, 0x00}, - {0x00, 0x00}, - {0x00, 0x01}, - {0x00, 0x01}, - {0x01, 0x00}, - {0x01, 0x00}, - {0x01, 0x00}, - {0x01, 0x01}, - {0x01, 0x01}, - {0x01, 0x01}, - {0x01, 0x02}, - {0x00, 0x04}, - {0x02, 0x01}, - {0x02, 0x01}, - {0x02, 0x01}, - {0x02, 0x02}, - {0x02, 0x02}, - {0x02, 0x02}, - {0x02, 0x03}, - {0x01, 0x05}, - {0x03, 0x02}, - {0x03, 0x02}, - {0x04, 0x00}, - {0x03, 0x03}, - {0x03, 0x03}, - {0x03, 0x03}, - {0x03, 0x04}, - {0x03, 0x04}, - {0x03, 0x04}, - {0x03, 0x05}, - {0x04, 0x03}, - {0x04, 0x03}, - {0x05, 0x02}, - {0x04, 0x04}, - {0x04, 0x04}, - {0x04, 0x05}, - {0x04, 0x05}, - {0x05, 0x04}, - {0x05, 0x04}, - {0x05, 
0x04}, - {0x06, 0x03}, - {0x05, 0x05}, - {0x05, 0x05}, - {0x05, 0x06}, - {0x04, 0x08}, - {0x06, 0x05}, - {0x06, 0x05}, - {0x06, 0x05}, - {0x06, 0x06}, - {0x06, 0x06}, - {0x06, 0x06}, - {0x06, 0x07}, - {0x05, 0x09}, - {0x07, 0x06}, - {0x07, 0x06}, - {0x08, 0x04}, - {0x07, 0x07}, - {0x07, 0x07}, - {0x07, 0x07}, - {0x07, 0x08}, - {0x07, 0x08}, - {0x07, 0x08}, - {0x07, 0x09}, - {0x08, 0x07}, - {0x08, 0x07}, - {0x09, 0x06}, - {0x08, 0x08}, - {0x08, 0x08}, - {0x08, 0x09}, - {0x08, 0x09}, - {0x09, 0x08}, - {0x09, 0x08}, - {0x09, 0x08}, - {0x0A, 0x07}, - {0x09, 0x09}, - {0x09, 0x09}, - {0x09, 0x0A}, - {0x08, 0x0C}, - {0x0A, 0x09}, - {0x0A, 0x09}, - {0x0A, 0x09}, - {0x0A, 0x0A}, - {0x0A, 0x0A}, - {0x0A, 0x0A}, - {0x0A, 0x0B}, - {0x09, 0x0D}, - {0x0B, 0x0A}, - {0x0B, 0x0A}, - {0x0C, 0x08}, - {0x0B, 0x0B}, - {0x0B, 0x0B}, - {0x0B, 0x0B}, - {0x0B, 0x0C}, - {0x0B, 0x0C}, - {0x0B, 0x0C}, - {0x0B, 0x0D}, - {0x0C, 0x0B}, - {0x0C, 0x0B}, - {0x0D, 0x0A}, - {0x0C, 0x0C}, - {0x0C, 0x0C}, - {0x0C, 0x0D}, - {0x0C, 0x0D}, - {0x0D, 0x0C}, - {0x0D, 0x0C}, - {0x0D, 0x0C}, - {0x0E, 0x0B}, - {0x0D, 0x0D}, - {0x0D, 0x0D}, - {0x0D, 0x0E}, - {0x0C, 0x10}, - {0x0E, 0x0D}, - {0x0E, 0x0D}, - {0x0E, 0x0D}, - {0x0E, 0x0E}, - {0x0E, 0x0E}, - {0x0E, 0x0E}, - {0x0E, 0x0F}, - {0x0D, 0x11}, - {0x0F, 0x0E}, - {0x0F, 0x0E}, - {0x10, 0x0C}, - {0x0F, 0x0F}, - {0x0F, 0x0F}, - {0x0F, 0x0F}, - {0x0F, 0x10}, - {0x0F, 0x10}, - {0x0F, 0x10}, - {0x0F, 0x11}, - {0x10, 0x0F}, - {0x10, 0x0F}, - {0x11, 0x0E}, - {0x10, 0x10}, - {0x10, 0x10}, - {0x10, 0x11}, - {0x10, 0x11}, - {0x11, 0x10}, - {0x11, 0x10}, - {0x11, 0x10}, - {0x12, 0x0F}, - {0x11, 0x11}, - {0x11, 0x11}, - {0x11, 0x12}, - {0x10, 0x14}, - {0x12, 0x11}, - {0x12, 0x11}, - {0x12, 0x11}, - {0x12, 0x12}, - {0x12, 0x12}, - {0x12, 0x12}, - {0x12, 0x13}, - {0x11, 0x15}, - {0x13, 0x12}, - {0x13, 0x12}, - {0x14, 0x10}, - {0x13, 0x13}, - {0x13, 0x13}, - {0x13, 0x13}, - {0x13, 0x14}, - {0x13, 0x14}, - {0x13, 0x14}, - {0x13, 0x15}, - {0x14, 0x13}, - {0x14, 0x13}, - {0x15, 0x12}, - {0x14, 0x14}, - {0x14, 0x14}, - {0x14, 0x15}, - {0x14, 0x15}, - {0x15, 0x14}, - {0x15, 0x14}, - {0x15, 0x14}, - {0x16, 0x13}, - {0x15, 0x15}, - {0x15, 0x15}, - {0x15, 0x16}, - {0x14, 0x18}, - {0x16, 0x15}, - {0x16, 0x15}, - {0x16, 0x15}, - {0x16, 0x16}, - {0x16, 0x16}, - {0x16, 0x16}, - {0x16, 0x17}, - {0x15, 0x19}, - {0x17, 0x16}, - {0x17, 0x16}, - {0x18, 0x14}, - {0x17, 0x17}, - {0x17, 0x17}, - {0x17, 0x17}, - {0x17, 0x18}, - {0x17, 0x18}, - {0x17, 0x18}, - {0x17, 0x19}, - {0x18, 0x17}, - {0x18, 0x17}, - {0x19, 0x16}, - {0x18, 0x18}, - {0x18, 0x18}, - {0x18, 0x19}, - {0x18, 0x19}, - {0x19, 0x18}, - {0x19, 0x18}, - {0x19, 0x18}, - {0x1A, 0x17}, - {0x19, 0x19}, - {0x19, 0x19}, - {0x19, 0x1A}, - {0x18, 0x1C}, - {0x1A, 0x19}, - {0x1A, 0x19}, - {0x1A, 0x19}, - {0x1A, 0x1A}, - {0x1A, 0x1A}, - {0x1A, 0x1A}, - {0x1A, 0x1B}, - {0x19, 0x1D}, - {0x1B, 0x1A}, - {0x1B, 0x1A}, - {0x1C, 0x18}, - {0x1B, 0x1B}, - {0x1B, 0x1B}, - {0x1B, 0x1B}, - {0x1B, 0x1C}, - {0x1B, 0x1C}, - {0x1B, 0x1C}, - {0x1B, 0x1D}, - {0x1C, 0x1B}, - {0x1C, 0x1B}, - {0x1D, 0x1A}, - {0x1C, 0x1C}, - {0x1C, 0x1C}, - {0x1C, 0x1D}, - {0x1C, 0x1D}, - {0x1D, 0x1C}, - {0x1D, 0x1C}, - {0x1D, 0x1C}, - {0x1E, 0x1B}, - {0x1D, 0x1D}, - {0x1D, 0x1D}, - {0x1D, 0x1E}, - {0x1D, 0x1E}, - {0x1E, 0x1D}, - {0x1E, 0x1D}, - {0x1E, 0x1D}, - {0x1E, 0x1E}, - {0x1E, 0x1E}, - {0x1E, 0x1E}, - {0x1E, 0x1F}, - {0x1E, 0x1F}, - {0x1F, 0x1E}, - {0x1F, 0x1E}, - {0x1F, 0x1E}, - {0x1F, 0x1F}, - {0x1F, 0x1F}, -}; - -#if __CUDACC__ -__constant__ unsigned short -#else -const static uint8 -#endif 
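[Editor's note: the hard-coded OMatch5/OMatch6 tables being removed here, and rebuilt at startup by the new SingleColorLookup.cpp further down, map each 8-bit channel value to the pair of 5- or 6-bit endpoints whose 2/3 : 1/3 interpolation reproduces it best. A sketch of how a single-color BC1 encoder consumes them, along the lines of what the OptimalCompress single-color path does; the function below is illustrative and not part of the patch.]

#include "SingleColorLookup.h" // OMatch5 / OMatch6, assumed already initialized
                                // via NV_AT_STARTUP(initSingleColorLookup())

static void encodeSingleColorBC1(unsigned char r, unsigned char g, unsigned char b,
                                 unsigned short * col0, unsigned short * col1,
                                 unsigned int * indices)
{
    *col0 = (unsigned short)((OMatch5[r][0] << 11) | (OMatch6[g][0] << 5) | OMatch5[b][0]);
    *col1 = (unsigned short)((OMatch5[r][1] << 11) | (OMatch6[g][1] << 5) | OMatch5[b][1]);
    *indices = 0xAAAAAAAA; // every texel uses palette entry 2 = (2*col0 + col1) / 3
}

// Note: a complete encoder must still ensure col0 > col1, otherwise the block
// decodes in 3-color mode and entry 2 becomes (col0 + col1) / 2.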
-OMatch6[256][2] = -{ - {0x00, 0x00}, - {0x00, 0x01}, - {0x01, 0x00}, - {0x01, 0x01}, - {0x01, 0x01}, - {0x01, 0x02}, - {0x02, 0x01}, - {0x02, 0x02}, - {0x02, 0x02}, - {0x02, 0x03}, - {0x03, 0x02}, - {0x03, 0x03}, - {0x03, 0x03}, - {0x03, 0x04}, - {0x04, 0x03}, - {0x04, 0x04}, - {0x04, 0x04}, - {0x04, 0x05}, - {0x05, 0x04}, - {0x05, 0x05}, - {0x05, 0x05}, - {0x05, 0x06}, - {0x06, 0x05}, - {0x00, 0x11}, - {0x06, 0x06}, - {0x06, 0x07}, - {0x07, 0x06}, - {0x02, 0x10}, - {0x07, 0x07}, - {0x07, 0x08}, - {0x08, 0x07}, - {0x03, 0x11}, - {0x08, 0x08}, - {0x08, 0x09}, - {0x09, 0x08}, - {0x05, 0x10}, - {0x09, 0x09}, - {0x09, 0x0A}, - {0x0A, 0x09}, - {0x06, 0x11}, - {0x0A, 0x0A}, - {0x0A, 0x0B}, - {0x0B, 0x0A}, - {0x08, 0x10}, - {0x0B, 0x0B}, - {0x0B, 0x0C}, - {0x0C, 0x0B}, - {0x09, 0x11}, - {0x0C, 0x0C}, - {0x0C, 0x0D}, - {0x0D, 0x0C}, - {0x0B, 0x10}, - {0x0D, 0x0D}, - {0x0D, 0x0E}, - {0x0E, 0x0D}, - {0x0C, 0x11}, - {0x0E, 0x0E}, - {0x0E, 0x0F}, - {0x0F, 0x0E}, - {0x0E, 0x10}, - {0x0F, 0x0F}, - {0x0F, 0x10}, - {0x10, 0x0E}, - {0x10, 0x0F}, - {0x11, 0x0E}, - {0x10, 0x10}, - {0x10, 0x11}, - {0x11, 0x10}, - {0x12, 0x0F}, - {0x11, 0x11}, - {0x11, 0x12}, - {0x12, 0x11}, - {0x14, 0x0E}, - {0x12, 0x12}, - {0x12, 0x13}, - {0x13, 0x12}, - {0x15, 0x0F}, - {0x13, 0x13}, - {0x13, 0x14}, - {0x14, 0x13}, - {0x17, 0x0E}, - {0x14, 0x14}, - {0x14, 0x15}, - {0x15, 0x14}, - {0x18, 0x0F}, - {0x15, 0x15}, - {0x15, 0x16}, - {0x16, 0x15}, - {0x1A, 0x0E}, - {0x16, 0x16}, - {0x16, 0x17}, - {0x17, 0x16}, - {0x1B, 0x0F}, - {0x17, 0x17}, - {0x17, 0x18}, - {0x18, 0x17}, - {0x13, 0x21}, - {0x18, 0x18}, - {0x18, 0x19}, - {0x19, 0x18}, - {0x15, 0x20}, - {0x19, 0x19}, - {0x19, 0x1A}, - {0x1A, 0x19}, - {0x16, 0x21}, - {0x1A, 0x1A}, - {0x1A, 0x1B}, - {0x1B, 0x1A}, - {0x18, 0x20}, - {0x1B, 0x1B}, - {0x1B, 0x1C}, - {0x1C, 0x1B}, - {0x19, 0x21}, - {0x1C, 0x1C}, - {0x1C, 0x1D}, - {0x1D, 0x1C}, - {0x1B, 0x20}, - {0x1D, 0x1D}, - {0x1D, 0x1E}, - {0x1E, 0x1D}, - {0x1C, 0x21}, - {0x1E, 0x1E}, - {0x1E, 0x1F}, - {0x1F, 0x1E}, - {0x1E, 0x20}, - {0x1F, 0x1F}, - {0x1F, 0x20}, - {0x20, 0x1E}, - {0x20, 0x1F}, - {0x21, 0x1E}, - {0x20, 0x20}, - {0x20, 0x21}, - {0x21, 0x20}, - {0x22, 0x1F}, - {0x21, 0x21}, - {0x21, 0x22}, - {0x22, 0x21}, - {0x24, 0x1E}, - {0x22, 0x22}, - {0x22, 0x23}, - {0x23, 0x22}, - {0x25, 0x1F}, - {0x23, 0x23}, - {0x23, 0x24}, - {0x24, 0x23}, - {0x27, 0x1E}, - {0x24, 0x24}, - {0x24, 0x25}, - {0x25, 0x24}, - {0x28, 0x1F}, - {0x25, 0x25}, - {0x25, 0x26}, - {0x26, 0x25}, - {0x2A, 0x1E}, - {0x26, 0x26}, - {0x26, 0x27}, - {0x27, 0x26}, - {0x2B, 0x1F}, - {0x27, 0x27}, - {0x27, 0x28}, - {0x28, 0x27}, - {0x23, 0x31}, - {0x28, 0x28}, - {0x28, 0x29}, - {0x29, 0x28}, - {0x25, 0x30}, - {0x29, 0x29}, - {0x29, 0x2A}, - {0x2A, 0x29}, - {0x26, 0x31}, - {0x2A, 0x2A}, - {0x2A, 0x2B}, - {0x2B, 0x2A}, - {0x28, 0x30}, - {0x2B, 0x2B}, - {0x2B, 0x2C}, - {0x2C, 0x2B}, - {0x29, 0x31}, - {0x2C, 0x2C}, - {0x2C, 0x2D}, - {0x2D, 0x2C}, - {0x2B, 0x30}, - {0x2D, 0x2D}, - {0x2D, 0x2E}, - {0x2E, 0x2D}, - {0x2C, 0x31}, - {0x2E, 0x2E}, - {0x2E, 0x2F}, - {0x2F, 0x2E}, - {0x2E, 0x30}, - {0x2F, 0x2F}, - {0x2F, 0x30}, - {0x30, 0x2E}, - {0x30, 0x2F}, - {0x31, 0x2E}, - {0x30, 0x30}, - {0x30, 0x31}, - {0x31, 0x30}, - {0x32, 0x2F}, - {0x31, 0x31}, - {0x31, 0x32}, - {0x32, 0x31}, - {0x34, 0x2E}, - {0x32, 0x32}, - {0x32, 0x33}, - {0x33, 0x32}, - {0x35, 0x2F}, - {0x33, 0x33}, - {0x33, 0x34}, - {0x34, 0x33}, - {0x37, 0x2E}, - {0x34, 0x34}, - {0x34, 0x35}, - {0x35, 0x34}, - {0x38, 0x2F}, - {0x35, 0x35}, - {0x35, 0x36}, - {0x36, 0x35}, - {0x3A, 0x2E}, - {0x36, 0x36}, - {0x36, 
0x37}, - {0x37, 0x36}, - {0x3B, 0x2F}, - {0x37, 0x37}, - {0x37, 0x38}, - {0x38, 0x37}, - {0x3D, 0x2E}, - {0x38, 0x38}, - {0x38, 0x39}, - {0x39, 0x38}, - {0x3E, 0x2F}, - {0x39, 0x39}, - {0x39, 0x3A}, - {0x3A, 0x39}, - {0x3A, 0x3A}, - {0x3A, 0x3A}, - {0x3A, 0x3B}, - {0x3B, 0x3A}, - {0x3B, 0x3B}, - {0x3B, 0x3B}, - {0x3B, 0x3C}, - {0x3C, 0x3B}, - {0x3C, 0x3C}, - {0x3C, 0x3C}, - {0x3C, 0x3D}, - {0x3D, 0x3C}, - {0x3D, 0x3D}, - {0x3D, 0x3D}, - {0x3D, 0x3E}, - {0x3E, 0x3D}, - {0x3E, 0x3E}, - {0x3E, 0x3E}, - {0x3E, 0x3F}, - {0x3F, 0x3E}, - {0x3F, 0x3F}, - {0x3F, 0x3F}, -}; +extern uint8 OMatch5[256][2]; +extern uint8 OMatch6[256][2]; +extern uint8 OMatchAlpha5[256][2]; +extern uint8 OMatchAlpha6[256][2]; +void initSingleColorLookup(); \ No newline at end of file Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/SingleColorLookup.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/SingleColorLookup.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/SingleColorLookup.cpp @@ -0,0 +1,90 @@ + +#include "SingleColorLookup.h" + +#include "nvcore/Debug.h" + +#include // abs + +// Globals +uint8 OMatch5[256][2]; +uint8 OMatch6[256][2]; +uint8 OMatchAlpha5[256][2]; +uint8 OMatchAlpha6[256][2]; + + + +static int Mul8Bit(int a, int b) +{ + int t = a * b + 128; + return (t + (t >> 8)) >> 8; +} + +static inline int Lerp13(int a, int b) +{ +#ifdef DXT_USE_ROUNDING_BIAS + // with rounding bias + return a + Mul8Bit(b-a, 0x55); +#else + // without rounding bias + // replace "/ 3" by "* 0xaaab) >> 17" if your compiler sucks or you really need every ounce of speed. + return (a * 2 + b) / 3; +#endif +} + +static void PrepareOptTable(uint8 * table, const uint8 * expand, int size, bool alpha_mode) +{ + for (int i = 0; i < 256; i++) + { + int bestErr = 256 * 100; + + for (int min = 0; min < size; min++) + { + for (int max = 0; max < size; max++) + { + int mine = expand[min]; + int maxe = expand[max]; + + int err; + if (alpha_mode) err = abs((maxe + mine)/2 - i); + else err = abs(Lerp13(maxe, mine) - i); + err *= 100; + + // DX10 spec says that interpolation must be within 3% of "correct" result, + // add this as error term. (normally we'd expect a random distribution of + // +-1.5% error, but nowhere in the spec does it say that the error has to be + // unbiased - better safe than sorry). 
+ err += abs(max - min) * 3; + + if (err < bestErr) + { + table[i*2+0] = max; + table[i*2+1] = min; + bestErr = err; + } + } + } + } +} + + +NV_AT_STARTUP(initSingleColorLookup()); + +void initSingleColorLookup() +{ + uint8 expand5[32]; + uint8 expand6[64]; + + for (int i = 0; i < 32; i++) { + expand5[i] = (i<<3) | (i>>2); + } + + for (int i = 0; i < 64; i++) { + expand6[i] = (i<<2) | (i>>4); + } + + PrepareOptTable(&OMatch5[0][0], expand5, 32, false); + PrepareOptTable(&OMatch6[0][0], expand6, 64, false); + PrepareOptTable(&OMatchAlpha5[0][0], expand5, 32, true); + PrepareOptTable(&OMatchAlpha6[0][0], expand6, 64, true); +} + Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/Surface.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/Surface.h +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/Surface.h @@ -0,0 +1,90 @@ +// Copyright (c) 2009-2011 Ignacio Castano +// Copyright (c) 2007-2009 NVIDIA Corporation -- Ignacio Castano +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +#ifndef NVTT_TEXIMAGE_H +#define NVTT_TEXIMAGE_H + +#include "nvtt.h" + +#include "nvcore/RefCounted.h" +#include "nvcore/Ptr.h" + +#include "nvimage/Image.h" +#include "nvimage/FloatImage.h" + +namespace nvtt +{ + + struct Surface::Private : public nv::RefCounted + { + void operator=(const Private &); + public: + Private() + { + nvDebugCheck( refCount() == 0 ); + + type = TextureType_2D; + wrapMode = WrapMode_Mirror; + alphaMode = AlphaMode_None; + isNormalMap = false; + + image = NULL; + } + Private(const Private & p) : RefCounted() // Copy ctor. inits refcount to 0. 
+ { + nvDebugCheck( refCount() == 0 ); + + type = p.type; + wrapMode = p.wrapMode; + alphaMode = p.alphaMode; + isNormalMap = p.isNormalMap; + + image = p.image->clone(); + } + ~Private() + { + delete image; + } + + TextureType type; + WrapMode wrapMode; + AlphaMode alphaMode; + bool isNormalMap; + + nv::FloatImage * image; + }; + +} // nvtt namespace + +namespace nv { + bool canMakeNextMipmap(uint w, uint h, uint d, uint min_size); + uint countMipmaps(uint w); + uint countMipmaps(uint w, uint h, uint d); + uint countMipmapsWithMinSize(uint w, uint h, uint d, uint min_size); + uint computeImageSize(uint w, uint h, uint d, uint bitCount, uint alignmentInBytes, nvtt::Format format); + void getTargetExtent(int * w, int * h, int * d, int maxExtent, nvtt::RoundMode roundMode, nvtt::TextureType textureType); +} + + +#endif // NVTT_TEXIMAGE_H Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/Surface.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/Surface.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/Surface.cpp @@ -0,0 +1,3255 @@ +// Copyright (c) 2009-2011 Ignacio Castano +// Copyright (c) 2007-2009 NVIDIA Corporation -- Ignacio Castano +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +#include "Surface.h" + +#include "nvmath/Vector.inl" +#include "nvmath/Matrix.inl" +#include "nvmath/Color.h" +#include "nvmath/Half.h" +#include "nvmath/ftoi.h" + +#include "nvimage/Filter.h" +#include "nvimage/ImageIO.h" +#include "nvimage/NormalMap.h" +#include "nvimage/BlockDXT.h" +#include "nvimage/ColorBlock.h" +#include "nvimage/PixelFormat.h" +#include "nvimage/ErrorMetric.h" +#include "nvimage/DirectDrawSurface.h" + +#include <float.h> +#include <string.h> // memset, memcpy + +#if NV_CC_GNUC +#include <math.h> // exp2f and log2f +#endif + +using namespace nv; +using namespace nvtt; + +namespace +{ + // 1 -> 1, 2 -> 2, 3 -> 2, 4 -> 4, 5 -> 4, ... 
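// A quick check of the identity used just below (illustrative only; it assumes
// nextPowerOfTwo(x) returns the smallest power of two that is >= x):
//   previousPowerOfTwo(4) = nextPowerOfTwo(5) / 2 = 8 / 2 = 4
//   previousPowerOfTwo(5) = nextPowerOfTwo(6) / 2 = 8 / 2 = 4
//   previousPowerOfTwo(7) = nextPowerOfTwo(8) / 2 = 8 / 2 = 4
// i.e. the helper returns the largest power of two that does not exceed v,
// which is exactly the "1 -> 1, 2 -> 2, 3 -> 2, ..." mapping noted above.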
+ static inline uint previousPowerOfTwo(uint v) + { + return nextPowerOfTwo(v + 1) / 2; + } + + static inline uint nearestPowerOfTwo(uint v) + { + const uint np2 = nextPowerOfTwo(v); + const uint pp2 = previousPowerOfTwo(v); + + if (np2 - v <= v - pp2) + { + return np2; + } + else + { + return pp2; + } + } + + static inline uint nextMultipleOfFour(uint v) + { + return (v + 3) & ~3; + } + static inline uint previousMultipleOfFour(uint v) + { + return v & ~3; + } + + static inline uint nearestMultipleOfFour(uint v) + { + const uint nm4 = nextMultipleOfFour(v); + const uint pm4 = previousMultipleOfFour(v); + + if (nm4 - v <= v - pm4) + { + return nm4; + } + else + { + return pm4; + } + } + + + static int blockSize(Format format) + { + if (format == Format_DXT1 || format == Format_DXT1a || format == Format_DXT1n) { + return 8; + } + else if (format == Format_DXT3) { + return 16; + } + else if (format == Format_DXT5 || format == Format_DXT5n || format == Format_BC3_RGBM) { + return 16; + } + else if (format == Format_BC4) { + return 8; + } + else if (format == Format_BC5 /*|| format == Format_BC5_Luma*/) { + return 16; + } + else if (format == Format_CTX1) { + return 8; + } + else if (format == Format_BC6) { + return 16; + } + else if (format == Format_BC7) { + return 16; + } + return 0; + } + + /*static int translateMask(int input) { + if (input > 0) return 1 << input; + return ~input; + }*/ +} + +bool nv::canMakeNextMipmap(uint w, uint h, uint d, uint min_size) +{ + if (min_size==1u) { + if(w==1u && h==1u && d==1u) { + return false; + } + } + else if (((w <= min_size || h <= min_size) && d == 1u)) { + return false; + } + + return true; +} + +uint nv::countMipmaps(uint w) +{ + uint mipmap = 0; + + while (w != 1) { + w = max(1U, w / 2); + mipmap++; + } + + return mipmap + 1; +} + +uint nv::countMipmaps(uint w, uint h, uint d) +{ + uint mipmap = 0; + + while (w != 1 || h != 1 || d != 1) { + w = max(1U, w / 2); + h = max(1U, h / 2); + d = max(1U, d / 2); + mipmap++; + } + + return mipmap + 1; +} + +uint nv::countMipmapsWithMinSize(uint w, uint h, uint d, uint min_size) +{ + uint mipmap = 0; + + while (canMakeNextMipmap(w, h, d, min_size)) { + w = max(1U, w / 2); + h = max(1U, h / 2); + d = max(1U, d / 2); + mipmap++; + } + + return mipmap + 1; +} + + +uint nv::computeImageSize(uint w, uint h, uint d, uint bitCount, uint pitchAlignmentInBytes, Format format) +{ + if (format == Format_RGBA) { + return d * h * computeBytePitch(w, bitCount, pitchAlignmentInBytes); + } + else { + return ((w + 3) / 4) * ((h + 3) / 4) * blockSize(format) * d; + } +} + +void nv::getTargetExtent(int * width, int * height, int * depth, int maxExtent, RoundMode roundMode, TextureType textureType) { + nvDebugCheck(width != NULL && *width > 0); + nvDebugCheck(height != NULL && *height > 0); + nvDebugCheck(depth != NULL && *depth > 0); + + int w = *width; + int h = *height; + int d = *depth; + + if (roundMode != RoundMode_None && maxExtent > 0) + { + // rounded max extent should never be higher than original max extent. + maxExtent = previousPowerOfTwo(maxExtent); + } + + // Scale extents without changing aspect ratio. + int m = max(max(w, h), d); + if (maxExtent > 0 && m > maxExtent) + { + w = max((w * maxExtent) / m, 1); + h = max((h * maxExtent) / m, 1); + d = max((d * maxExtent) / m, 1); + } + + if (textureType == TextureType_2D) + { + d = 1; + } + else if (textureType == TextureType_Cube) + { + w = h = (w + h) / 2; + d = 1; + } + + // Round to power of two. 
+ if (roundMode == RoundMode_ToNextPowerOfTwo) + { + w = nextPowerOfTwo(w); + h = nextPowerOfTwo(h); + d = nextPowerOfTwo(d); + } + else if (roundMode == RoundMode_ToNearestPowerOfTwo) + { + w = nearestPowerOfTwo(w); + h = nearestPowerOfTwo(h); + d = nearestPowerOfTwo(d); + } + else if (roundMode == RoundMode_ToPreviousPowerOfTwo) + { + w = previousPowerOfTwo(w); + h = previousPowerOfTwo(h); + d = previousPowerOfTwo(d); + } + else if (roundMode == RoundMode_ToNextMultipleOfFour) + { + w = nextMultipleOfFour(w); + h = nextMultipleOfFour(h); + d = nextMultipleOfFour(d); + } + else if (roundMode == RoundMode_ToNextMultipleOfFour) + { + w = nearestMultipleOfFour(w); + h = nearestMultipleOfFour(h); + d = nearestMultipleOfFour(d); + } + else if (roundMode == RoundMode_ToPreviousMultipleOfFour) + { + w = previousMultipleOfFour(w); + h = previousMultipleOfFour(h); + d = previousMultipleOfFour(d); + } + + *width = w; + *height = h; + *depth = d; +} + + + +Surface::Surface() : m(new Surface::Private()) +{ + m->addRef(); +} + +Surface::Surface(const Surface & tex) : m(tex.m) +{ + if (m != NULL) m->addRef(); +} + +Surface::~Surface() +{ + if (m != NULL) m->release(); + m = NULL; +} + +void Surface::operator=(const Surface & tex) +{ + if (tex.m != NULL) tex.m->addRef(); + if (m != NULL) m->release(); + m = tex.m; +} + +void Surface::detach() +{ + if (m->refCount() > 1) + { + m->release(); + m = new Surface::Private(*m); + m->addRef(); + nvDebugCheck(m->refCount() == 1); + } +} + +void Surface::setWrapMode(WrapMode wrapMode) +{ + if (m->wrapMode != wrapMode) + { + detach(); + m->wrapMode = wrapMode; + } +} + +void Surface::setAlphaMode(AlphaMode alphaMode) +{ + if (m->alphaMode != alphaMode) + { + detach(); + m->alphaMode = alphaMode; + } +} + +void Surface::setNormalMap(bool isNormalMap) +{ + if (m->isNormalMap != isNormalMap) + { + detach(); + m->isNormalMap = isNormalMap; + } +} + +bool Surface::isNull() const +{ + return m->image == NULL; +} + +int Surface::width() const +{ + if (m->image != NULL) return m->image->width(); + return 0; +} + +int Surface::height() const +{ + if (m->image != NULL) return m->image->height(); + return 0; +} + +int Surface::depth() const +{ + if (m->image != NULL) return m->image->depth(); + return 0; +} + +WrapMode Surface::wrapMode() const +{ + return m->wrapMode; +} + +AlphaMode Surface::alphaMode() const +{ + return m->alphaMode; +} + +bool Surface::isNormalMap() const +{ + return m->isNormalMap; +} + +TextureType Surface::type() const +{ + return m->type; +} + +int Surface::countMipmaps() const +{ + if (m->image == NULL) return 0; + return ::countMipmaps(m->image->width(), m->image->height(), 1); +} + +int Surface::countMipmaps(int min_size) const +{ + if (m->image == NULL) return 0; + return ::countMipmapsWithMinSize(m->image->width(), m->image->height(), 1, min_size); +} + +float Surface::alphaTestCoverage(float alphaRef/*= 0.5*/, int alpha_channel/*=3*/) const +{ + if (m->image == NULL) return 0.0f; + + alphaRef = nv::clamp(alphaRef, 1.0f/256, 255.0f/256); + + return m->image->alphaTestCoverage(alphaRef, alpha_channel); +} + +float Surface::average(int channel, int alpha_channel/*= -1*/, float gamma /*= 2.2f*/) const +{ + if (m->image == NULL) return 0.0f; + + const uint count = m->image->width() * m->image->height(); + + float sum = 0.0f; + const float * c = m->image->channel(channel); + + float denom; + + if (alpha_channel == -1) { + for (uint i = 0; i < count; i++) { + sum += powf(c[i], gamma); + } + + denom = float(count); + } + else { + float alpha_sum = 
0.0f; + const float * a = m->image->channel(alpha_channel); + + for (uint i = 0; i < count; i++) { + sum += powf(c[i], gamma) * a[i]; + alpha_sum += a[i]; + } + + denom = alpha_sum; + } + + // Avoid division by zero. + if (denom == 0.0f) return 0.0f; + + return powf(sum / denom, 1.0f/gamma); +} + +const float * Surface::data() const +{ + return m->image->channel(0); +} + +const float * Surface::channel(int i) const +{ + if (i < 0 || i > 3) return NULL; + return m->image->channel(i); +} + + +void Surface::histogram(int channel, float rangeMin, float rangeMax, int binCount, int * binPtr) const +{ + // We assume it's clear in case we want to accumulate multiple histograms. + //memset(bins, 0, sizeof(int)*count); + + if (m->image == NULL) return; + + const float * c = m->image->channel(channel); + + float scale = float(binCount) / rangeMax; + float bias = - scale * rangeMin; + + const uint count = m->image->pixelCount(); + for (uint i = 0; i < count; i++) { + float f = c[i] * scale + bias; + int idx = ftoi_floor(f); + if (idx < 0) idx = 0; + if (idx > binCount-1) idx = binCount-1; + binPtr[idx]++; + } +} + +void Surface::range(int channel, float * rangeMin, float * rangeMax, int alpha_channel/*= -1*/, float alpha_ref/*= 0.f*/) const +{ + Vector2 range(FLT_MAX, -FLT_MAX); + + FloatImage * img = m->image; + + if (alpha_channel == -1) { // no alpha channel; just like the original range function + + if (m->image != NULL) { + float * c = img->channel(channel); + + const uint count = img->pixelCount(); + for (uint p = 0; p < count; p++) { + float f = c[p]; + if (f < range.x) range.x = f; + if (f > range.y) range.y = f; + } + } + } + else { // use alpha test to ignore some pixels + //note, it's quite possible to get FLT_MAX,-FLT_MAX back if all pixels fail the test + + if (m->image != NULL) + { + const float * c = img->channel(channel); + const float * a = img->channel(alpha_channel); + + const uint count = img->pixelCount(); + for (uint p = 0; p < count; p++) { + if(a[p]>alpha_ref) { + float f = c[p]; + if (f < range.x) range.x = f; + if (f > range.y) range.y = f; + } + } + } + } + + *rangeMin = range.x; + *rangeMax = range.y; +} + +bool Surface::load(const char * fileName, bool * hasAlpha/*= NULL*/) +{ + AutoPtr img(ImageIO::loadFloat(fileName)); + if (img == NULL) { + // Try loading as DDS. + if (nv::strEqual(nv::Path::extension(fileName), ".dds")) { + nv::DirectDrawSurface dds; + if (dds.load(fileName)) { + if (dds.header.isBlockFormat()) { + int w = dds.surfaceWidth(0); + int h = dds.surfaceHeight(0); + uint size = dds.surfaceSize(0); + + void * data = malloc(size); + dds.readSurface(0, 0, data, size); + + // @@ Handle all formats! @@ Get nvtt format from dds.surfaceFormat() ? + + if (dds.header.hasDX10Header()) { + if (dds.header.header10.dxgiFormat == DXGI_FORMAT_BC6H_UF16) { + this->setImage2D(nvtt::Format_BC6, nvtt::Decoder_D3D10, w, h, data); + } + else { + // @@ + nvCheck(false); + } + } + else { + uint fourcc = dds.header.pf.fourcc; + if (fourcc == FOURCC_DXT1) { + this->setImage2D(nvtt::Format_BC1, nvtt::Decoder_D3D10, w, h, data); + } + else if (fourcc == FOURCC_DXT5) { + this->setImage2D(nvtt::Format_BC3, nvtt::Decoder_D3D10, w, h, data); + } + else { + // @@ + nvCheck(false); + } + } + + free(data); + } + else { + Image img; + dds.mipmap(&img, /*face=*/0, /*mipmap=*/0); + + int w = img.width(); + int h = img.height(); + int d = img.depth(); + + // @@ Add support for all pixel formats. 
+ + this->setImage(nvtt::InputFormat_BGRA_8UB, w, h, d, img.pixels()); + } + + return true; + } + } + + return false; + } + + detach(); + + if (hasAlpha != NULL) { + *hasAlpha = (img->componentCount() == 4); + } + + // @@ Have loadFloat allocate the image with the desired number of channels. + img->resizeChannelCount(4); + + delete m->image; + m->image = img.release(); + + return true; +} + +bool Surface::save(const char * fileName, bool hasAlpha/*=0*/, bool hdr/*=0*/) const +{ + if (m->image == NULL) { + return false; + } + + if (hdr) { + return ImageIO::saveFloat(fileName, m->image, 0, 4); + } + else { + AutoPtr image(m->image->createImage(0, 4)); + nvCheck(image != NULL); + + if (hasAlpha) { + image->setFormat(Image::Format_ARGB); + } + + return ImageIO::save(fileName, image.ptr()); + } +} + + +bool Surface::setImage(int w, int h, int d) +{ + detach(); + + if (m->image == NULL) { + m->image = new FloatImage(); + } + m->image->allocate(4, w, h, d); + m->type = (d == 1) ? TextureType_2D : TextureType_3D; + + m->image->clear(); + + return true; +} + + +#if 0 //NV_OS_WIN32 + +#include +#undef min +#undef max + +static int filter(unsigned int code, struct _EXCEPTION_POINTERS *ep) { + if (code == EXCEPTION_ACCESS_VIOLATION) { + return EXCEPTION_EXECUTE_HANDLER; + } + else { + return EXCEPTION_CONTINUE_SEARCH; + }; +} + +#define TRY __try +#define CATCH __except (filter(GetExceptionCode(), GetExceptionInformation())) + +#else // 0 + +#define TRY if (true) +#define CATCH else + +#endif + +bool Surface::setImage(nvtt::InputFormat format, int w, int h, int d, const void * data) +{ + detach(); + + if (m->image == NULL) { + m->image = new FloatImage(); + } + m->image->allocate(4, w, h, d); + m->type = (d == 1) ? TextureType_2D : TextureType_3D; + + const int count = m->image->pixelCount(); + + float * rdst = m->image->channel(0); + float * gdst = m->image->channel(1); + float * bdst = m->image->channel(2); + float * adst = m->image->channel(3); + + if (format == InputFormat_BGRA_8UB) + { + const Color32 * src = (const Color32 *)data; + + TRY { + for (int i = 0; i < count; i++) + { + rdst[i] = float(src[i].r) / 255.0f; + gdst[i] = float(src[i].g) / 255.0f; + bdst[i] = float(src[i].b) / 255.0f; + adst[i] = float(src[i].a) / 255.0f; + } + } + CATCH { + return false; + } + } + else if (format == InputFormat_RGBA_16F) + { + const uint16 * src = (const uint16 *)data; + + TRY { + for (int i = 0; i < count; i++) + { + ((uint32 *)rdst)[i] = half_to_float(src[4*i+0]); + ((uint32 *)gdst)[i] = half_to_float(src[4*i+1]); + ((uint32 *)bdst)[i] = half_to_float(src[4*i+2]); + ((uint32 *)adst)[i] = half_to_float(src[4*i+3]); + } + } + CATCH { + return false; + } + } + else if (format == InputFormat_RGBA_32F) + { + const float * src = (const float *)data; + + TRY { + for (int i = 0; i < count; i++) + { + rdst[i] = src[4 * i + 0]; + gdst[i] = src[4 * i + 1]; + bdst[i] = src[4 * i + 2]; + adst[i] = src[4 * i + 3]; + } + } + CATCH { + return false; + } + } + else if (format == InputFormat_R_32F) + { + const float * src = (const float *)data; + + TRY { + for (int i = 0; i < count; i++) + { + rdst[i] = src[i]; + gdst[i] = 0; + bdst[i] = 0; + adst[i] = 0; + } + } + CATCH { + return false; + } + } + + return true; +} + +bool Surface::setImage(InputFormat format, int w, int h, int d, const void * r, const void * g, const void * b, const void * a) +{ + detach(); + + if (m->image == NULL) { + m->image = new FloatImage(); + } + m->image->allocate(4, w, h, d); + m->type = (d == 1) ? 
TextureType_2D : TextureType_3D; + + const int count = m->image->pixelCount(); + + float * rdst = m->image->channel(0); + float * gdst = m->image->channel(1); + float * bdst = m->image->channel(2); + float * adst = m->image->channel(3); + + if (format == InputFormat_BGRA_8UB) + { + const uint8 * rsrc = (const uint8 *)r; + const uint8 * gsrc = (const uint8 *)g; + const uint8 * bsrc = (const uint8 *)b; + const uint8 * asrc = (const uint8 *)a; + + TRY { + for (int i = 0; i < count; i++) rdst[i] = float(rsrc[i]) / 255.0f; + for (int i = 0; i < count; i++) gdst[i] = float(gsrc[i]) / 255.0f; + for (int i = 0; i < count; i++) bdst[i] = float(bsrc[i]) / 255.0f; + for (int i = 0; i < count; i++) adst[i] = float(asrc[i]) / 255.0f; + } + CATCH { + return false; + } + } + else if (format == InputFormat_RGBA_16F) + { + const uint16 * rsrc = (const uint16 *)r; + const uint16 * gsrc = (const uint16 *)g; + const uint16 * bsrc = (const uint16 *)b; + const uint16 * asrc = (const uint16 *)a; + + TRY { + for (int i = 0; i < count; i++) ((uint32 *)rdst)[i] = half_to_float(rsrc[i]); + for (int i = 0; i < count; i++) ((uint32 *)gdst)[i] = half_to_float(gsrc[i]); + for (int i = 0; i < count; i++) ((uint32 *)bdst)[i] = half_to_float(bsrc[i]); + for (int i = 0; i < count; i++) ((uint32 *)adst)[i] = half_to_float(asrc[i]); + } + CATCH { + return false; + } + } + else if (format == InputFormat_RGBA_32F) + { + const float * rsrc = (const float *)r; + const float * gsrc = (const float *)g; + const float * bsrc = (const float *)b; + const float * asrc = (const float *)a; + + TRY { + memcpy(rdst, rsrc, count * sizeof(float)); + memcpy(gdst, gsrc, count * sizeof(float)); + memcpy(bdst, bsrc, count * sizeof(float)); + memcpy(adst, asrc, count * sizeof(float)); + } + CATCH { + return false; + } + } + else if (format == InputFormat_R_32F) + { + const float * rsrc = (const float *)r; + + TRY { + memcpy(rdst, rsrc, count * sizeof(float)); + memset(gdst, 0, count * sizeof(float)); + memset(bdst, 0, count * sizeof(float)); + memset(adst, 0, count * sizeof(float)); + } + CATCH { + return false; + } + } + + return true; +} + +// @@ Add support for compressed 3D textures. 
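// Rough usage sketch for the uncompressed setImage() overloads above
// (hypothetical caller code, not part of this file; the buffer, width and
// height names are made up for illustration):
//
//   nvtt::Surface s;
//   std::vector<uint8_t> bgra(width * height * 4); // interleaved B,G,R,A bytes
//   // ... fill bgra ...
//   s.setImage(nvtt::InputFormat_BGRA_8UB, width, height, /*d=*/1, bgra.data());
//
// The four-pointer overload takes separate R, G, B and A planes instead of one
// interleaved buffer; both paths end up in the same 4-channel FloatImage, so
// later operations (mipmaps, color transforms) behave identically.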
+bool Surface::setImage2D(Format format, Decoder decoder, int w, int h, const void * data) +{ + if (format != nvtt::Format_BC1 && + format != nvtt::Format_BC2 && + format != nvtt::Format_BC3 && + format != nvtt::Format_BC4 && + format != nvtt::Format_BC5 && + format != nvtt::Format_BC6 && + format != nvtt::Format_BC7) + { + return false; + } + + detach(); + + if (m->image == NULL) { + m->image = new FloatImage(); + } + m->image->allocate(4, w, h, 1); + m->type = TextureType_2D; + + const int bw = (w + 3) / 4; + const int bh = (h + 3) / 4; + + const uint bs = blockSize(format); + + const uint8 * ptr = (const uint8 *)data; + + TRY { + if (format == nvtt::Format_BC6) + { + // BC6 format - decode directly to float + + for (int y = 0; y < bh; y++) + { + for (int x = 0; x < bw; x++) + { + Vector3 colors[16]; + const BlockBC6 * block = (const BlockBC6 *)ptr; + block->decodeBlock(colors); + + for (int yy = 0; yy < 4; yy++) + { + for (int xx = 0; xx < 4; xx++) + { + Vector3 rgb = colors[yy*4 + xx]; + + if (x * 4 + xx < w && y * 4 + yy < h) + { + m->image->pixel(0, x*4 + xx, y*4 + yy, 0) = rgb.x; + m->image->pixel(1, x*4 + xx, y*4 + yy, 0) = rgb.y; + m->image->pixel(2, x*4 + xx, y*4 + yy, 0) = rgb.z; + m->image->pixel(3, x*4 + xx, y*4 + yy, 0) = 1.0f; + } + } + } + + ptr += bs; + } + } + } + else + { + // Non-BC6 - decode to 8-bit, then convert to float + + for (int y = 0; y < bh; y++) + { + for (int x = 0; x < bw; x++) + { + ColorBlock colors; + + if (format == nvtt::Format_BC1) + { + const BlockDXT1 * block = (const BlockDXT1 *)ptr; + + if (decoder == Decoder_D3D10) { + block->decodeBlock(&colors, false); + } + else if (decoder == Decoder_D3D9) { + block->decodeBlock(&colors, false); + } + else if (decoder == Decoder_NV5x) { + block->decodeBlockNV5x(&colors); + } + } + else if (format == nvtt::Format_BC2) + { + const BlockDXT3 * block = (const BlockDXT3 *)ptr; + + if (decoder == Decoder_D3D10) { + block->decodeBlock(&colors, false); + } + else if (decoder == Decoder_D3D9) { + block->decodeBlock(&colors, false); + } + else if (decoder == Decoder_NV5x) { + block->decodeBlockNV5x(&colors); + } + } + else if (format == nvtt::Format_BC3) + { + const BlockDXT5 * block = (const BlockDXT5 *)ptr; + + if (decoder == Decoder_D3D10) { + block->decodeBlock(&colors, false); + } + else if (decoder == Decoder_D3D9) { + block->decodeBlock(&colors, false); + } + else if (decoder == Decoder_NV5x) { + block->decodeBlockNV5x(&colors); + } + } + else if (format == nvtt::Format_BC4) + { + const BlockATI1 * block = (const BlockATI1 *)ptr; + block->decodeBlock(&colors, decoder == Decoder_D3D9); + } + else if (format == nvtt::Format_BC5) + { + const BlockATI2 * block = (const BlockATI2 *)ptr; + block->decodeBlock(&colors, decoder == Decoder_D3D9); + } + else if (format == nvtt::Format_BC7) + { + const BlockBC7 * block = (const BlockBC7 *)ptr; + block->decodeBlock(&colors); + } + else + { + nvDebugCheck(false); + } + + for (int yy = 0; yy < 4; yy++) + { + for (int xx = 0; xx < 4; xx++) + { + Color32 c = colors.color(xx, yy); + + if (x * 4 + xx < w && y * 4 + yy < h) + { + m->image->pixel(0, x*4 + xx, y*4 + yy, 0) = float(c.r) * 1.0f/255.0f; + m->image->pixel(1, x*4 + xx, y*4 + yy, 0) = float(c.g) * 1.0f/255.0f; + m->image->pixel(2, x*4 + xx, y*4 + yy, 0) = float(c.b) * 1.0f/255.0f; + m->image->pixel(3, x*4 + xx, y*4 + yy, 0) = float(c.a) * 1.0f/255.0f; + } + } + } + + ptr += bs; + } + } + } + } + CATCH { + return false; + } + + return true; +} + + +static void getDefaultFilterWidthAndParams(int filter, float * filterWidth, 
float params[2]) +{ + if (filter == ResizeFilter_Box) { + *filterWidth = 0.5f; + } + else if (filter == ResizeFilter_Triangle) { + *filterWidth = 1.0f; + } + else if (filter == ResizeFilter_Kaiser) + { + *filterWidth = 3.0f; + params[0] = 4.0f; + params[1] = 1.0f; + } + else //if (filter == ResizeFilter_Mitchell) + { + *filterWidth = 2.0f; + params[0] = 1.0f / 3.0f; + params[1] = 1.0f / 3.0f; + } +} + +void Surface::resize(int w, int h, int d, ResizeFilter filter) +{ + float filterWidth; + float params[2]; + getDefaultFilterWidthAndParams(filter, &filterWidth, params); + + resize(w, h, d, filter, filterWidth, params); +} + +void Surface::resize(int w, int h, int d, ResizeFilter filter, float filterWidth, const float * params) +{ + if (isNull() || (w == width() && h == height() && d == depth())) { + return; + } + + detach(); + + FloatImage * img = m->image; + + FloatImage::WrapMode wrapMode = (FloatImage::WrapMode)m->wrapMode; + + if (m->alphaMode == AlphaMode_Transparency) + { + if (filter == ResizeFilter_Box) + { + BoxFilter filter(filterWidth); + img = img->resize(filter, w, h, d, wrapMode, 3); + } + else if (filter == ResizeFilter_Triangle) + { + TriangleFilter filter(filterWidth); + img = img->resize(filter, w, h, d, wrapMode, 3); + } + else if (filter == ResizeFilter_Kaiser) + { + KaiserFilter filter(filterWidth); + if (params != NULL) filter.setParameters(params[0], params[1]); + img = img->resize(filter, w, h, d, wrapMode, 3); + } + else //if (filter == ResizeFilter_Mitchell) + { + nvDebugCheck(filter == ResizeFilter_Mitchell); + MitchellFilter filter; + if (params != NULL) filter.setParameters(params[0], params[1]); + img = img->resize(filter, w, h, d, wrapMode, 3); + } + } + else + { + if (filter == ResizeFilter_Box) + { + BoxFilter filter(filterWidth); + img = img->resize(filter, w, h, d, wrapMode); + } + else if (filter == ResizeFilter_Triangle) + { + TriangleFilter filter(filterWidth); + img = img->resize(filter, w, h, d, wrapMode); + } + else if (filter == ResizeFilter_Kaiser) + { + KaiserFilter filter(filterWidth); + if (params != NULL) filter.setParameters(params[0], params[1]); + img = img->resize(filter, w, h, d, wrapMode); + } + else //if (filter == ResizeFilter_Mitchell) + { + nvDebugCheck(filter == ResizeFilter_Mitchell); + MitchellFilter filter; + if (params != NULL) filter.setParameters(params[0], params[1]); + img = img->resize(filter, w, h, d, wrapMode); + } + } + + delete m->image; + m->image = img; +} + +void Surface::resize_make_square(int maxExtent, RoundMode roundMode, ResizeFilter filter) +{ + if (isNull()) return; + + float filterWidth; + float params[2]; + getDefaultFilterWidthAndParams(filter, &filterWidth, params); + + int w = m->image->width(); + int h = m->image->height(); + int d = m->image->depth(); + + getTargetExtent(&w, &h, &d, maxExtent, roundMode, m->type); + + if (m->type == TextureType_2D) + { + nvDebugCheck(d==1); + int md = nv::min(w,h); + w = md; + h = md; + } + else if (m->type == TextureType_Cube) + { + nvDebugCheck(d==1); + nvDebugCheck(w==h); + } + else if (m->type == TextureType_3D) + { + int md = nv::min(nv::min(w,h),d); + w = md; + h = md; + d = md; + } + + resize(w, h, d, filter, filterWidth, params); +} + +void Surface::resize(int maxExtent, RoundMode roundMode, ResizeFilter filter) +{ + float filterWidth; + float params[2]; + getDefaultFilterWidthAndParams(filter, &filterWidth, params); + + resize(maxExtent, roundMode, filter, filterWidth, params); +} + +void Surface::resize(int maxExtent, RoundMode roundMode, ResizeFilter filter, 
float filterWidth, const float * params) +{ + if (isNull()) return; + + int w = m->image->width(); + int h = m->image->height(); + int d = m->image->depth(); + + getTargetExtent(&w, &h, &d, maxExtent, roundMode, m->type); + + resize(w, h, d, filter, filterWidth, params); +} + +bool Surface::canMakeNextMipmap(int min_size /*= 1*/) +{ + if (isNull()) return false; + + return nv::canMakeNextMipmap(width(), height(), depth(), min_size); +} + + +bool Surface::buildNextMipmap(MipmapFilter filter, int min_size /*= 1*/) +{ + float filterWidth; + float params[2]; + getDefaultFilterWidthAndParams(filter, &filterWidth, params); + + return buildNextMipmap(filter, filterWidth, params, min_size); +} + +bool Surface::buildNextMipmap(MipmapFilter filter, float filterWidth, const float * params, int min_size /*= 1*/) +{ + if (!canMakeNextMipmap(min_size)) { + return false; + } + + detach(); + + FloatImage * img = m->image; + + FloatImage::WrapMode wrapMode = (FloatImage::WrapMode)m->wrapMode; + + if (m->alphaMode == AlphaMode_Transparency) + { + if (filter == MipmapFilter_Box) + { + BoxFilter filter(filterWidth); + img = img->downSample(filter, wrapMode, 3); + } + else if (filter == MipmapFilter_Triangle) + { + TriangleFilter filter(filterWidth); + img = img->downSample(filter, wrapMode, 3); + } + else if (filter == MipmapFilter_Kaiser) + { + nvDebugCheck(filter == MipmapFilter_Kaiser); + KaiserFilter filter(filterWidth); + if (params != NULL) filter.setParameters(params[0], params[1]); + img = img->downSample(filter, wrapMode, 3); + } + } + else + { + if (filter == MipmapFilter_Box) + { + if (filterWidth == 0.5f && img->depth() == 1) { + img = img->fastDownSample(); + } + else { + BoxFilter filter(filterWidth); + img = img->downSample(filter, wrapMode); + } + } + else if (filter == MipmapFilter_Triangle) + { + TriangleFilter filter(filterWidth); + img = img->downSample(filter, wrapMode); + } + else //if (filter == MipmapFilter_Kaiser) + { + nvDebugCheck(filter == MipmapFilter_Kaiser); + KaiserFilter filter(filterWidth); + if (params != NULL) filter.setParameters(params[0], params[1]); + img = img->downSample(filter, wrapMode); + } + } + + delete m->image; + m->image = img; + + return true; +} + +bool Surface::buildNextMipmapSolidColor(const float * const color_components) +{ + if (isNull() || (width() == 1 && height() == 1 && depth() == 1)) { + return false; + } + + detach(); + + FloatImage * img = new FloatImage(); + const uint w = max(1, m->image->m_width / 2); + const uint h = max(1, m->image->m_height / 2); + img->allocate(m->image->m_componentCount, w, h); + + for(uint c = 0; c < img->m_componentCount; c++) + { + img->clear(c, color_components[c]); + } + + delete m->image; + m->image = img; + + return true; +} + +void Surface::canvasSize(int w, int h, int d) +{ + nvDebugCheck(w > 0 && h > 0 && d > 0); + + if (isNull() || (w == width() && h == height() && d == depth())) { + return; + } + + detach(); + + FloatImage * img = m->image; + + FloatImage * new_img = new FloatImage; + new_img->allocate(4, w, h, d); + new_img->clear(); + + w = min(uint(w), img->width()); + h = min(uint(h), img->height()); + d = min(uint(d), img->depth()); + + for (int z = 0; z < d; z++) { + for (int y = 0; y < h; y++) { + for (int x = 0; x < w; x++) { + new_img->pixel(0, x, y, z) = img->pixel(0, x, y, z); + new_img->pixel(1, x, y, z) = img->pixel(1, x, y, z); + new_img->pixel(2, x, y, z) = img->pixel(2, x, y, z); + new_img->pixel(3, x, y, z) = img->pixel(3, x, y, z); + } + } + } + + delete m->image; + m->image = new_img; + 
m->type = (d == 1) ? TextureType_2D : TextureType_3D; +} + + +// Color transforms. +void Surface::toLinear(float gamma) +{ + if (isNull()) return; + if (equal(gamma, 1.0f)) return; + + detach(); + + m->image->toLinear(0, 3, gamma); +} + +void Surface::toGamma(float gamma) +{ + if (isNull()) return; + if (equal(gamma, 1.0f)) return; + + detach(); + + m->image->toGamma(0, 3, gamma); +} + +void Surface::toLinear(int channel, float gamma) +{ + if (isNull()) return; + if (equal(gamma, 1.0f)) return; + + detach(); + + m->image->toLinear(channel, 1, gamma); +} + +void Surface::toGamma(int channel, float gamma) +{ + if (isNull()) return; + if (equal(gamma, 1.0f)) return; + + detach(); + + m->image->toGamma(channel, 1, gamma); +} + + + +static float toSrgb(float f) { + if (isNan(f)) f = 0.0f; + else if (f <= 0.0f) f = 0.0f; + else if (f <= 0.0031308f) f = 12.92f * f; + else if (f <= 1.0f) f = (powf(f, 0.41666f) * 1.055f) - 0.055f; + else f = 1.0f; + return f; +} + +void Surface::toSrgb() +{ + if (isNull()) return; + + detach(); + + FloatImage * img = m->image; + + const uint count = img->pixelCount(); + for (uint c = 0; c < 3; c++) { + float * channel = img->channel(c); + for (uint i = 0; i < count; i++) { + channel[i] = ::toSrgb(channel[i]); + } + } +} + +static float fromSrgb(float f) { + if (f < 0.0f) f = 0.0f; + else if (f < 0.04045f) f = f / 12.92f; + else if (f <= 1.0f) f = powf((f + 0.055f) / 1.055f, 2.4f); + else f = 1.0f; + return f; +} + +void Surface::toLinearFromSrgb() +{ + if (isNull()) return; + + detach(); + + FloatImage * img = m->image; + + const uint count = img->pixelCount(); + for (uint c = 0; c < 3; c++) { + float * channel = img->channel(c); + for (uint i = 0; i < count; i++) { + channel[i] = ::fromSrgb(channel[i]); + } + } +} + +static float toXenonSrgb(float f) { + if (f < 0) f = 0; + else if (f < (1.0f/16.0f)) f = 4.0f * f; + else if (f < (1.0f/8.0f)) f = 0.25f + 2.0f * (f - 0.0625f); + else if (f < 0.5f) f = 0.375f + 1.0f * (f - 0.125f); + else if (f < 1.0f) f = 0.75f + 0.5f * (f - 0.50f); + else f = 1.0f; + return f; +} + +void Surface::toXenonSrgb() +{ + if (isNull()) return; + + detach(); + + FloatImage * img = m->image; + + const uint count = img->pixelCount(); + for (uint c = 0; c < 3; c++) { + float * channel = img->channel(c); + for (uint i = 0; i < count; i++) { + channel[i] = ::toXenonSrgb(channel[i]); + } + } +} + + +void Surface::transform(const float w0[4], const float w1[4], const float w2[4], const float w3[4], const float offset[4]) +{ + if (isNull()) return; + + detach(); + + Matrix xform( + Vector4(w0[0], w0[1], w0[2], w0[3]), + Vector4(w1[0], w1[1], w1[2], w1[3]), + Vector4(w2[0], w2[1], w2[2], w2[3]), + Vector4(w3[0], w3[1], w3[2], w3[3])); + + Vector4 voffset(offset[0], offset[1], offset[2], offset[3]); + + m->image->transform(0, xform, voffset); +} + +// R, G, B, A, 1, 0, -1 +void Surface::swizzle(int r, int g, int b, int a) +{ + if (isNull()) return; + if (r == 0 && g == 1 && b == 2 && a == 3) return; + + detach(); + + m->image->swizzle(0, r, g, b, a); +} + +// color * scale + bias +void Surface::scaleBias(int channel, float scale, float bias) +{ + if (isNull()) return; + if (equal(scale, 1.0f) && equal(bias, 0.0f)) return; + + detach(); + + m->image->scaleBias(channel, 1, scale, bias); +} + +void Surface::clamp(int channel, float low, float high) +{ + if (isNull()) return; + + detach(); + + m->image->clamp(channel, 1, low, high); +} + +void Surface::blend(float red, float green, float blue, float alpha, float t) +{ + if (isNull()) return; + + 
detach(); + + FloatImage * img = m->image; + float * r = img->channel(0); + float * g = img->channel(1); + float * b = img->channel(2); + float * a = img->channel(3); + + const uint count = img->pixelCount(); + for (uint i = 0; i < count; i++) + { + r[i] = lerp(r[i], red, t); + g[i] = lerp(g[i], green, t); + b[i] = lerp(b[i], blue, t); + a[i] = lerp(a[i], alpha, t); + } +} + +void Surface::premultiplyAlpha() +{ + if (isNull()) return; + + detach(); + + FloatImage * img = m->image; + float * r = img->channel(0); + float * g = img->channel(1); + float * b = img->channel(2); + float * a = img->channel(3); + + const uint count = img->pixelCount(); + for (uint i = 0; i < count; i++) + { + r[i] *= a[i]; + g[i] *= a[i]; + b[i] *= a[i]; + } +} + + +void Surface::toGreyScale(float redScale, float greenScale, float blueScale, float alphaScale) +{ + if (isNull()) return; + + detach(); + + float sum = redScale + greenScale + blueScale + alphaScale; + redScale /= sum; + greenScale /= sum; + blueScale /= sum; + alphaScale /= sum; + + FloatImage * img = m->image; + float * r = img->channel(0); + float * g = img->channel(1); + float * b = img->channel(2); + float * a = img->channel(3); + + const uint count = img->pixelCount(); + for (uint i = 0; i < count; i++) + { + float grey = r[i] * redScale + g[i] * greenScale + b[i] * blueScale + a[i] * alphaScale; + a[i] = b[i] = g[i] = r[i] = grey; + } +} + +// Draw colored border. +void Surface::setBorder(float r, float g, float b, float a) +{ + if (isNull()) return; + + detach(); + + FloatImage * img = m->image; + const uint w = img->width(); + const uint h = img->height(); + const uint d = img->depth(); + + for (uint z = 0; z < d; z++) + { + for (uint i = 0; i < w; i++) + { + img->pixel(0, i, 0, z) = r; + img->pixel(1, i, 0, z) = g; + img->pixel(2, i, 0, z) = b; + img->pixel(3, i, 0, z) = a; + + img->pixel(0, i, h-1, z) = r; + img->pixel(1, i, h-1, z) = g; + img->pixel(2, i, h-1, z) = b; + img->pixel(3, i, h-1, z) = a; + } + + for (uint i = 0; i < h; i++) + { + img->pixel(0, 0, i, z) = r; + img->pixel(1, 0, i, z) = g; + img->pixel(2, 0, i, z) = b; + img->pixel(3, 0, i, z) = a; + + img->pixel(0, w-1, i, z) = r; + img->pixel(1, w-1, i, z) = g; + img->pixel(2, w-1, i, z) = b; + img->pixel(3, w-1, i, z) = a; + } + } +} + +// Fill image with the given color. +void Surface::fill(float red, float green, float blue, float alpha) +{ + if (isNull()) return; + + detach(); + + FloatImage * img = m->image; + float * r = img->channel(0); + float * g = img->channel(1); + float * b = img->channel(2); + float * a = img->channel(3); + + const uint count = img->pixelCount(); + for (uint i = 0; i < count; i++) r[i] = red; + for (uint i = 0; i < count; i++) g[i] = green; + for (uint i = 0; i < count; i++) b[i] = blue; + for (uint i = 0; i < count; i++) a[i] = alpha; +} + + +void Surface::scaleAlphaToCoverage(float coverage, float alphaRef/*= 0.5f*/, int alpha_channel/*= 3*/) +{ + if (isNull()) return; + + detach(); + + alphaRef = nv::clamp(alphaRef, 1.0f/256, 255.0f/256); + + m->image->scaleAlphaToCoverage(coverage, alphaRef, alpha_channel); +} + +/*bool Surface::normalizeRange(float * rangeMin, float * rangeMax) +{ + if (m->image == NULL) return false; + + range(0, rangeMin, rangeMax); + + if (*rangeMin == *rangeMax) { + // Single color image. + return false; + } + + const float scale = 1.0f / (*rangeMax - *rangeMin); + const float bias = *rangeMin * scale; + + if (range.x == 0.0f && range.y == 1.0f) { + // Already normalized. 
+ return true; + } + + detach(); + + // Scale to range. + img->scaleBias(0, 4, scale, bias); + //img->clamp(0, 4, 0.0f, 1.0f); + + return true; +}*/ + +// Ideally you should compress/quantize the RGB and M portions independently. +// Once you have M quantized, you would compute the corresponding RGB and quantize that. +void Surface::toRGBM(float range/*= 1*/, float threshold/*= 0.25*/) +{ + if (isNull()) return; + + detach(); + + threshold = ::clamp(threshold, 1e-6f, 1.0f); + + FloatImage * img = m->image; + float * r = img->channel(0); + float * g = img->channel(1); + float * b = img->channel(2); + float * a = img->channel(3); + + const uint count = img->pixelCount(); + for (uint i = 0; i < count; i++) { + float R = nv::clamp(r[i], 0.0f, 1.0f); + float G = nv::clamp(g[i], 0.0f, 1.0f); + float B = nv::clamp(b[i], 0.0f, 1.0f); + +#if 0 + // Baseline, no compression: + r[i] = R; + g[i] = G; + b[i] = B; + a[i] = 1; + +#elif 0 + float M = max(max(R, G), max(B, threshold)); + + r[i] = R / M; + g[i] = G / M; + b[i] = B / M; + + a[i] = (M - threshold) / (1 - threshold); + +#else + // The optimal compressor produces the best results, but can introduce interpolation errors! + float bestM; + float bestError = FLT_MAX; + + //float range = 15; // 4 bit quantization. + //int irange = 16; + float range = 255; // 8 bit quantization. + int irange = 256; + + + float M = max(max(R, G), max(B, threshold)); + int iM = ftoi_ceil((M - threshold) / (1 - threshold) * range); + + //for (int m = 0; m < 256; m++) { // If we use the entire search space, interpolation errors are very likely to occur. + for (int m = max(iM-16, 0); m < min(iM+16, irange); m++) { // If we constrain the search space, these errors disappear. + //for (int m = max(iM-4, 0); m < min(iM+4, irange); m++) { // If we constrain the search space, these errors disappear. + float fm = float(m) / range; + + // Decode M + float M = fm * (1 - threshold) + threshold; + + // Encode. + int ir = ftoi_round(range * nv::saturate(R / M)); + int ig = ftoi_round(range * nv::saturate(G / M)); + int ib = ftoi_round(range * nv::saturate(B / M)); + + // Decode. + float fr = (float(ir) / range) * M; + float fg = (float(ig) / range) * M; + float fb = (float(ib) / range) * M; + + // Measure error. + float error = square(R-fr) + square(G-fg) + square(B-fb); + + if (error < bestError) { + bestError = error; + bestM = M; + } + } + + M = bestM; + r[i] = nv::saturate(R / M); + g[i] = nv::saturate(G / M); + b[i] = nv::saturate(B / M); + a[i] = (M - threshold) / (1 - threshold); +#endif + } +} + +// @@ IC: Dubious merge. Review! +void Surface::fromRGBM(float range/*= 1*/, float threshold/*= 0.25*/) +{ + if (isNull()) return; + + detach(); + + threshold = ::clamp(threshold, 1e-6f, 1.0f); + + FloatImage * img = m->image; + float * r = img->channel(0); + float * g = img->channel(1); + float * b = img->channel(2); + float * a = img->channel(3); + + const uint count = img->pixelCount(); + for (uint i = 0; i < count; i++) { + float M = a[i] * (range - threshold) + threshold; + + r[i] *= M; + g[i] *= M; + b[i] *= M; + a[i] = 1.0f; + } +} + +// This is dumb way to encode luminance only values. 
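// Worked example of the shared-multiplier encoding used by toRGBM() above and
// toLM() below (illustrative numbers only, skipping the 8-bit search in the
// optimal-compressor branch; assumes the defaults range = 1, threshold = 0.25):
//   input  (R, G, B)  = (0.5, 0.1, 0.05)
//   M                 = max(R, G, B, threshold)   = 0.5
//   stored (r, g, b)  = (R/M, G/M, B/M)           = (1.0, 0.2, 0.1)
//   stored a          = (M - 0.25) / (1 - 0.25)   = 1/3
// fromRGBM() inverts this: M = a * (range - threshold) + threshold = 0.5, and
// multiplying the stored channels by M recovers the original (0.5, 0.1, 0.05).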
+void Surface::toLM(float range/*= 1*/, float threshold/*= 0.25*/) +{ + if (isNull()) return; + + detach(); + + threshold = ::clamp(threshold, 1e-6f, 1.0f); + + FloatImage * img = m->image; + float * r = img->channel(0); + float * g = img->channel(1); + float * b = img->channel(2); + float * a = img->channel(3); + + const uint count = img->pixelCount(); + for (uint i = 0; i < count; i++) { + float R = nv::clamp(r[i], 0.0f, 1.0f); + float G = nv::clamp(g[i], 0.0f, 1.0f); + float B = nv::clamp(b[i], 0.0f, 1.0f); + + float M = max(max(R, G), max(B, threshold)); + + float L = (R + G + B) / 3; + r[i] = L / M; + b[i] = L / M; + g[i] = L / M; + a[i] = (M - threshold) / (1 - threshold); + } +} + + +static Color32 toRgbe8(float r, float g, float b) +{ + Color32 c; + float v = max(max(r, g), b); + if (v < 1e-32) { + c.r = c.g = c.b = c.a = 0; + } + else { + int e; + v = frexp(v, &e) * 256.0f / v; + c.r = uint8(clamp(r * v, 0.0f, 255.0f)); + c.g = uint8(clamp(g * v, 0.0f, 255.0f)); + c.b = uint8(clamp(b * v, 0.0f, 255.0f)); + c.a = e + 128; + } + + return c; +} + + +/* + Alen Ladavac @ GDAlgorithms-list on Feb 7, 2007: + One trick that we use to alleviate such problems is to use RGBE5.3 - + i.e. have a fixed point exponent. Note that it is not enough to just + shift the exponent up for 3 bits, but you actually have to convert + each pixel in the RGBE8 texture by unpacking it to floats and then + repacking it with a non-integer exponent, which gives different + mantissas as well. Now your jumps in exponent are much smaller, thus + the bands are not that noticeable. It is still not as good as FP16, + but it is much better than RGBE8. I hope this explanation is + understandable, if not I can fill in more details. + + Though there still are some bands, you can get an even better + precision if you upload that same texture as RGBA16, because you'll + get even more interpolation then, and it works good as a scalable + option for people with more GPU RAM). Alternatively, when some of the + future cards (hopefully, because I'm trying to lobby for that + everywhere :) ), start returning more than 8 bits, your scenes will + automatically look better even without using RGBA16. + + Jon Watte: + The interpolation of 5.3 is the same as that of 8 bits, because it's a + fixed point format. + + The reason using 5.3 helps, is that each bit of quantization in the + interpolation only means 1/8th of a fully significant bit. The + quantization still happens, it's just less visible. The trade-off is + that you get less dynamic range. + + Alen Ladavac: + True, but it is just a small part of the improvement. The greater part + is that RGB values have to be calculated according to the fractional + exponent. With integer exponent, the RGB values jump by a factor of 2 + when each bit changes in exponent, and 5.3 with correct adjustment of + RGB lowers this jump to be about 1.09, which is much better. I may not + be entirely correct on the numbers, which I'm pulling out from my + memory now, but it's a rough estimate. 
+*/ +/* Ward's version: +static Color32 toRgbe8(float r, float g, float b) +{ + Color32 c; + float v = max(max(r, g), b); + if (v < 1e-32) { + c.r = c.g = c.b = c.a = 0; + } + else { + int e; + v = frexp(v, &e) * 256.0f / v; + c.r = uint8(clamp(r * v, 0.0f, 255.0f)); + c.g = uint8(clamp(g * v, 0.0f, 255.0f)); + c.b = uint8(clamp(b * v, 0.0f, 255.0f)); + c.a = e + 128; + } + + return c; +} +*/ + +// For R9G9B9E5, use toRGBE(9, 5), for Ward's RGBE, use toRGBE(8, 8) +// @@ Note that most Radiance HDR loaders use an exponent bias of 128 instead of 127! This implementation +// matches the OpenGL extension. +void Surface::toRGBE(int mantissaBits, int exponentBits) +{ + // According to the OpenGL extension: + // http://www.opengl.org/registry/specs/EXT/texture_shared_exponent.txt + // + // Components red, green, and blue are first clamped (in the process, + // mapping NaN to zero) so: + // + // red_c = max(0, min(sharedexp_max, red)) + // green_c = max(0, min(sharedexp_max, green)) + // blue_c = max(0, min(sharedexp_max, blue)) + // + // where sharedexp_max is (2^N-1)/2^N * 2^(Emax-B), N is the number + // of mantissa bits per component, Emax is the maximum allowed biased + // exponent value (careful: not necessarily 2^E-1 when E is the number of + // exponent bits), bits, and B is the exponent bias. For the RGB9_E5_EXT + // format, N=9, Emax=31, and B=15. + // + // The largest clamped component, max_c, is determined: + // + // max_c = max(red_c, green_c, blue_c) + // + // A preliminary shared exponent is computed: + // + // exp_shared_p = max(-B-1, floor(log2(max_c))) + 1 + B + // + // A refined shared exponent is then computed as: + // + // max_s = floor(max_c / 2^(exp_shared_p - B - N) + 0.5) + // + // { exp_shared_p, 0 <= max_s < 2^N + // exp_shared = { + // { exp_shared_p+1, max_s == 2^N + // + // These integers values in the range 0 to 2^N-1 are then computed: + // + // red_s = floor(red_c / 2^(exp_shared - B - N) + 0.5) + // green_s = floor(green_c / 2^(exp_shared - B - N) + 0.5) + // blue_s = floor(blue_c / 2^(exp_shared - B - N) + 0.5) + + if (isNull()) return; + + detach(); + + // mantissaBits = N + // exponentBits = E + // exponentMax = Emax + // exponentBias = B + // maxValue = sharedexp_max + + // max exponent: 5 -> 31, 8 -> 255 + const int exponentMax = (1 << exponentBits) - 1; + + // exponent bias: 5 -> 15, 8 -> 127 + const int exponentBias = (1 << (exponentBits - 1)) - 1; + + // Maximum representable value: 5 -> 63488, 8 -> HUGE + const float maxValue = float(exponentMax) / float(exponentMax + 1) * float(1 << (exponentMax - exponentBias)); + + + FloatImage * img = m->image; + float * r = img->channel(0); + float * g = img->channel(1); + float * b = img->channel(2); + float * a = img->channel(3); + + const uint count = img->pixelCount(); + for (uint i = 0; i < count; i++) { + // Clamp components: + float R = ::clamp(r[i], 0.0f, maxValue); + float G = ::clamp(g[i], 0.0f, maxValue); + float B = ::clamp(b[i], 0.0f, maxValue); + + // Compute max: + float M = max3(R, G, B); + + // Preliminary exponent: + int E = max(- exponentBias - 1, floatExponent(M)) + 1 + exponentBias; + nvDebugCheck(E >= 0 && E < (1 << exponentBits)); + + double denom = pow(2.0, double(E - exponentBias - mantissaBits)); + + // Refine exponent: + int m = ftoi_round(float(M / denom)); + nvDebugCheck(m <= (1 << mantissaBits)); + + if (m == (1 << mantissaBits)) { + denom *= 2; + E += 1; + nvDebugCheck(E < (1 << exponentBits)); + } + + R = floatRound(float(R / denom)); + G = floatRound(float(G / denom)); + B = 
floatRound(float(B / denom)); + + nvDebugCheck(R >= 0 && R < (1 << mantissaBits)); + nvDebugCheck(G >= 0 && G < (1 << mantissaBits)); + nvDebugCheck(B >= 0 && B < (1 << mantissaBits)); + + // Store as normalized float. + r[i] = R / ((1 << mantissaBits) - 1); + g[i] = G / ((1 << mantissaBits) - 1); + b[i] = B / ((1 << mantissaBits) - 1); + a[i] = float(E) / ((1 << exponentBits) - 1); + } +} + +void Surface::fromRGBE(int mantissaBits, int exponentBits) +{ + // According to the OpenGL extension: + // http://www.opengl.org/registry/specs/EXT/texture_shared_exponent.txt + // + // The 1st, 2nd, 3rd, and 4th components are called + // p_red, p_green, p_blue, and p_exp respectively and are treated as + // unsigned integers. These are then used to compute floating-point + // RGB components (ignoring the "Conversion to floating-point" section + // below in this case) as follows: + // + // red = p_red * 2^(p_exp - B - N) + // green = p_green * 2^(p_exp - B - N) + // blue = p_blue * 2^(p_exp - B - N) + // + // where B is 15 (the exponent bias) and N is 9 (the number of mantissa + // bits)." + + + // int exponent = v.field.biasedexponent - RGB9E5_EXP_BIAS - RGB9E5_MANTISSA_BITS; + // float scale = (float) pow(2, exponent); + // + // retval[0] = v.field.r * scale; + // retval[1] = v.field.g * scale; + // retval[2] = v.field.b * scale; + + + if (isNull()) return; + + detach(); + + // exponent bias: 5 -> 15, 8 -> 127 + const int exponentBias = (1 << (exponentBits - 1)) - 1; + + FloatImage * img = m->image; + float * r = img->channel(0); + float * g = img->channel(1); + float * b = img->channel(2); + float * a = img->channel(3); + + const uint count = img->pixelCount(); + for (uint i = 0; i < count; i++) { + // Expand normalized float to to 9995 + int R = ftoi_round(r[i] * ((1 << mantissaBits) - 1)); + int G = ftoi_round(g[i] * ((1 << mantissaBits) - 1)); + int B = ftoi_round(b[i] * ((1 << mantissaBits) - 1)); + int E = ftoi_round(a[i] * ((1 << exponentBits) - 1)); + + //float scale = ldexpf(1.0f, E - exponentBias - mantissaBits); + float scale = powf(2, float(E - exponentBias - mantissaBits)); + + r[i] = R * scale; + g[i] = G * scale; + b[i] = B * scale; + a[i] = 1; + } +} + +// Y is in the [0, 1] range, while CoCg are in the [-1, 1] range. +void Surface::toYCoCg() +{ + if (isNull()) return; + + detach(); + + FloatImage * img = m->image; + float * r = img->channel(0); + float * g = img->channel(1); + float * b = img->channel(2); + float * a = img->channel(3); + + const uint count = img->pixelCount(); + for (uint i = 0; i < count; i++) { + float R = r[i]; + float G = g[i]; + float B = b[i]; + + float Y = (2*G + R + B) * 0.25f; + float Co = (R - B); + float Cg = (2*G - R - B) * 0.5f; + + r[i] = Co; + g[i] = Cg; + b[i] = 1.0f; + a[i] = Y; + } +} + +// img.toYCoCg(); +// img.blockScaleCoCg(); +// img.scaleBias(0, 0.5, 0.5); +// img.scaleBias(1, 0.5, 0.5); + +// @@ Add support for threshold. +// We could do something to prevent scale values from adjacent blocks from being too different to each other +// and minimize bilinear interpolation artifacts. +void Surface::blockScaleCoCg(int bits/*= 5*/, float threshold/*= 0.0*/) +{ + if (isNull() || depth() != 1) return; + + detach(); + + FloatImage * img = m->image; + const uint w = img->width(); + const uint h = img->height(); + const uint bw = max(1U, w/4); + const uint bh = max(1U, h/4); + + for (uint bj = 0; bj < bh; bj++) { + for (uint bi = 0; bi < bw; bi++) { + + // Compute per block scale. 
+ float m = 1.0f / 255.0f; + for (uint j = 0; j < 4; j++) { + const uint y = bj*4 + j; + if (y >= h) continue; + + for (uint i = 0; i < 4; i++) { + const uint x = bi*4 + i; + if (x >= w) continue; + + float Co = img->pixel(0, x, y, 0); + float Cg = img->pixel(1, x, y, 0); + + m = max(m, fabsf(Co)); + m = max(m, fabsf(Cg)); + } + } + + float scale = PixelFormat::quantizeCeil(m, bits, 8); + nvDebugCheck(scale >= m); + + // Store block scale in blue channel and scale CoCg. + for (uint j = 0; j < 4; j++) { + for (uint i = 0; i < 4; i++) { + uint x = min(bi*4 + i, w); + uint y = min(bj*4 + j, h); + + float & Co = img->pixel(0, x, y, 0); + float & Cg = img->pixel(1, x, y, 0); + + Co /= scale; + nvDebugCheck(fabsf(Co) <= 1.0f); + + Cg /= scale; + nvDebugCheck(fabsf(Cg) <= 1.0f); + + img->pixel(2, x, y, 0) = scale; + } + } + } + } +} + +void Surface::fromYCoCg() +{ + if (isNull()) return; + + detach(); + + FloatImage * img = m->image; + float * r = img->channel(0); + float * g = img->channel(1); + float * b = img->channel(2); + float * a = img->channel(3); + + const uint count = img->pixelCount(); + for (uint i = 0; i < count; i++) { + float Co = r[i]; + float Cg = g[i]; + float scale = b[i] * 0.5f; + float Y = a[i]; + + Co *= scale; + Cg *= scale; + + float R = Y + Co - Cg; + float G = Y + Cg; + float B = Y - Co - Cg; + + r[i] = R; + g[i] = G; + b[i] = B; + a[i] = 1.0f; + } +} + +void Surface::toLUVW(float range/*= 1.0f*/) +{ + if (isNull()) return; + + detach(); + + float irange = 1.0f / range; + + FloatImage * img = m->image; + float * r = img->channel(0); + float * g = img->channel(1); + float * b = img->channel(2); + float * a = img->channel(3); + + const uint count = img->pixelCount(); + for (uint i = 0; i < count; i++) { + float R = nv::clamp(r[i] * irange, 0.0f, 1.0f); + float G = nv::clamp(g[i] * irange, 0.0f, 1.0f); + float B = nv::clamp(b[i] * irange, 0.0f, 1.0f); + + float L = max(sqrtf(R*R + G*G + B*B), 1e-6f); // Avoid division by zero. + + r[i] = R / L; + g[i] = G / L; + b[i] = B / L; + a[i] = L / sqrtf(3); + } +} + +void Surface::fromLUVW(float range/*= 1.0f*/) +{ + // Decompression is the same as in RGBM. + fromRGBM(range * sqrtf(3)); +} + +void Surface::abs(int channel) +{ + if (isNull()) return; + + detach(); + + FloatImage * img = m->image; + float * c = img->channel(channel); + + const uint count = img->pixelCount(); + for (uint i = 0; i < count; i++) { + c[i] = fabsf(c[i]); + } +} + +void Surface::convolve(int channel, int kernelSize, float * kernelData) +{ + if (isNull()) return; + + detach(); + + Kernel2 k(kernelSize, kernelData); + m->image->convolve(k, channel, (FloatImage::WrapMode)m->wrapMode); +} + +// Assumes input has already been scaled by exposure. +void Surface::toneMap(ToneMapper tm, float * parameters) +{ + if (isNull()) return; + + detach(); + + FloatImage * img = m->image; + float * r = img->channel(0); + float * g = img->channel(1); + float * b = img->channel(2); + const uint count = img->pixelCount(); + + if (tm == ToneMapper_Linear) { + // Clamp preserving the hue. 
+ for (uint i = 0; i < count; i++) { + float m = max3(r[i], g[i], b[i]); + if (m > 1.0f) { + r[i] *= 1.0f / m; + g[i] *= 1.0f / m; + b[i] *= 1.0f / m; + } + } + } + else if (tm == ToneMapper_Reindhart) { + for (uint i = 0; i < count; i++) { + r[i] /= r[i] + 1; + g[i] /= g[i] + 1; + b[i] /= b[i] + 1; + } + } + else if (tm == ToneMapper_Halo) { + for (uint i = 0; i < count; i++) { + r[i] = 1 - exp2f(-r[i]); + g[i] = 1 - exp2f(-g[i]); + b[i] = 1 - exp2f(-b[i]); + } + } + else if (tm == ToneMapper_Lightmap) { + // @@ Goals: + // Preserve hue. + // Avoid clamping abrubtly. + // Minimize color difference along most of the color range. [0, alpha) + for (uint i = 0; i < count; i++) { + float m = max3(r[i], g[i], b[i]); + if (m > 1.0f) { + r[i] *= 1.0f / m; + g[i] *= 1.0f / m; + b[i] *= 1.0f / m; + } + } + } +} + +void Surface::toLogScale(int channel, float base) { + if (isNull()) return; + + detach(); + + FloatImage * img = m->image; + float * c = img->channel(channel); + + float scale = 1.0f / log2f(base); + + const uint count = img->pixelCount(); + for (uint i = 0; i < count; i++) { + c[i] = log2f(c[i]) * scale; + } +} + +void Surface::fromLogScale(int channel, float base) { + if (isNull()) return; + + detach(); + + FloatImage * img = m->image; + float * c = img->channel(channel); + + float scale = log2f(base); + + const uint count = img->pixelCount(); + for (uint i = 0; i < count; i++) { + c[i] = exp2f(c[i] * scale); + } +} + + + +/* +void Surface::blockLuminanceScale(float scale) +{ + if (isNull()) return; + + detach(); + + FloatImage * img = m->image; + + //float * r = img->channel(0); + //float * g = img->channel(1); + //float * b = img->channel(2); + //float * a = img->channel(3); + + const uint w = img->width(); + const uint h = img->height(); + const uint bw = max(1U, w/4); + const uint bh = max(1U, h/4); + + Vector3 L = normalize(Vector3(1, 1, 1)); + + for (uint bj = 0; bj < bh; bj++) { + for (uint bi = 0; bi < bw; bi++) { + + // Compute block centroid. + Vector3 centroid(0.0f); + int count = 0; + for (uint j = 0; j < 4; j++) { + const uint y = bj*4 + j; + if (y >= h) continue; + + for (uint i = 0; i < 4; i++) { + const uint x = bi*4 + i; + if (x >= w) continue; + + float r = img->pixel(x, y, 0); + float g = img->pixel(x, y, 1); + float b = img->pixel(x, y, 2); + Vector3 rgb(r, g, b); + + centroid += rgb; + count++; + } + } + + centroid /= float(count); + + // Project to luminance plane. 
+ for (uint j = 0; j < 4; j++) { + const uint y = bj*4 + j; + if (y >= h) continue; + + for (uint i = 0; i < 4; i++) { + const uint x = bi*4 + i; + if (x >= w) continue; + + float & r = img->pixel(x, y, 0); + float & g = img->pixel(x, y, 1); + float & b = img->pixel(x, y, 2); + Vector3 rgb(r, g, b); + + Vector3 delta = rgb - centroid; + + delta -= scale * dot(delta, L) * L; + + r = centroid.x + delta.x; + g = centroid.y + delta.y; + b = centroid.z + delta.z; + } + } + } + } +} +*/ + +/* +void Surface::toJPEGLS() +{ + if (isNull()) return; + + detach(); + + FloatImage * img = m->image; + float * r = img->channel(0); + float * g = img->channel(1); + float * b = img->channel(2); + + const uint count = img->width() * img->height(); + for (uint i = 0; i < count; i++) { + float R = nv::clamp(r[i], 0.0f, 1.0f); + float G = nv::clamp(g[i], 0.0f, 1.0f); + float B = nv::clamp(b[i], 0.0f, 1.0f); + + r[i] = R-G; + g[i] = G; + b[i] = B-G; + } +} + +void Surface::fromJPEGLS() +{ + if (isNull()) return; + + detach(); + + FloatImage * img = m->image; + float * r = img->channel(0); + float * g = img->channel(1); + float * b = img->channel(2); + + const uint count = img->width() * img->height(); + for (uint i = 0; i < count; i++) { + float R = nv::clamp(r[i], -1.0f, 1.0f); + float G = nv::clamp(g[i], 0.0f, 1.0f); + float B = nv::clamp(b[i], -1.0f, 1.0f); + + r[i] = R+G; + g[i] = G; + b[i] = B+G; + } +} +*/ + + +// If dither is true, this uses Floyd-Steinberg dithering method. +void Surface::binarize(int channel, float threshold, bool dither) +{ + if (isNull()) return; + + detach(); + + FloatImage * img = m->image; + + if (!dither) { + float * c = img->channel(channel); + const uint count = img->pixelCount(); + for (uint i = 0; i < count; i++) { + c[i] = float(c[i] > threshold); + } + } + else { + const uint w = img->width(); + const uint h = img->height(); + const uint d = img->depth(); + + float * row0 = new float[(w+2)]; + float * row1 = new float[(w+2)]; + + // @@ Extend Floyd-Steinberg dithering to 3D properly. + for (uint z = 0; z < d; z++) { + memset(row0, 0, sizeof(float)*(w+2)); + memset(row1, 0, sizeof(float)*(w+2)); + + for (uint y = 0; y < h; y++) { + for (uint x = 0; x < w; x++) { + + float & f = img->pixel(channel, x, y, 0); + + // Add error and quantize. + float qf = float(f + row0[1+x] > threshold); + + // Compute new error: + float diff = f - qf; + + // Store color. + f = qf; + + // Propagate new error. + row0[1+x+1] += (7.0f / 16.0f) * diff; + row1[1+x-1] += (3.0f / 16.0f) * diff; + row1[1+x+0] += (5.0f / 16.0f) * diff; + row1[1+x+1] += (1.0f / 16.0f) * diff; + } + + swap(row0, row1); + memset(row1, 0, sizeof(float)*(w+2)); + } + } + + delete [] row0; + delete [] row1; + } +} + +// Uniform quantizer. +// Assumes input is in [0, 1] range. Output is in the [0, 1] range, but rounded to the middle of each bin. +// If exactEndPoints is true, [0, 1] are represented exactly, and the correponding bins are half the size, so quantization is not truly uniform. +// When dither is true, this uses Floyd-Steinberg dithering. 
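As a rough standalone sketch of the two rounding rules just described (the helper names below are illustrative only and not NVTT API; `Surface::quantize` in the hunk that follows is the actual implementation), with bits = 2 the exact-endpoint rule snaps to {0, 1/3, 2/3, 1} with half-size bins at the ends, while the uniform rule snaps to the bin centers {1/8, 3/8, 5/8, 7/8}:

// Editor's sketch, assuming bits = 2; quantizeExact / quantizeUniform are hypothetical names.
#include <algorithm>
#include <cmath>
#include <cstdio>

static float quantizeExact(float x, int bits)   // endpoints 0 and 1 representable, end bins half size
{
    const float range = float((1 << bits) - 1); // bits = 2 -> values 0, 1/3, 2/3, 1
    return std::floor(x * range + 0.5f) / range;
}

static float quantizeUniform(float x, int bits) // truly uniform bins, endpoints not exact
{
    const float range = float(1 << bits);       // bits = 2 -> values 1/8, 3/8, 5/8, 7/8
    const float q = (std::floor(x * range) + 0.5f) / range;
    return std::min(q, 1.0f);                   // saturate, as the library code below does
}

int main()
{
    for (float x = 0.0f; x <= 1.001f; x += 0.25f)
        std::printf("%.2f -> exact %.4f, uniform %.4f\n", x, quantizeExact(x, 2), quantizeUniform(x, 2));
}

The exactEndPoints flag therefore trades slightly non-uniform bins for the ability to represent 0 and 1 exactly.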
+void Surface::quantize(int channel, int bits, bool exactEndPoints, bool dither) +{ + if (isNull()) return; + + detach(); + + FloatImage * img = m->image; + + float scale, offset0, offset1; + if (exactEndPoints) { + // floor(x*(range-1) + 0.5) / (range-1) + scale = float((1 << bits) - 1); + offset0 = 0.5f; + offset1 = 0.0f; + } + else { + // (floor(x*range) + 0.5) / range + scale = float(1 << bits); + offset0 = 0.0f; + offset1 = 0.5f; + } + + if (!dither) { + float * c = img->channel(channel); + const uint count = img->pixelCount(); + for (uint i = 0; i < count; i++) { + c[i] = saturate((floorf(c[i] * scale + offset0) + offset1) / scale); + } + } + else { + const uint w = img->width(); + const uint h = img->height(); + const uint d = img->depth(); + + float * row0 = new float[(w+2)]; + float * row1 = new float[(w+2)]; + + for (uint z = 0; z < d; z++) { + memset(row0, 0, sizeof(float)*(w+2)); + memset(row1, 0, sizeof(float)*(w+2)); + + for (uint y = 0; y < h; y++) { + for (uint x = 0; x < w; x++) { + + float & f = img->pixel(channel, x, y, 0); + + // Add error and quantize. + float qf = saturate((floorf((f + row0[1+x]) * scale + offset0) + offset1) / scale); + + // Compute new error: + float diff = f - qf; + + // Store color. + f = qf; + + // Propagate new error. + row0[1+x+1] += (7.0f / 16.0f) * diff; + row1[1+x-1] += (3.0f / 16.0f) * diff; + row1[1+x+0] += (5.0f / 16.0f) * diff; + row1[1+x+1] += (1.0f / 16.0f) * diff; + } + + swap(row0, row1); + memset(row1, 0, sizeof(float)*(w+2)); + } + } + + delete [] row0; + delete [] row1; + } +} + + + +// Set normal map options. +void Surface::toNormalMap(float sm, float medium, float big, float large) +{ + if (isNull()) return; + + detach(); + + const Vector4 filterWeights(sm, medium, big, large); + + const FloatImage * img = m->image; + m->image = nv::createNormalMap(img, (FloatImage::WrapMode)m->wrapMode, filterWeights); + + delete img; + + m->isNormalMap = true; +} + +void Surface::normalizeNormalMap() +{ + if (isNull()) return; + if (!m->isNormalMap) return; + + detach(); + + nv::normalizeNormalMap(m->image); +} + +void Surface::transformNormals(NormalTransform xform) +{ + if (isNull()) return; + + detach(); + + FloatImage * img = m->image; + + const uint count = img->pixelCount(); + for (uint i = 0; i < count; i++) { + float & x = img->pixel(0, i); + float & y = img->pixel(1, i); + float & z = img->pixel(2, i); + Vector3 n(x, y, z); + + n = normalizeSafe(n, Vector3(0.0f), 0.0f); + + if (xform == NormalTransform_Orthographic) { + n.z = 0.0f; + } + else if (xform == NormalTransform_Stereographic) { + n.x = n.x / (1 + n.z); + n.y = n.y / (1 + n.z); + n.z = 0.0f; + } + else if (xform == NormalTransform_Paraboloid) { + float a = (n.x * n.x) + (n.y * n.y); + float b = n.z; + float c = -1.0f; + float discriminant = b * b - 4.0f * a * c; + float t = (-b + sqrtf(discriminant)) / (2.0f * a); + n.x = n.x * t; + n.y = n.y * t; + n.z = 0.0f; + } + else if (xform == NormalTransform_Quartic) { + // Use Newton's method to solve equation: + // f(t) = 1 - zt - (x^2+y^2)t^2 + x^2y^2t^4 = 0 + // f'(t) = - z - 2(x^2+y^2)t + 4x^2y^2t^3 + + // Initial approximation: + float a = (n.x * n.x) + (n.y * n.y); + float b = n.z; + float c = -1.0f; + float discriminant = b * b - 4.0f * a * c; + float t = (-b + sqrtf(discriminant)) / (2.0f * a); + + float d = fabs(n.z * t - (1 - n.x*n.x*t*t) * (1 - n.y*n.y*t*t)); + + while (d > 0.0001) { + float ft = 1 - n.z * t - (n.x*n.x + n.y*n.y)*t*t + n.x*n.x*n.y*n.y*t*t*t*t; + float fit = - n.z - 2*(n.x*n.x + n.y*n.y)*t + 
4*n.x*n.x*n.y*n.y*t*t*t; + t -= ft / fit; + d = fabs(n.z * t - (1 - n.x*n.x*t*t) * (1 - n.y*n.y*t*t)); + }; + + n.x = n.x * t; + n.y = n.y * t; + n.z = 0.0f; + } + /*else if (xform == NormalTransform_DualParaboloid) { + + }*/ + + x = n.x; + y = n.y; + z = n.z; + } +} + +void Surface::reconstructNormals(NormalTransform xform) +{ + if (isNull()) return; + + detach(); + + FloatImage * img = m->image; + + const uint count = img->pixelCount(); + for (uint i = 0; i < count; i++) { + float & x = img->pixel(0, i); + float & y = img->pixel(1, i); + float & z = img->pixel(2, i); + Vector3 n(x, y, z); + + if (xform == NormalTransform_Orthographic) { + n.z = sqrtf(1 - nv::clamp(n.x * n.x + n.y * n.y, 0.0f, 1.0f)); + } + else if (xform == NormalTransform_Stereographic) { + float denom = 2.0f / (1 + nv::clamp(n.x * n.x + n.y * n.y, 0.0f, 1.0f)); + n.x *= denom; + n.y *= denom; + n.z = denom - 1; + } + else if (xform == NormalTransform_Paraboloid) { + n.x = n.x; + n.y = n.y; + n.z = 1.0f - nv::clamp(n.x * n.x + n.y * n.y, 0.0f, 1.0f); + n = normalizeSafe(n, Vector3(0.0f), 0.0f); + } + else if (xform == NormalTransform_Quartic) { + n.x = n.x; + n.y = n.y; + n.z = nv::clamp((1 - n.x * n.x) * (1 - n.y * n.y), 0.0f, 1.0f); + n = normalizeSafe(n, Vector3(0.0f), 0.0f); + } + /*else if (xform == NormalTransform_DualParaboloid) { + + }*/ + + x = n.x; + y = n.y; + z = n.z; + } +} + +void Surface::toCleanNormalMap() +{ + if (isNull()) return; + + detach(); + + const uint count = m->image->pixelCount(); + for (uint i = 0; i < count; i++) { + float x = m->image->pixel(0, i); + float y = m->image->pixel(1, i); + + m->image->pixel(2, i) = x*x + y*y; + } +} + +// [-1,1] -> [ 0,1] +void Surface::packNormals(float scale/*= 0.5f*/, float bias/*= 0.5f*/) { + if (isNull()) return; + detach(); + m->image->scaleBias(0, 3, scale, bias); +} + +// [ 0,1] -> [-1,1] +void Surface::expandNormals(float scale/*= 2.0f*/, float bias/*= - 2.0f * 127.0f / 255.0f*/) { + if (isNull()) return; + detach(); + m->image->scaleBias(0, 3, scale, bias); +} + + +// Create a Toksvig map for this normal map. +// http://blog.selfshadow.com/2011/07/22/specular-showdown/ +// @@ Assumes this is a normal map expanded in the [-1, 1] range. +Surface Surface::createToksvigMap(float power) const +{ + if (isNull()) return Surface(); + + // @@ TODO + + return Surface(); +} + +// @@ Should I add support for LEAN maps? That requires 5 terms, which would have to be encoded in two textures. +// There's nothing stopping us from having 5 channels in a surface, and then, let the user swizzle them as they wish. +// CLEAN maps are probably more practical, though. 
+// http://www.cs.umbc.edu/~olano/papers/lean/ +// http://gaim.umbc.edu/2011/07/24/shiny-and-clean/ +// http://gaim.umbc.edu/2011/07/26/on-error/ +NVTT_API Surface Surface::createCleanMap() const +{ + if (isNull()) return Surface(); + + // @@ TODO + + return Surface(); +} + + +void Surface::flipX() +{ + if (isNull()) return; + + detach(); + + m->image->flipX(); +} + +void Surface::flipY() +{ + if (isNull()) return; + + detach(); + + m->image->flipY(); +} + +void Surface::flipZ() +{ + if (isNull()) return; + + detach(); + + m->image->flipZ(); +} + +Surface Surface::createSubImage(int x0, int x1, int y0, int y1, int z0, int z1) const +{ + Surface s; + + if (isNull()) return s; + if (x0 < 0 || x1 > width() || x0 > x1) return s; + if (y0 < 0 || y1 > height() || y0 > y1) return s; + if (z0 < 0 || z1 > depth() || z0 > z1) return s; + if (x1 >= width() || y1 >= height() || z1 >= depth()) return s; + + FloatImage * img = s.m->image = new FloatImage; + + int w = x1 - x0 + 1; + int h = y1 - y0 + 1; + int d = z1 - z0 + 1; + + img->allocate(4, w, h, d); + + for (int c = 0; c < 4; c++) { + for (int z = 0; z < d; z++) { + for (int y = 0; y < h; y++) { + for (int x = 0; x < w; x++) { + img->pixel(c, x, y, z) = m->image->pixel(c, x0+x, y0+y, z0+z); + } + } + } + } + + return s; +} + +bool Surface::copyChannel(const Surface & srcImage, int srcChannel) +{ + return copyChannel(srcImage, srcChannel, srcChannel); +} + +bool Surface::copyChannel(const Surface & srcImage, int srcChannel, int dstChannel) +{ + if (srcChannel < 0 || srcChannel > 3 || dstChannel < 0 || dstChannel > 3) return false; + + FloatImage * dst = m->image; + const FloatImage * src = srcImage.m->image; + + if (!sameLayout(dst, src)) { + return false; + } + nvDebugCheck(dst->componentCount() == 4 && src->componentCount() == 4); + + detach(); + + dst = m->image; + + memcpy(dst->channel(dstChannel), src->channel(srcChannel), dst->pixelCount()*sizeof(float)); + + return true; +} + +bool Surface::addChannel(const Surface & srcImage, int srcChannel, int dstChannel, float scale) +{ + if (srcChannel < 0 || srcChannel > 3 || dstChannel < 0 || dstChannel > 3) return false; + + FloatImage * dst = m->image; + const FloatImage * src = srcImage.m->image; + + if (!sameLayout(dst, src)) { + return false; + } + nvDebugCheck(dst->componentCount() == 4 && src->componentCount() == 4); + + detach(); + + dst = m->image; + + float * d = dst->channel(dstChannel); + const float * s = src->channel(srcChannel); + + const uint count = src->pixelCount(); + for (uint i = 0; i < count; i++) { + d[i] += s[i] * scale; + } + + return true; +} + + +bool Surface::copy(const Surface & srcImage, int xsrc, int ysrc, int zsrc, int xsize, int ysize, int zsize, int xdst, int ydst, int zdst) +{ + if (xsrc < 0 || ysrc < 0 || zsrc < 0) return false; + if (xdst < 0 || ydst < 0 || zdst < 0) return false; + + FloatImage * dst = m->image; + const FloatImage * src = srcImage.m->image; + + if (U32(xsrc + xsize) > src->width() || U32(ysrc + ysize) > src->height() || U32(zsrc + zsize) > src->depth()) return false; + if (U32(xdst + xsize) > dst->width() || U32(ydst + ysize) > dst->height() || U32(zdst + zsize) > dst->depth()) return false; + + detach(); + + // For each channel. + for(int i = 0; i < 4; i++) { + float * d = dst->channel(i); + const float * s = src->channel(i); + + // Copy region from src to dst. 
+ for (int z = 0; z < zsize; z++) { + for (int y = 0; y < ysize; y++) { + for (int x = 0; x < xsize; x++) { + d[dst->index(xdst + x, ydst + y, zdst + z)] = s[src->index(xsrc + x, ysrc + y, zsrc + z)]; + } + } + } + } + + return true; +} + + +// Draw colored border around atlas elements. +void Surface::setAtlasBorder(int aw, int ah, float r, float g, float b, float a) +{ + if (isNull()) return; + if (aw <= 0) return; + if (ah <= 0) return; + + detach(); + + FloatImage * img = m->image; + const uint w = img->width(); + const uint h = img->height(); + const uint d = img->depth(); + + // @@ Ideally the reminder of these divisions should be 0. + uint tile_height = h / ah; + uint tile_width = w / aw; + + // Note that this renders two consecutive lines between tiles. In theory we could just have one, but this way I think we have better rotation invariance. + + for (uint z = 0; z < d; z++) + { + // Horizontal lines: + for (uint i = 0, y = 0; i < uint(ah); i++, y += tile_height) + { + for (uint x = 0; x < w; x++) + { + img->pixel(0, x, y, z) = r; + img->pixel(1, x, y, z) = g; + img->pixel(2, x, y, z) = b; + img->pixel(3, x, y, z) = a; + + img->pixel(0, x, y + tile_height - 1, z) = r; + img->pixel(1, x, y + tile_height - 1, z) = g; + img->pixel(2, x, y + tile_height - 1, z) = b; + img->pixel(3, x, y + tile_height - 1, z) = a; + } + } + + // Vertical lines: + for (uint i = 0, x = 0; i < uint(ah); i++, x += tile_width) + { + for (uint y = 0; y < h; y++) + { + img->pixel(0, x, y, z) = r; + img->pixel(1, x, y, z) = g; + img->pixel(2, x, y, z) = b; + img->pixel(3, x, y, z) = a; + + img->pixel(0, x + tile_width - 1, y, z) = r; + img->pixel(1, x + tile_width - 1, y, z) = g; + img->pixel(2, x + tile_width - 1, y, z) = b; + img->pixel(3, x + tile_width - 1, y, z) = a; + } + } + } +} + + + +float nvtt::rmsError(const Surface & reference, const Surface & image) +{ + return nv::rmsColorError(reference.m->image, image.m->image, reference.alphaMode() == nvtt::AlphaMode_Transparency); +} + + +float nvtt::rmsAlphaError(const Surface & reference, const Surface & image) +{ + return nv::rmsAlphaError(reference.m->image, image.m->image); +} + + +float nvtt::cieLabError(const Surface & reference, const Surface & image) +{ + return nv::cieLabError(reference.m->image, image.m->image); +} + +float nvtt::angularError(const Surface & reference, const Surface & image) +{ + //return nv::averageAngularError(reference.m->image, image.m->image); + return nv::rmsAngularError(reference.m->image, image.m->image); +} + + +Surface nvtt::diff(const Surface & reference, const Surface & image, float scale) +{ + const FloatImage * ref = reference.m->image; + const FloatImage * img = image.m->image; + + if (!sameLayout(img, ref)) { + return Surface(); + } + + nvDebugCheck(img->componentCount() == 4); + nvDebugCheck(ref->componentCount() == 4); + + nvtt::Surface diffImage; + FloatImage * diff = diffImage.m->image = new FloatImage; + diff->allocate(4, img->width(), img->height(), img->depth()); + + const uint count = img->pixelCount(); + for (uint i = 0; i < count; i++) + { + float r0 = img->pixel(0, i); + float g0 = img->pixel(1, i); + float b0 = img->pixel(2, i); + //float a0 = img->pixel(3, i); + float r1 = ref->pixel(0, i); + float g1 = ref->pixel(1, i); + float b1 = ref->pixel(2, i); + float a1 = ref->pixel(3, i); + + float dr = r0 - r1; + float dg = g0 - g1; + float db = b0 - b1; + //float da = a0 - a1; + + if (reference.alphaMode() == nvtt::AlphaMode_Transparency) + { + dr *= a1; + dg *= a1; + db *= a1; + } + + diff->pixel(0, i) = dr 
* scale; + diff->pixel(1, i) = dg * scale; + diff->pixel(2, i) = db * scale; + diff->pixel(3, i) = a1; + } + + return diffImage; +} + +float nvtt::rmsToneMappedError(const Surface & reference, const Surface & img, float exposure) +{ + // @@ We could do this in the rms function without having to create image copies. + Surface r = reference; + Surface i = img; + + // @@ Ideally we should use our Reindhart operator. Add Reindhart_L & Reindhart_M ? + + float scale = 1.0f / exposure; + + r.scaleBias(0, scale, 0); r.scaleBias(1, scale, 0); r.scaleBias(2, scale, 0); + r.toneMap(ToneMapper_Reindhart, NULL); + r.toSrgb(); + + i.scaleBias(0, scale, 0); i.scaleBias(1, scale, 0); i.scaleBias(2, scale, 0); + i.toneMap(ToneMapper_Reindhart, NULL); + i.toSrgb(); + + return nv::rmsColorError(r.m->image, i.m->image, reference.alphaMode() == nvtt::AlphaMode_Transparency); +} + + +Surface nvtt::histogram(const Surface & img, int width, int height) +{ + float min_color[3], max_color[3]; + img.range(0, &min_color[0], &max_color[0]); + img.range(1, &min_color[1], &max_color[1]); + img.range(2, &min_color[2], &max_color[2]); + + float minRange = nv::min3(min_color[0], min_color[1], min_color[2]); + float maxRange = nv::max3(max_color[0], max_color[1], max_color[2]); + + if (maxRange > 16) maxRange = 16; + + return histogram(img, /*minRange*/0, maxRange, width, height); +} + +#include "nvcore/Array.inl" +#include "nvmath/PackedFloat.h" +#include <stdio.h> + +nvtt::Surface nvtt::histogram(const Surface & img, float minRange, float maxRange, int width, int height) +{ + nv::Array<Vector3> buckets; + buckets.resize(width, Vector3(0)); + + int w = img.width(); + int h = img.height(); + int d = img.depth(); + + const float * r = img.channel(0); + const float * g = img.channel(1); + const float * b = img.channel(2); + const float * a = img.channel(3); + +#if 0 + for (int z = 0; z < d; z++) + for (int y = 0; y < h; y++) + for (int x = 0; x < w; x++) + { + int i = x + y * w + z * w * d; + + float fr = (r[i] - minRange) / (maxRange - minRange); + float fg = (g[i] - minRange) / (maxRange - minRange); + float fb = (b[i] - minRange) / (maxRange - minRange); + + int R = ftoi_round(fr * (width - 1)); + int G = ftoi_round(fg * (width - 1)); + int B = ftoi_round(fb * (width - 1)); + + R = nv::clamp(R, 0, width-1); + G = nv::clamp(G, 0, width-1); + B = nv::clamp(B, 0, width-1); + + // Alpha weighted histogram? + float A = nv::saturate(a[i]); + + buckets[R].x += A; + buckets[G].y += A; + buckets[B].z += A; + } + +#elif 1 + + float exposure = 0.22f; + + //int E = 8, M = 23; // float + int E = 5, M = 10; // half + //int E = 5, M = 9; // rgb9e5 + //int E = 5, M = 6; // r11g11b10 + + for (int e = 0; e < (1 << E); e++) + { + /*if (e == 0x1f) { // Skip NaN and inf. + continue; + }*/ + if (e == 0) { // Skip denormals. + continue; + } + + for (int m = 0; m < (1 << M); m++) + { + Float754 F; + F.field.negative = 0; + F.field.biasedexponent = e + 128 - (1 << (E - 1)) - 1; // E=5 -> 128 - 15 + F.field.mantissa = m << (23 - M); + + // value = (1 + mantissa) * 2^(e-15) + + // @@ Handle denormals. + + float fc = F.value; + + // Tone mapping: + fc /= exposure; + //fc /= (fc + 1); // Reindhart tone mapping. + fc = 1 - exp2f(-fc); // Halo2 tone mapping.
+ + // Gamma space conversion: + //fc = sqrtf(fc); + fc = powf(fc, 1.0f/2.2f); + //fc = toSrgb(fc); + + //fc = (fc - 0.5f) * 8; // zoom in + //if (fc < 0 || fc > 1) continue; + + //printf("%f\n", fc); + + int c = ftoi_round(fc * (width - 1) / 1); + c = clamp(c, 0, width - 1); + + buckets[c] += Vector3(1); + } + } + +#else + + float exposure = 0.22f; + + int R = 8, M = 8; + //int R = 6, M = 8; + //int R = 9, M = 5; + + float threshold = 1.0f / (1 << M); + //float threshold = 0.25f; + + for (int r = 0; r < (1 << R); r++) + { + float fr = float(r) / ((1 << R) - 1); + + for (int m = 0; m < (1 << M); m++) + { + float fm = float(m) / ((1 << M) - 1); + float M = fm * (1 - threshold) + threshold; + + float fc = fr * M; + + fc /= exposure; + + //fc /= (fc + 1); // Reindhart tone mapping. + fc = 1 - exp2f(-fc); // Halo2 tone mapping. + + // Gamma space conversion: + //fc = sqrtf(fc); + fc = powf(fc, 1.0f/2.2f); + //fc = toSrgb(fc); + + //fc = (fc - 0.5f) * 8; // zoom in + //if (fc < 0 || fc > 1) continue; + + int c = ftoi_round(fc * (width - 1)); + c = clamp(c, 0, width - 1); + + buckets[c] += Vector3(1); + } + } + + //buckets[0] = Vector3(1); // Hack, for prettier histograms. + +#endif + + + // Compute largest height. + float maxh = 0; + for (int i = 0; i < width; i++) { + maxh = nv::max(maxh, nv::max3(buckets[i].x, buckets[i].y, buckets[i].z)); + } + + printf("maxh = %f\n", maxh); + //maxh = 80; + maxh = 256; + + // Draw histogram. + nvtt::Surface hist; + hist.setImage(width, height, 1); + + for (int y = 0; y < height; y++) { + float fy = 1.0f - float(y) / (height - 1); + for (int x = 0; x < width; x++) { + hist.m->image->pixel(0, x, y, /*z=*/0) = fy < (buckets[x].x / maxh); + hist.m->image->pixel(1, x, y, /*z=*/0) = fy < (buckets[x].y / maxh); + hist.m->image->pixel(2, x, y, /*z=*/0) = fy < (buckets[x].z / maxh); + } + } + + return hist; +} Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/TaskDispatcher.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/TaskDispatcher.h +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/TaskDispatcher.h @@ -0,0 +1,149 @@ + +#include "nvtt.h" + +// OpenMP +// http://en.wikipedia.org/wiki/OpenMP +#if defined(HAVE_OPENMP) +#include <omp.h> +#endif + +// Grand Central Dispatch (GCD/libdispatch) +// http://developer.apple.com/mac/library/documentation/Performance/Reference/GCD_libdispatch_Ref/Reference/reference.html +#if NV_OS_DARWIN && defined(HAVE_DISPATCH_H) +#define HAVE_GCD 1 +#include <dispatch/dispatch.h> +#endif + +// Parallel Patterns Library (PPL) is part of Microsoft's concurrency runtime: +// http://msdn.microsoft.com/en-us/library/dd504870.aspx +#if NV_OS_WIN32 && _MSC_VER >= 1600 +#define HAVE_PPL 1 +#include <ppl.h> +#endif + +// Intel Thread Building Blocks (TBB). +// http://www.threadingbuildingblocks.org/ +#if defined(HAVE_TBB) +#include <tbb/tbb.h> +#endif + +#include "nvthread/ParallelFor.h" + + +namespace nvtt { + + struct SequentialTaskDispatcher : public TaskDispatcher + { + virtual void dispatch(Task * task, void * context, int count) { + for (int i = 0; i < count; i++) { + task(context, i); + } + } + }; + + struct ParallelTaskDispatcher : public TaskDispatcher + { + virtual void dispatch(Task * task, void * context, int count) { + nv::ParallelFor parallelFor(task, context); + parallelFor.run(count); // @@ Add support for custom grain.
+ } + }; + + +#if defined(HAVE_OPENMP) + + struct OpenMPTaskDispatcher : public TaskDispatcher + { + virtual void dispatch(Task * task, void * context, int count) { + #pragma omp parallel for + for (int i = 0; i < count; i++) { + task(context, i); + } + } + }; + +#endif + +#if NV_OS_DARWIN && defined(HAVE_DISPATCH_H) + + // Task dispatcher using Apple's Grand Central Dispatch. + struct AppleTaskDispatcher : public TaskDispatcher + { + // @@ This is really lame, but I refuse to use size_t in the public API. + struct BlockContext { + Task * task; + void * context; + }; + + static void block(void * context, size_t id) { + BlockContext * ctx = (BlockContext *)context; + ctx->task(ctx->context, int(id)); + } + + virtual void dispatch(Task * task, void * context, int count) { + dispatch_queue_t q = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0); + BlockContext blockCtx = { task, context }; + dispatch_apply_f(count, q, &blockCtx, block); + } + }; + +#endif + +#if defined(HAVE_PPL) + + struct TaskFunctor { + TaskFunctor(Task * task, void * context) : task(task), context(context) {} + void operator()(int n) const { + task(context, n); + } + Task * task; + void * context; + }; + + // Task dispatcher using Microsoft's concurrency runtime. + struct MicrosoftTaskDispatcher : public TaskDispatcher + { + virtual void dispatch(Task * task, void * context, int count) + { + TaskFunctor func(task, context); + Concurrency::parallel_for(0, count, func); + } + }; + +#endif + +#if defined(HAVE_TBB) + + struct TaskFunctor { + TaskFunctor(Task * task, void * context) : task(task), context(context) {} + void operator()(int & n) const { + task(context, n); + } + Task * task; + void * context; + }; + + // Task dispatcher using Intel's Thread Building Blocks. + struct IntelTaskDispatcher : public TaskDispatcher + { + virtual void dispatch(Task * task, void * context, int count) { + parallel_for(blocked_range<int>(0, count, 1), TaskFunctor(task, context)); + } + }; + +#endif + +#if defined(HAVE_OPENMP) + typedef OpenMPTaskDispatcher ConcurrentTaskDispatcher; +#elif defined(HAVE_TBB) + typedef IntelTaskDispatcher ConcurrentTaskDispatcher; +#elif defined(HAVE_PPL) + typedef MicrosoftTaskDispatcher ConcurrentTaskDispatcher; +#elif defined(HAVE_GCD) + typedef AppleTaskDispatcher ConcurrentTaskDispatcher; +#else + //typedef SequentialTaskDispatcher ConcurrentTaskDispatcher; + typedef ParallelTaskDispatcher ConcurrentTaskDispatcher; +#endif + +} // namespace nvtt Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/cuda/BitmapTable.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/cuda/BitmapTable.h +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/cuda/BitmapTable.h @@ -0,0 +1,1886 @@ + + +/* +static void doPrecomputation() +{ + uint bitmaps[1024]; + + int indices[16]; + int num = 0; + + // Compute bitmaps with 3 clusters: + + // first cluster [0,i) is at the start + for( int m = 0; m < 16; ++m ) + { + indices[m] = 0; + } + const int imax = 15; + for( int i = imax; i >= 0; --i ) + { + // second cluster [i,j) is half along + for( int m = i; m < 16; ++m ) + { + indices[m] = 2; + } + const int jmax = ( i == 0 ) ? 15 : 16; + for( int j = jmax; j >= i; --j ) + { + // last cluster [j,k) is at the end + if( j < 16 ) + { + indices[j] = 1; + } + + uint bitmap = 0; + + for(int p = 0; p < 16; p++) { + bitmap |= indices[p] << (p * 2); + } + + bitmaps[num] = bitmap; + + num++; + } + } + nvDebugCheck(num == 151); + + // Align to 160.
+ for(int i = 0; i < 9; i++) + { + bitmaps[num] = 0x555AA000; + num++; + } + nvDebugCheck(num == 160); + + // Append bitmaps with 4 clusters: + + // first cluster [0,i) is at the start + for( int m = 0; m < 16; ++m ) + { + indices[m] = 0; + } + for( int i = imax; i >= 0; --i ) + { + // second cluster [i,j) is one third along + for( int m = i; m < 16; ++m ) + { + indices[m] = 2; + } + const int jmax = ( i == 0 ) ? 15 : 16; + for( int j = jmax; j >= i; --j ) + { + // third cluster [j,k) is two thirds along + for( int m = j; m < 16; ++m ) + { + indices[m] = 3; + } + + int kmax = ( j == 0 ) ? 15 : 16; + for( int k = kmax; k >= j; --k ) + { + // last cluster [k,n) is at the end + if( k < 16 ) + { + indices[k] = 1; + } + + uint bitmap = 0; + + bool hasThree = false; + for(int p = 0; p < 16; p++) { + bitmap |= indices[p] << (p * 2); + + if (indices[p] == 3) hasThree = true; + } + + if (hasThree) { + bitmaps[num] = bitmap; + num++; + } + } + } + } + nvDebugCheck(num == 975); + + // Align to 1024. + for(int i = 0; i < 49; i++) + { + bitmaps[num] = 0x555AA000; + num++; + } + + nvDebugCheck(num == 1024); + + printf("uint bitmaps[992] =\n{\n"); + for (int i = 0; i < 992; i++) + { + printf("\t0x%.8X,\n", bitmaps[i]); + } + printf("};\n"); +} +*/ + + +const static uint s_bitmapTable[992] = +{ + 0x80000000, + 0x40000000, + 0xA0000000, + 0x60000000, + 0x50000000, + 0xA8000000, + 0x68000000, + 0x58000000, + 0x54000000, + 0xAA000000, + 0x6A000000, + 0x5A000000, + 0x56000000, + 0x55000000, + 0xAA800000, + 0x6A800000, + 0x5A800000, + 0x56800000, + 0x55800000, + 0x55400000, + 0xAAA00000, + 0x6AA00000, + 0x5AA00000, + 0x56A00000, + 0x55A00000, + 0x55600000, + 0x55500000, + 0xAAA80000, + 0x6AA80000, + 0x5AA80000, + 0x56A80000, + 0x55A80000, + 0x55680000, + 0x55580000, + 0x55540000, + 0xAAAA0000, + 0x6AAA0000, + 0x5AAA0000, + 0x56AA0000, + 0x55AA0000, + 0x556A0000, + 0x555A0000, + 0x55560000, + 0x55550000, + 0xAAAA8000, + 0x6AAA8000, + 0x5AAA8000, + 0x56AA8000, + 0x55AA8000, + 0x556A8000, + 0x555A8000, + 0x55568000, + 0x55558000, + 0x55554000, + 0xAAAAA000, + 0x6AAAA000, + 0x5AAAA000, + 0x56AAA000, + 0x55AAA000, + 0x556AA000, + 0x555AA000, + 0x5556A000, + 0x5555A000, + 0x55556000, + 0x55555000, + 0xAAAAA800, + 0x6AAAA800, + 0x5AAAA800, + 0x56AAA800, + 0x55AAA800, + 0x556AA800, + 0x555AA800, + 0x5556A800, + 0x5555A800, + 0x55556800, + 0x55555800, + 0x55555400, + 0xAAAAAA00, + 0x6AAAAA00, + 0x5AAAAA00, + 0x56AAAA00, + 0x55AAAA00, + 0x556AAA00, + 0x555AAA00, + 0x5556AA00, + 0x5555AA00, + 0x55556A00, + 0x55555A00, + 0x55555600, + 0x55555500, + 0xAAAAAA80, + 0x6AAAAA80, + 0x5AAAAA80, + 0x56AAAA80, + 0x55AAAA80, + 0x556AAA80, + 0x555AAA80, + 0x5556AA80, + 0x5555AA80, + 0x55556A80, + 0x55555A80, + 0x55555680, + 0x55555580, + 0x55555540, + 0xAAAAAAA0, + 0x6AAAAAA0, + 0x5AAAAAA0, + 0x56AAAAA0, + 0x55AAAAA0, + 0x556AAAA0, + 0x555AAAA0, + 0x5556AAA0, + 0x5555AAA0, + 0x55556AA0, + 0x55555AA0, + 0x555556A0, + 0x555555A0, + 0x55555560, + 0x55555550, + 0xAAAAAAA8, + 0x6AAAAAA8, + 0x5AAAAAA8, + 0x56AAAAA8, + 0x55AAAAA8, + 0x556AAAA8, + 0x555AAAA8, + 0x5556AAA8, + 0x5555AAA8, + 0x55556AA8, + 0x55555AA8, + 0x555556A8, + 0x555555A8, + 0x55555568, + 0x55555558, + 0x55555554, + 0x6AAAAAAA, + 0x5AAAAAAA, + 0x56AAAAAA, + 0x55AAAAAA, + 0x556AAAAA, + 0x555AAAAA, + 0x5556AAAA, + 0x5555AAAA, + 0x55556AAA, + 0x55555AAA, + 0x555556AA, + 0x555555AA, + 0x5555556A, + 0x5555555A, + 0x55555556, + 0x55555555, + 0x55555555, + 0x55555555, + 0x55555555, + 0x55555555, + 0x55555555, + 0x55555555, + 0x55555555, + 0x55555555, + 0x55555555, + 0xC0000000, 
+ 0xE0000000, + 0xF0000000, + 0x70000000, + 0xE8000000, + 0xF8000000, + 0x78000000, + 0xFC000000, + 0x7C000000, + 0x5C000000, + 0xEA000000, + 0xFA000000, + 0x7A000000, + 0xFE000000, + 0x7E000000, + 0x5E000000, + 0xFF000000, + 0x7F000000, + 0x5F000000, + 0x57000000, + 0xEA800000, + 0xFA800000, + 0x7A800000, + 0xFE800000, + 0x7E800000, + 0x5E800000, + 0xFF800000, + 0x7F800000, + 0x5F800000, + 0x57800000, + 0xFFC00000, + 0x7FC00000, + 0x5FC00000, + 0x57C00000, + 0x55C00000, + 0xEAA00000, + 0xFAA00000, + 0x7AA00000, + 0xFEA00000, + 0x7EA00000, + 0x5EA00000, + 0xFFA00000, + 0x7FA00000, + 0x5FA00000, + 0x57A00000, + 0xFFE00000, + 0x7FE00000, + 0x5FE00000, + 0x57E00000, + 0x55E00000, + 0xFFF00000, + 0x7FF00000, + 0x5FF00000, + 0x57F00000, + 0x55F00000, + 0x55700000, + 0xEAA80000, + 0xFAA80000, + 0x7AA80000, + 0xFEA80000, + 0x7EA80000, + 0x5EA80000, + 0xFFA80000, + 0x7FA80000, + 0x5FA80000, + 0x57A80000, + 0xFFE80000, + 0x7FE80000, + 0x5FE80000, + 0x57E80000, + 0x55E80000, + 0xFFF80000, + 0x7FF80000, + 0x5FF80000, + 0x57F80000, + 0x55F80000, + 0x55780000, + 0xFFFC0000, + 0x7FFC0000, + 0x5FFC0000, + 0x57FC0000, + 0x55FC0000, + 0x557C0000, + 0x555C0000, + 0xEAAA0000, + 0xFAAA0000, + 0x7AAA0000, + 0xFEAA0000, + 0x7EAA0000, + 0x5EAA0000, + 0xFFAA0000, + 0x7FAA0000, + 0x5FAA0000, + 0x57AA0000, + 0xFFEA0000, + 0x7FEA0000, + 0x5FEA0000, + 0x57EA0000, + 0x55EA0000, + 0xFFFA0000, + 0x7FFA0000, + 0x5FFA0000, + 0x57FA0000, + 0x55FA0000, + 0x557A0000, + 0xFFFE0000, + 0x7FFE0000, + 0x5FFE0000, + 0x57FE0000, + 0x55FE0000, + 0x557E0000, + 0x555E0000, + 0xFFFF0000, + 0x7FFF0000, + 0x5FFF0000, + 0x57FF0000, + 0x55FF0000, + 0x557F0000, + 0x555F0000, + 0x55570000, + 0xEAAA8000, + 0xFAAA8000, + 0x7AAA8000, + 0xFEAA8000, + 0x7EAA8000, + 0x5EAA8000, + 0xFFAA8000, + 0x7FAA8000, + 0x5FAA8000, + 0x57AA8000, + 0xFFEA8000, + 0x7FEA8000, + 0x5FEA8000, + 0x57EA8000, + 0x55EA8000, + 0xFFFA8000, + 0x7FFA8000, + 0x5FFA8000, + 0x57FA8000, + 0x55FA8000, + 0x557A8000, + 0xFFFE8000, + 0x7FFE8000, + 0x5FFE8000, + 0x57FE8000, + 0x55FE8000, + 0x557E8000, + 0x555E8000, + 0xFFFF8000, + 0x7FFF8000, + 0x5FFF8000, + 0x57FF8000, + 0x55FF8000, + 0x557F8000, + 0x555F8000, + 0x55578000, + 0xFFFFC000, + 0x7FFFC000, + 0x5FFFC000, + 0x57FFC000, + 0x55FFC000, + 0x557FC000, + 0x555FC000, + 0x5557C000, + 0x5555C000, + 0xEAAAA000, + 0xFAAAA000, + 0x7AAAA000, + 0xFEAAA000, + 0x7EAAA000, + 0x5EAAA000, + 0xFFAAA000, + 0x7FAAA000, + 0x5FAAA000, + 0x57AAA000, + 0xFFEAA000, + 0x7FEAA000, + 0x5FEAA000, + 0x57EAA000, + 0x55EAA000, + 0xFFFAA000, + 0x7FFAA000, + 0x5FFAA000, + 0x57FAA000, + 0x55FAA000, + 0x557AA000, + 0xFFFEA000, + 0x7FFEA000, + 0x5FFEA000, + 0x57FEA000, + 0x55FEA000, + 0x557EA000, + 0x555EA000, + 0xFFFFA000, + 0x7FFFA000, + 0x5FFFA000, + 0x57FFA000, + 0x55FFA000, + 0x557FA000, + 0x555FA000, + 0x5557A000, + 0xFFFFE000, + 0x7FFFE000, + 0x5FFFE000, + 0x57FFE000, + 0x55FFE000, + 0x557FE000, + 0x555FE000, + 0x5557E000, + 0x5555E000, + 0xFFFFF000, + 0x7FFFF000, + 0x5FFFF000, + 0x57FFF000, + 0x55FFF000, + 0x557FF000, + 0x555FF000, + 0x5557F000, + 0x5555F000, + 0x55557000, + 0xEAAAA800, + 0xFAAAA800, + 0x7AAAA800, + 0xFEAAA800, + 0x7EAAA800, + 0x5EAAA800, + 0xFFAAA800, + 0x7FAAA800, + 0x5FAAA800, + 0x57AAA800, + 0xFFEAA800, + 0x7FEAA800, + 0x5FEAA800, + 0x57EAA800, + 0x55EAA800, + 0xFFFAA800, + 0x7FFAA800, + 0x5FFAA800, + 0x57FAA800, + 0x55FAA800, + 0x557AA800, + 0xFFFEA800, + 0x7FFEA800, + 0x5FFEA800, + 0x57FEA800, + 0x55FEA800, + 0x557EA800, + 0x555EA800, + 0xFFFFA800, + 0x7FFFA800, + 0x5FFFA800, + 0x57FFA800, + 0x55FFA800, + 0x557FA800, + 
0x555FA800, + 0x5557A800, + 0xFFFFE800, + 0x7FFFE800, + 0x5FFFE800, + 0x57FFE800, + 0x55FFE800, + 0x557FE800, + 0x555FE800, + 0x5557E800, + 0x5555E800, + 0xFFFFF800, + 0x7FFFF800, + 0x5FFFF800, + 0x57FFF800, + 0x55FFF800, + 0x557FF800, + 0x555FF800, + 0x5557F800, + 0x5555F800, + 0x55557800, + 0xFFFFFC00, + 0x7FFFFC00, + 0x5FFFFC00, + 0x57FFFC00, + 0x55FFFC00, + 0x557FFC00, + 0x555FFC00, + 0x5557FC00, + 0x5555FC00, + 0x55557C00, + 0x55555C00, + 0xEAAAAA00, + 0xFAAAAA00, + 0x7AAAAA00, + 0xFEAAAA00, + 0x7EAAAA00, + 0x5EAAAA00, + 0xFFAAAA00, + 0x7FAAAA00, + 0x5FAAAA00, + 0x57AAAA00, + 0xFFEAAA00, + 0x7FEAAA00, + 0x5FEAAA00, + 0x57EAAA00, + 0x55EAAA00, + 0xFFFAAA00, + 0x7FFAAA00, + 0x5FFAAA00, + 0x57FAAA00, + 0x55FAAA00, + 0x557AAA00, + 0xFFFEAA00, + 0x7FFEAA00, + 0x5FFEAA00, + 0x57FEAA00, + 0x55FEAA00, + 0x557EAA00, + 0x555EAA00, + 0xFFFFAA00, + 0x7FFFAA00, + 0x5FFFAA00, + 0x57FFAA00, + 0x55FFAA00, + 0x557FAA00, + 0x555FAA00, + 0x5557AA00, + 0xFFFFEA00, + 0x7FFFEA00, + 0x5FFFEA00, + 0x57FFEA00, + 0x55FFEA00, + 0x557FEA00, + 0x555FEA00, + 0x5557EA00, + 0x5555EA00, + 0xFFFFFA00, + 0x7FFFFA00, + 0x5FFFFA00, + 0x57FFFA00, + 0x55FFFA00, + 0x557FFA00, + 0x555FFA00, + 0x5557FA00, + 0x5555FA00, + 0x55557A00, + 0xFFFFFE00, + 0x7FFFFE00, + 0x5FFFFE00, + 0x57FFFE00, + 0x55FFFE00, + 0x557FFE00, + 0x555FFE00, + 0x5557FE00, + 0x5555FE00, + 0x55557E00, + 0x55555E00, + 0xFFFFFF00, + 0x7FFFFF00, + 0x5FFFFF00, + 0x57FFFF00, + 0x55FFFF00, + 0x557FFF00, + 0x555FFF00, + 0x5557FF00, + 0x5555FF00, + 0x55557F00, + 0x55555F00, + 0x55555700, + 0xEAAAAA80, + 0xFAAAAA80, + 0x7AAAAA80, + 0xFEAAAA80, + 0x7EAAAA80, + 0x5EAAAA80, + 0xFFAAAA80, + 0x7FAAAA80, + 0x5FAAAA80, + 0x57AAAA80, + 0xFFEAAA80, + 0x7FEAAA80, + 0x5FEAAA80, + 0x57EAAA80, + 0x55EAAA80, + 0xFFFAAA80, + 0x7FFAAA80, + 0x5FFAAA80, + 0x57FAAA80, + 0x55FAAA80, + 0x557AAA80, + 0xFFFEAA80, + 0x7FFEAA80, + 0x5FFEAA80, + 0x57FEAA80, + 0x55FEAA80, + 0x557EAA80, + 0x555EAA80, + 0xFFFFAA80, + 0x7FFFAA80, + 0x5FFFAA80, + 0x57FFAA80, + 0x55FFAA80, + 0x557FAA80, + 0x555FAA80, + 0x5557AA80, + 0xFFFFEA80, + 0x7FFFEA80, + 0x5FFFEA80, + 0x57FFEA80, + 0x55FFEA80, + 0x557FEA80, + 0x555FEA80, + 0x5557EA80, + 0x5555EA80, + 0xFFFFFA80, + 0x7FFFFA80, + 0x5FFFFA80, + 0x57FFFA80, + 0x55FFFA80, + 0x557FFA80, + 0x555FFA80, + 0x5557FA80, + 0x5555FA80, + 0x55557A80, + 0xFFFFFE80, + 0x7FFFFE80, + 0x5FFFFE80, + 0x57FFFE80, + 0x55FFFE80, + 0x557FFE80, + 0x555FFE80, + 0x5557FE80, + 0x5555FE80, + 0x55557E80, + 0x55555E80, + 0xFFFFFF80, + 0x7FFFFF80, + 0x5FFFFF80, + 0x57FFFF80, + 0x55FFFF80, + 0x557FFF80, + 0x555FFF80, + 0x5557FF80, + 0x5555FF80, + 0x55557F80, + 0x55555F80, + 0x55555780, + 0xFFFFFFC0, + 0x7FFFFFC0, + 0x5FFFFFC0, + 0x57FFFFC0, + 0x55FFFFC0, + 0x557FFFC0, + 0x555FFFC0, + 0x5557FFC0, + 0x5555FFC0, + 0x55557FC0, + 0x55555FC0, + 0x555557C0, + 0x555555C0, + 0xEAAAAAA0, + 0xFAAAAAA0, + 0x7AAAAAA0, + 0xFEAAAAA0, + 0x7EAAAAA0, + 0x5EAAAAA0, + 0xFFAAAAA0, + 0x7FAAAAA0, + 0x5FAAAAA0, + 0x57AAAAA0, + 0xFFEAAAA0, + 0x7FEAAAA0, + 0x5FEAAAA0, + 0x57EAAAA0, + 0x55EAAAA0, + 0xFFFAAAA0, + 0x7FFAAAA0, + 0x5FFAAAA0, + 0x57FAAAA0, + 0x55FAAAA0, + 0x557AAAA0, + 0xFFFEAAA0, + 0x7FFEAAA0, + 0x5FFEAAA0, + 0x57FEAAA0, + 0x55FEAAA0, + 0x557EAAA0, + 0x555EAAA0, + 0xFFFFAAA0, + 0x7FFFAAA0, + 0x5FFFAAA0, + 0x57FFAAA0, + 0x55FFAAA0, + 0x557FAAA0, + 0x555FAAA0, + 0x5557AAA0, + 0xFFFFEAA0, + 0x7FFFEAA0, + 0x5FFFEAA0, + 0x57FFEAA0, + 0x55FFEAA0, + 0x557FEAA0, + 0x555FEAA0, + 0x5557EAA0, + 0x5555EAA0, + 0xFFFFFAA0, + 0x7FFFFAA0, + 0x5FFFFAA0, + 0x57FFFAA0, + 0x55FFFAA0, + 0x557FFAA0, + 0x555FFAA0, + 0x5557FAA0, 
+ 0x5555FAA0, + 0x55557AA0, + 0xFFFFFEA0, + 0x7FFFFEA0, + 0x5FFFFEA0, + 0x57FFFEA0, + 0x55FFFEA0, + 0x557FFEA0, + 0x555FFEA0, + 0x5557FEA0, + 0x5555FEA0, + 0x55557EA0, + 0x55555EA0, + 0xFFFFFFA0, + 0x7FFFFFA0, + 0x5FFFFFA0, + 0x57FFFFA0, + 0x55FFFFA0, + 0x557FFFA0, + 0x555FFFA0, + 0x5557FFA0, + 0x5555FFA0, + 0x55557FA0, + 0x55555FA0, + 0x555557A0, + 0xFFFFFFE0, + 0x7FFFFFE0, + 0x5FFFFFE0, + 0x57FFFFE0, + 0x55FFFFE0, + 0x557FFFE0, + 0x555FFFE0, + 0x5557FFE0, + 0x5555FFE0, + 0x55557FE0, + 0x55555FE0, + 0x555557E0, + 0x555555E0, + 0xFFFFFFF0, + 0x7FFFFFF0, + 0x5FFFFFF0, + 0x57FFFFF0, + 0x55FFFFF0, + 0x557FFFF0, + 0x555FFFF0, + 0x5557FFF0, + 0x5555FFF0, + 0x55557FF0, + 0x55555FF0, + 0x555557F0, + 0x555555F0, + 0x55555570, + 0xEAAAAAA8, + 0xFAAAAAA8, + 0x7AAAAAA8, + 0xFEAAAAA8, + 0x7EAAAAA8, + 0x5EAAAAA8, + 0xFFAAAAA8, + 0x7FAAAAA8, + 0x5FAAAAA8, + 0x57AAAAA8, + 0xFFEAAAA8, + 0x7FEAAAA8, + 0x5FEAAAA8, + 0x57EAAAA8, + 0x55EAAAA8, + 0xFFFAAAA8, + 0x7FFAAAA8, + 0x5FFAAAA8, + 0x57FAAAA8, + 0x55FAAAA8, + 0x557AAAA8, + 0xFFFEAAA8, + 0x7FFEAAA8, + 0x5FFEAAA8, + 0x57FEAAA8, + 0x55FEAAA8, + 0x557EAAA8, + 0x555EAAA8, + 0xFFFFAAA8, + 0x7FFFAAA8, + 0x5FFFAAA8, + 0x57FFAAA8, + 0x55FFAAA8, + 0x557FAAA8, + 0x555FAAA8, + 0x5557AAA8, + 0xFFFFEAA8, + 0x7FFFEAA8, + 0x5FFFEAA8, + 0x57FFEAA8, + 0x55FFEAA8, + 0x557FEAA8, + 0x555FEAA8, + 0x5557EAA8, + 0x5555EAA8, + 0xFFFFFAA8, + 0x7FFFFAA8, + 0x5FFFFAA8, + 0x57FFFAA8, + 0x55FFFAA8, + 0x557FFAA8, + 0x555FFAA8, + 0x5557FAA8, + 0x5555FAA8, + 0x55557AA8, + 0xFFFFFEA8, + 0x7FFFFEA8, + 0x5FFFFEA8, + 0x57FFFEA8, + 0x55FFFEA8, + 0x557FFEA8, + 0x555FFEA8, + 0x5557FEA8, + 0x5555FEA8, + 0x55557EA8, + 0x55555EA8, + 0xFFFFFFA8, + 0x7FFFFFA8, + 0x5FFFFFA8, + 0x57FFFFA8, + 0x55FFFFA8, + 0x557FFFA8, + 0x555FFFA8, + 0x5557FFA8, + 0x5555FFA8, + 0x55557FA8, + 0x55555FA8, + 0x555557A8, + 0xFFFFFFE8, + 0x7FFFFFE8, + 0x5FFFFFE8, + 0x57FFFFE8, + 0x55FFFFE8, + 0x557FFFE8, + 0x555FFFE8, + 0x5557FFE8, + 0x5555FFE8, + 0x55557FE8, + 0x55555FE8, + 0x555557E8, + 0x555555E8, + 0xFFFFFFF8, + 0x7FFFFFF8, + 0x5FFFFFF8, + 0x57FFFFF8, + 0x55FFFFF8, + 0x557FFFF8, + 0x555FFFF8, + 0x5557FFF8, + 0x5555FFF8, + 0x55557FF8, + 0x55555FF8, + 0x555557F8, + 0x555555F8, + 0x55555578, + 0xFFFFFFFC, + 0x7FFFFFFC, + 0x5FFFFFFC, + 0x57FFFFFC, + 0x55FFFFFC, + 0x557FFFFC, + 0x555FFFFC, + 0x5557FFFC, + 0x5555FFFC, + 0x55557FFC, + 0x55555FFC, + 0x555557FC, + 0x555555FC, + 0x5555557C, + 0x5555555C, + 0xEAAAAAAA, + 0xFAAAAAAA, + 0x7AAAAAAA, + 0xFEAAAAAA, + 0x7EAAAAAA, + 0x5EAAAAAA, + 0xFFAAAAAA, + 0x7FAAAAAA, + 0x5FAAAAAA, + 0x57AAAAAA, + 0xFFEAAAAA, + 0x7FEAAAAA, + 0x5FEAAAAA, + 0x57EAAAAA, + 0x55EAAAAA, + 0xFFFAAAAA, + 0x7FFAAAAA, + 0x5FFAAAAA, + 0x57FAAAAA, + 0x55FAAAAA, + 0x557AAAAA, + 0xFFFEAAAA, + 0x7FFEAAAA, + 0x5FFEAAAA, + 0x57FEAAAA, + 0x55FEAAAA, + 0x557EAAAA, + 0x555EAAAA, + 0xFFFFAAAA, + 0x7FFFAAAA, + 0x5FFFAAAA, + 0x57FFAAAA, + 0x55FFAAAA, + 0x557FAAAA, + 0x555FAAAA, + 0x5557AAAA, + 0xFFFFEAAA, + 0x7FFFEAAA, + 0x5FFFEAAA, + 0x57FFEAAA, + 0x55FFEAAA, + 0x557FEAAA, + 0x555FEAAA, + 0x5557EAAA, + 0x5555EAAA, + 0xFFFFFAAA, + 0x7FFFFAAA, + 0x5FFFFAAA, + 0x57FFFAAA, + 0x55FFFAAA, + 0x557FFAAA, + 0x555FFAAA, + 0x5557FAAA, + 0x5555FAAA, + 0x55557AAA, + 0xFFFFFEAA, + 0x7FFFFEAA, + 0x5FFFFEAA, + 0x57FFFEAA, + 0x55FFFEAA, + 0x557FFEAA, + 0x555FFEAA, + 0x5557FEAA, + 0x5555FEAA, + 0x55557EAA, + 0x55555EAA, + 0xFFFFFFAA, + 0x7FFFFFAA, + 0x5FFFFFAA, + 0x57FFFFAA, + 0x55FFFFAA, + 0x557FFFAA, + 0x555FFFAA, + 0x5557FFAA, + 0x5555FFAA, + 0x55557FAA, + 0x55555FAA, + 0x555557AA, + 0xFFFFFFEA, + 0x7FFFFFEA, + 0x5FFFFFEA, + 
0x57FFFFEA, + 0x55FFFFEA, + 0x557FFFEA, + 0x555FFFEA, + 0x5557FFEA, + 0x5555FFEA, + 0x55557FEA, + 0x55555FEA, + 0x555557EA, + 0x555555EA, + 0xFFFFFFFA, + 0x7FFFFFFA, + 0x5FFFFFFA, + 0x57FFFFFA, + 0x55FFFFFA, + 0x557FFFFA, + 0x555FFFFA, + 0x5557FFFA, + 0x5555FFFA, + 0x55557FFA, + 0x55555FFA, + 0x555557FA, + 0x555555FA, + 0x5555557A, + 0xFFFFFFFE, + 0x7FFFFFFE, + 0x5FFFFFFE, + 0x57FFFFFE, + 0x55FFFFFE, + 0x557FFFFE, + 0x555FFFFE, + 0x5557FFFE, + 0x5555FFFE, + 0x55557FFE, + 0x55555FFE, + 0x555557FE, + 0x555555FE, + 0x5555557E, + 0x5555555E, + 0x7FFFFFFF, + 0x5FFFFFFF, + 0x57FFFFFF, + 0x55FFFFFF, + 0x557FFFFF, + 0x555FFFFF, + 0x5557FFFF, + 0x5555FFFF, + 0x55557FFF, + 0x55555FFF, + 0x555557FF, + 0x555555FF, + 0x5555557F, + 0x5555555F, + 0x55555557, + 0x55555557, + 0x55555557, + 0x55555557, + 0x55555557, + 0x55555557, + 0x55555557, + 0x55555557, + 0x55555557, + 0x55555557, + 0x55555557, + 0x55555557, + 0x55555557, + 0x55555557, + 0x55555557, + 0x55555557, + 0x55555557, + 0x55555557, +}; + + +/* +void precomp() +{ + unsigned int bitmaps[1024]; + + int num = 0; + + printf("const static uint s_bitmapTableCTX[704] =\n{\n"); + + for (int a = 1; a <= 15; a++) + { + for (int b = a; b <= 15; b++) + { + for (int c = b; c <= 15; c++) + { + int indices[16]; + + int i = 0; + for(; i < a; i++) { + indices[i] = 0; + } + for(; i < a+b; i++) { + indices[i] = 2; + } + for(; i < a+b+c; i++) { + indices[i] = 3; + } + for(; i < 16; i++) { + indices[i] = 1; + } + + unsigned int bm = 0; + for(i = 0; i < 16; i++) { + bm |= indices[i] << (i * 2); + } + + printf("\t0x%8X, // %d %d %d %d\n", bm, a-0, b-a, c-b, 16-c); + + bitmaps[num] = bm; + num++; + } + } + } + + // Align to 32: 680 -> 704 + while (num < 704) + { + printf("\t0x80000000,\n"); + + bitmaps[num] = 0x80000000; // 15 0 0 1; + num++; + } + + printf("}; // num = %d\n", num); +} +*/ + +const static uint s_bitmapTableCTX[704] = +{ + 0x55555578, // 1 0 0 15 + 0x555555F8, // 1 0 1 14 + 0x555557F8, // 1 0 2 13 + 0x55555FF8, // 1 0 3 12 + 0x55557FF8, // 1 0 4 11 + 0x5555FFF8, // 1 0 5 10 + 0x5557FFF8, // 1 0 6 9 + 0x555FFFF8, // 1 0 7 8 + 0x557FFFF8, // 1 0 8 7 + 0x55FFFFF8, // 1 0 9 6 + 0x57FFFFF8, // 1 0 10 5 + 0x5FFFFFF8, // 1 0 11 4 + 0x7FFFFFF8, // 1 0 12 3 + 0xFFFFFFF8, // 1 0 13 2 + 0xFFFFFFF8, // 1 0 14 1 + 0x555557E8, // 1 1 0 14 + 0x55555FE8, // 1 1 1 13 + 0x55557FE8, // 1 1 2 12 + 0x5555FFE8, // 1 1 3 11 + 0x5557FFE8, // 1 1 4 10 + 0x555FFFE8, // 1 1 5 9 + 0x557FFFE8, // 1 1 6 8 + 0x55FFFFE8, // 1 1 7 7 + 0x57FFFFE8, // 1 1 8 6 + 0x5FFFFFE8, // 1 1 9 5 + 0x7FFFFFE8, // 1 1 10 4 + 0xFFFFFFE8, // 1 1 11 3 + 0xFFFFFFE8, // 1 1 12 2 + 0xFFFFFFE8, // 1 1 13 1 + 0x55557FA8, // 1 2 0 13 + 0x5555FFA8, // 1 2 1 12 + 0x5557FFA8, // 1 2 2 11 + 0x555FFFA8, // 1 2 3 10 + 0x557FFFA8, // 1 2 4 9 + 0x55FFFFA8, // 1 2 5 8 + 0x57FFFFA8, // 1 2 6 7 + 0x5FFFFFA8, // 1 2 7 6 + 0x7FFFFFA8, // 1 2 8 5 + 0xFFFFFFA8, // 1 2 9 4 + 0xFFFFFFA8, // 1 2 10 3 + 0xFFFFFFA8, // 1 2 11 2 + 0xFFFFFFA8, // 1 2 12 1 + 0x5557FEA8, // 1 3 0 12 + 0x555FFEA8, // 1 3 1 11 + 0x557FFEA8, // 1 3 2 10 + 0x55FFFEA8, // 1 3 3 9 + 0x57FFFEA8, // 1 3 4 8 + 0x5FFFFEA8, // 1 3 5 7 + 0x7FFFFEA8, // 1 3 6 6 + 0xFFFFFEA8, // 1 3 7 5 + 0xFFFFFEA8, // 1 3 8 4 + 0xFFFFFEA8, // 1 3 9 3 + 0xFFFFFEA8, // 1 3 10 2 + 0xFFFFFEA8, // 1 3 11 1 + 0x557FFAA8, // 1 4 0 11 + 0x55FFFAA8, // 1 4 1 10 + 0x57FFFAA8, // 1 4 2 9 + 0x5FFFFAA8, // 1 4 3 8 + 0x7FFFFAA8, // 1 4 4 7 + 0xFFFFFAA8, // 1 4 5 6 + 0xFFFFFAA8, // 1 4 6 5 + 0xFFFFFAA8, // 1 4 7 4 + 0xFFFFFAA8, // 1 4 8 3 + 0xFFFFFAA8, // 1 4 9 2 + 0xFFFFFAA8, // 1 4 10 1 + 
0x57FFEAA8, // 1 5 0 10 + 0x5FFFEAA8, // 1 5 1 9 + 0x7FFFEAA8, // 1 5 2 8 + 0xFFFFEAA8, // 1 5 3 7 + 0xFFFFEAA8, // 1 5 4 6 + 0xFFFFEAA8, // 1 5 5 5 + 0xFFFFEAA8, // 1 5 6 4 + 0xFFFFEAA8, // 1 5 7 3 + 0xFFFFEAA8, // 1 5 8 2 + 0xFFFFEAA8, // 1 5 9 1 + 0x7FFFAAA8, // 1 6 0 9 + 0xFFFFAAA8, // 1 6 1 8 + 0xFFFFAAA8, // 1 6 2 7 + 0xFFFFAAA8, // 1 6 3 6 + 0xFFFFAAA8, // 1 6 4 5 + 0xFFFFAAA8, // 1 6 5 4 + 0xFFFFAAA8, // 1 6 6 3 + 0xFFFFAAA8, // 1 6 7 2 + 0xFFFFAAA8, // 1 6 8 1 + 0xFFFEAAA8, // 1 7 0 8 + 0xFFFEAAA8, // 1 7 1 7 + 0xFFFEAAA8, // 1 7 2 6 + 0xFFFEAAA8, // 1 7 3 5 + 0xFFFEAAA8, // 1 7 4 4 + 0xFFFEAAA8, // 1 7 5 3 + 0xFFFEAAA8, // 1 7 6 2 + 0xFFFEAAA8, // 1 7 7 1 + 0xFFFAAAA8, // 1 8 0 7 + 0xFFFAAAA8, // 1 8 1 6 + 0xFFFAAAA8, // 1 8 2 5 + 0xFFFAAAA8, // 1 8 3 4 + 0xFFFAAAA8, // 1 8 4 3 + 0xFFFAAAA8, // 1 8 5 2 + 0xFFFAAAA8, // 1 8 6 1 + 0xFFEAAAA8, // 1 9 0 6 + 0xFFEAAAA8, // 1 9 1 5 + 0xFFEAAAA8, // 1 9 2 4 + 0xFFEAAAA8, // 1 9 3 3 + 0xFFEAAAA8, // 1 9 4 2 + 0xFFEAAAA8, // 1 9 5 1 + 0xFFAAAAA8, // 1 10 0 5 + 0xFFAAAAA8, // 1 10 1 4 + 0xFFAAAAA8, // 1 10 2 3 + 0xFFAAAAA8, // 1 10 3 2 + 0xFFAAAAA8, // 1 10 4 1 + 0xFEAAAAA8, // 1 11 0 4 + 0xFEAAAAA8, // 1 11 1 3 + 0xFEAAAAA8, // 1 11 2 2 + 0xFEAAAAA8, // 1 11 3 1 + 0xFAAAAAA8, // 1 12 0 3 + 0xFAAAAAA8, // 1 12 1 2 + 0xFAAAAAA8, // 1 12 2 1 + 0xEAAAAAA8, // 1 13 0 2 + 0xEAAAAAA8, // 1 13 1 1 + 0xAAAAAAA8, // 1 14 0 1 + 0x55555FA0, // 2 0 0 14 + 0x55557FA0, // 2 0 1 13 + 0x5555FFA0, // 2 0 2 12 + 0x5557FFA0, // 2 0 3 11 + 0x555FFFA0, // 2 0 4 10 + 0x557FFFA0, // 2 0 5 9 + 0x55FFFFA0, // 2 0 6 8 + 0x57FFFFA0, // 2 0 7 7 + 0x5FFFFFA0, // 2 0 8 6 + 0x7FFFFFA0, // 2 0 9 5 + 0xFFFFFFA0, // 2 0 10 4 + 0xFFFFFFA0, // 2 0 11 3 + 0xFFFFFFA0, // 2 0 12 2 + 0xFFFFFFA0, // 2 0 13 1 + 0x5555FEA0, // 2 1 0 13 + 0x5557FEA0, // 2 1 1 12 + 0x555FFEA0, // 2 1 2 11 + 0x557FFEA0, // 2 1 3 10 + 0x55FFFEA0, // 2 1 4 9 + 0x57FFFEA0, // 2 1 5 8 + 0x5FFFFEA0, // 2 1 6 7 + 0x7FFFFEA0, // 2 1 7 6 + 0xFFFFFEA0, // 2 1 8 5 + 0xFFFFFEA0, // 2 1 9 4 + 0xFFFFFEA0, // 2 1 10 3 + 0xFFFFFEA0, // 2 1 11 2 + 0xFFFFFEA0, // 2 1 12 1 + 0x555FFAA0, // 2 2 0 12 + 0x557FFAA0, // 2 2 1 11 + 0x55FFFAA0, // 2 2 2 10 + 0x57FFFAA0, // 2 2 3 9 + 0x5FFFFAA0, // 2 2 4 8 + 0x7FFFFAA0, // 2 2 5 7 + 0xFFFFFAA0, // 2 2 6 6 + 0xFFFFFAA0, // 2 2 7 5 + 0xFFFFFAA0, // 2 2 8 4 + 0xFFFFFAA0, // 2 2 9 3 + 0xFFFFFAA0, // 2 2 10 2 + 0xFFFFFAA0, // 2 2 11 1 + 0x55FFEAA0, // 2 3 0 11 + 0x57FFEAA0, // 2 3 1 10 + 0x5FFFEAA0, // 2 3 2 9 + 0x7FFFEAA0, // 2 3 3 8 + 0xFFFFEAA0, // 2 3 4 7 + 0xFFFFEAA0, // 2 3 5 6 + 0xFFFFEAA0, // 2 3 6 5 + 0xFFFFEAA0, // 2 3 7 4 + 0xFFFFEAA0, // 2 3 8 3 + 0xFFFFEAA0, // 2 3 9 2 + 0xFFFFEAA0, // 2 3 10 1 + 0x5FFFAAA0, // 2 4 0 10 + 0x7FFFAAA0, // 2 4 1 9 + 0xFFFFAAA0, // 2 4 2 8 + 0xFFFFAAA0, // 2 4 3 7 + 0xFFFFAAA0, // 2 4 4 6 + 0xFFFFAAA0, // 2 4 5 5 + 0xFFFFAAA0, // 2 4 6 4 + 0xFFFFAAA0, // 2 4 7 3 + 0xFFFFAAA0, // 2 4 8 2 + 0xFFFFAAA0, // 2 4 9 1 + 0xFFFEAAA0, // 2 5 0 9 + 0xFFFEAAA0, // 2 5 1 8 + 0xFFFEAAA0, // 2 5 2 7 + 0xFFFEAAA0, // 2 5 3 6 + 0xFFFEAAA0, // 2 5 4 5 + 0xFFFEAAA0, // 2 5 5 4 + 0xFFFEAAA0, // 2 5 6 3 + 0xFFFEAAA0, // 2 5 7 2 + 0xFFFEAAA0, // 2 5 8 1 + 0xFFFAAAA0, // 2 6 0 8 + 0xFFFAAAA0, // 2 6 1 7 + 0xFFFAAAA0, // 2 6 2 6 + 0xFFFAAAA0, // 2 6 3 5 + 0xFFFAAAA0, // 2 6 4 4 + 0xFFFAAAA0, // 2 6 5 3 + 0xFFFAAAA0, // 2 6 6 2 + 0xFFFAAAA0, // 2 6 7 1 + 0xFFEAAAA0, // 2 7 0 7 + 0xFFEAAAA0, // 2 7 1 6 + 0xFFEAAAA0, // 2 7 2 5 + 0xFFEAAAA0, // 2 7 3 4 + 0xFFEAAAA0, // 2 7 4 3 + 0xFFEAAAA0, // 2 7 5 2 + 0xFFEAAAA0, // 2 7 6 1 + 0xFFAAAAA0, // 2 8 0 6 + 0xFFAAAAA0, 
// 2 8 1 5 + 0xFFAAAAA0, // 2 8 2 4 + 0xFFAAAAA0, // 2 8 3 3 + 0xFFAAAAA0, // 2 8 4 2 + 0xFFAAAAA0, // 2 8 5 1 + 0xFEAAAAA0, // 2 9 0 5 + 0xFEAAAAA0, // 2 9 1 4 + 0xFEAAAAA0, // 2 9 2 3 + 0xFEAAAAA0, // 2 9 3 2 + 0xFEAAAAA0, // 2 9 4 1 + 0xFAAAAAA0, // 2 10 0 4 + 0xFAAAAAA0, // 2 10 1 3 + 0xFAAAAAA0, // 2 10 2 2 + 0xFAAAAAA0, // 2 10 3 1 + 0xEAAAAAA0, // 2 11 0 3 + 0xEAAAAAA0, // 2 11 1 2 + 0xEAAAAAA0, // 2 11 2 1 + 0xAAAAAAA0, // 2 12 0 2 + 0xAAAAAAA0, // 2 12 1 1 + 0xAAAAAAA0, // 2 13 0 1 + 0x5557FA80, // 3 0 0 13 + 0x555FFA80, // 3 0 1 12 + 0x557FFA80, // 3 0 2 11 + 0x55FFFA80, // 3 0 3 10 + 0x57FFFA80, // 3 0 4 9 + 0x5FFFFA80, // 3 0 5 8 + 0x7FFFFA80, // 3 0 6 7 + 0xFFFFFA80, // 3 0 7 6 + 0xFFFFFA80, // 3 0 8 5 + 0xFFFFFA80, // 3 0 9 4 + 0xFFFFFA80, // 3 0 10 3 + 0xFFFFFA80, // 3 0 11 2 + 0xFFFFFA80, // 3 0 12 1 + 0x557FEA80, // 3 1 0 12 + 0x55FFEA80, // 3 1 1 11 + 0x57FFEA80, // 3 1 2 10 + 0x5FFFEA80, // 3 1 3 9 + 0x7FFFEA80, // 3 1 4 8 + 0xFFFFEA80, // 3 1 5 7 + 0xFFFFEA80, // 3 1 6 6 + 0xFFFFEA80, // 3 1 7 5 + 0xFFFFEA80, // 3 1 8 4 + 0xFFFFEA80, // 3 1 9 3 + 0xFFFFEA80, // 3 1 10 2 + 0xFFFFEA80, // 3 1 11 1 + 0x57FFAA80, // 3 2 0 11 + 0x5FFFAA80, // 3 2 1 10 + 0x7FFFAA80, // 3 2 2 9 + 0xFFFFAA80, // 3 2 3 8 + 0xFFFFAA80, // 3 2 4 7 + 0xFFFFAA80, // 3 2 5 6 + 0xFFFFAA80, // 3 2 6 5 + 0xFFFFAA80, // 3 2 7 4 + 0xFFFFAA80, // 3 2 8 3 + 0xFFFFAA80, // 3 2 9 2 + 0xFFFFAA80, // 3 2 10 1 + 0x7FFEAA80, // 3 3 0 10 + 0xFFFEAA80, // 3 3 1 9 + 0xFFFEAA80, // 3 3 2 8 + 0xFFFEAA80, // 3 3 3 7 + 0xFFFEAA80, // 3 3 4 6 + 0xFFFEAA80, // 3 3 5 5 + 0xFFFEAA80, // 3 3 6 4 + 0xFFFEAA80, // 3 3 7 3 + 0xFFFEAA80, // 3 3 8 2 + 0xFFFEAA80, // 3 3 9 1 + 0xFFFAAA80, // 3 4 0 9 + 0xFFFAAA80, // 3 4 1 8 + 0xFFFAAA80, // 3 4 2 7 + 0xFFFAAA80, // 3 4 3 6 + 0xFFFAAA80, // 3 4 4 5 + 0xFFFAAA80, // 3 4 5 4 + 0xFFFAAA80, // 3 4 6 3 + 0xFFFAAA80, // 3 4 7 2 + 0xFFFAAA80, // 3 4 8 1 + 0xFFEAAA80, // 3 5 0 8 + 0xFFEAAA80, // 3 5 1 7 + 0xFFEAAA80, // 3 5 2 6 + 0xFFEAAA80, // 3 5 3 5 + 0xFFEAAA80, // 3 5 4 4 + 0xFFEAAA80, // 3 5 5 3 + 0xFFEAAA80, // 3 5 6 2 + 0xFFEAAA80, // 3 5 7 1 + 0xFFAAAA80, // 3 6 0 7 + 0xFFAAAA80, // 3 6 1 6 + 0xFFAAAA80, // 3 6 2 5 + 0xFFAAAA80, // 3 6 3 4 + 0xFFAAAA80, // 3 6 4 3 + 0xFFAAAA80, // 3 6 5 2 + 0xFFAAAA80, // 3 6 6 1 + 0xFEAAAA80, // 3 7 0 6 + 0xFEAAAA80, // 3 7 1 5 + 0xFEAAAA80, // 3 7 2 4 + 0xFEAAAA80, // 3 7 3 3 + 0xFEAAAA80, // 3 7 4 2 + 0xFEAAAA80, // 3 7 5 1 + 0xFAAAAA80, // 3 8 0 5 + 0xFAAAAA80, // 3 8 1 4 + 0xFAAAAA80, // 3 8 2 3 + 0xFAAAAA80, // 3 8 3 2 + 0xFAAAAA80, // 3 8 4 1 + 0xEAAAAA80, // 3 9 0 4 + 0xEAAAAA80, // 3 9 1 3 + 0xEAAAAA80, // 3 9 2 2 + 0xEAAAAA80, // 3 9 3 1 + 0xAAAAAA80, // 3 10 0 3 + 0xAAAAAA80, // 3 10 1 2 + 0xAAAAAA80, // 3 10 2 1 + 0xAAAAAA80, // 3 11 0 2 + 0xAAAAAA80, // 3 11 1 1 + 0xAAAAAA80, // 3 12 0 1 + 0x55FFAA00, // 4 0 0 12 + 0x57FFAA00, // 4 0 1 11 + 0x5FFFAA00, // 4 0 2 10 + 0x7FFFAA00, // 4 0 3 9 + 0xFFFFAA00, // 4 0 4 8 + 0xFFFFAA00, // 4 0 5 7 + 0xFFFFAA00, // 4 0 6 6 + 0xFFFFAA00, // 4 0 7 5 + 0xFFFFAA00, // 4 0 8 4 + 0xFFFFAA00, // 4 0 9 3 + 0xFFFFAA00, // 4 0 10 2 + 0xFFFFAA00, // 4 0 11 1 + 0x5FFEAA00, // 4 1 0 11 + 0x7FFEAA00, // 4 1 1 10 + 0xFFFEAA00, // 4 1 2 9 + 0xFFFEAA00, // 4 1 3 8 + 0xFFFEAA00, // 4 1 4 7 + 0xFFFEAA00, // 4 1 5 6 + 0xFFFEAA00, // 4 1 6 5 + 0xFFFEAA00, // 4 1 7 4 + 0xFFFEAA00, // 4 1 8 3 + 0xFFFEAA00, // 4 1 9 2 + 0xFFFEAA00, // 4 1 10 1 + 0xFFFAAA00, // 4 2 0 10 + 0xFFFAAA00, // 4 2 1 9 + 0xFFFAAA00, // 4 2 2 8 + 0xFFFAAA00, // 4 2 3 7 + 0xFFFAAA00, // 4 2 4 6 + 0xFFFAAA00, // 4 2 5 5 + 0xFFFAAA00, // 4 2 6 4 + 
0xFFFAAA00, // 4 2 7 3 + 0xFFFAAA00, // 4 2 8 2 + 0xFFFAAA00, // 4 2 9 1 + 0xFFEAAA00, // 4 3 0 9 + 0xFFEAAA00, // 4 3 1 8 + 0xFFEAAA00, // 4 3 2 7 + 0xFFEAAA00, // 4 3 3 6 + 0xFFEAAA00, // 4 3 4 5 + 0xFFEAAA00, // 4 3 5 4 + 0xFFEAAA00, // 4 3 6 3 + 0xFFEAAA00, // 4 3 7 2 + 0xFFEAAA00, // 4 3 8 1 + 0xFFAAAA00, // 4 4 0 8 + 0xFFAAAA00, // 4 4 1 7 + 0xFFAAAA00, // 4 4 2 6 + 0xFFAAAA00, // 4 4 3 5 + 0xFFAAAA00, // 4 4 4 4 + 0xFFAAAA00, // 4 4 5 3 + 0xFFAAAA00, // 4 4 6 2 + 0xFFAAAA00, // 4 4 7 1 + 0xFEAAAA00, // 4 5 0 7 + 0xFEAAAA00, // 4 5 1 6 + 0xFEAAAA00, // 4 5 2 5 + 0xFEAAAA00, // 4 5 3 4 + 0xFEAAAA00, // 4 5 4 3 + 0xFEAAAA00, // 4 5 5 2 + 0xFEAAAA00, // 4 5 6 1 + 0xFAAAAA00, // 4 6 0 6 + 0xFAAAAA00, // 4 6 1 5 + 0xFAAAAA00, // 4 6 2 4 + 0xFAAAAA00, // 4 6 3 3 + 0xFAAAAA00, // 4 6 4 2 + 0xFAAAAA00, // 4 6 5 1 + 0xEAAAAA00, // 4 7 0 5 + 0xEAAAAA00, // 4 7 1 4 + 0xEAAAAA00, // 4 7 2 3 + 0xEAAAAA00, // 4 7 3 2 + 0xEAAAAA00, // 4 7 4 1 + 0xAAAAAA00, // 4 8 0 4 + 0xAAAAAA00, // 4 8 1 3 + 0xAAAAAA00, // 4 8 2 2 + 0xAAAAAA00, // 4 8 3 1 + 0xAAAAAA00, // 4 9 0 3 + 0xAAAAAA00, // 4 9 1 2 + 0xAAAAAA00, // 4 9 2 1 + 0xAAAAAA00, // 4 10 0 2 + 0xAAAAAA00, // 4 10 1 1 + 0xAAAAAA00, // 4 11 0 1 + 0x7FFAA800, // 5 0 0 11 + 0xFFFAA800, // 5 0 1 10 + 0xFFFAA800, // 5 0 2 9 + 0xFFFAA800, // 5 0 3 8 + 0xFFFAA800, // 5 0 4 7 + 0xFFFAA800, // 5 0 5 6 + 0xFFFAA800, // 5 0 6 5 + 0xFFFAA800, // 5 0 7 4 + 0xFFFAA800, // 5 0 8 3 + 0xFFFAA800, // 5 0 9 2 + 0xFFFAA800, // 5 0 10 1 + 0xFFEAA800, // 5 1 0 10 + 0xFFEAA800, // 5 1 1 9 + 0xFFEAA800, // 5 1 2 8 + 0xFFEAA800, // 5 1 3 7 + 0xFFEAA800, // 5 1 4 6 + 0xFFEAA800, // 5 1 5 5 + 0xFFEAA800, // 5 1 6 4 + 0xFFEAA800, // 5 1 7 3 + 0xFFEAA800, // 5 1 8 2 + 0xFFEAA800, // 5 1 9 1 + 0xFFAAA800, // 5 2 0 9 + 0xFFAAA800, // 5 2 1 8 + 0xFFAAA800, // 5 2 2 7 + 0xFFAAA800, // 5 2 3 6 + 0xFFAAA800, // 5 2 4 5 + 0xFFAAA800, // 5 2 5 4 + 0xFFAAA800, // 5 2 6 3 + 0xFFAAA800, // 5 2 7 2 + 0xFFAAA800, // 5 2 8 1 + 0xFEAAA800, // 5 3 0 8 + 0xFEAAA800, // 5 3 1 7 + 0xFEAAA800, // 5 3 2 6 + 0xFEAAA800, // 5 3 3 5 + 0xFEAAA800, // 5 3 4 4 + 0xFEAAA800, // 5 3 5 3 + 0xFEAAA800, // 5 3 6 2 + 0xFEAAA800, // 5 3 7 1 + 0xFAAAA800, // 5 4 0 7 + 0xFAAAA800, // 5 4 1 6 + 0xFAAAA800, // 5 4 2 5 + 0xFAAAA800, // 5 4 3 4 + 0xFAAAA800, // 5 4 4 3 + 0xFAAAA800, // 5 4 5 2 + 0xFAAAA800, // 5 4 6 1 + 0xEAAAA800, // 5 5 0 6 + 0xEAAAA800, // 5 5 1 5 + 0xEAAAA800, // 5 5 2 4 + 0xEAAAA800, // 5 5 3 3 + 0xEAAAA800, // 5 5 4 2 + 0xEAAAA800, // 5 5 5 1 + 0xAAAAA800, // 5 6 0 5 + 0xAAAAA800, // 5 6 1 4 + 0xAAAAA800, // 5 6 2 3 + 0xAAAAA800, // 5 6 3 2 + 0xAAAAA800, // 5 6 4 1 + 0xAAAAA800, // 5 7 0 4 + 0xAAAAA800, // 5 7 1 3 + 0xAAAAA800, // 5 7 2 2 + 0xAAAAA800, // 5 7 3 1 + 0xAAAAA800, // 5 8 0 3 + 0xAAAAA800, // 5 8 1 2 + 0xAAAAA800, // 5 8 2 1 + 0xAAAAA800, // 5 9 0 2 + 0xAAAAA800, // 5 9 1 1 + 0xAAAAA800, // 5 10 0 1 + 0xFFAAA000, // 6 0 0 10 + 0xFFAAA000, // 6 0 1 9 + 0xFFAAA000, // 6 0 2 8 + 0xFFAAA000, // 6 0 3 7 + 0xFFAAA000, // 6 0 4 6 + 0xFFAAA000, // 6 0 5 5 + 0xFFAAA000, // 6 0 6 4 + 0xFFAAA000, // 6 0 7 3 + 0xFFAAA000, // 6 0 8 2 + 0xFFAAA000, // 6 0 9 1 + 0xFEAAA000, // 6 1 0 9 + 0xFEAAA000, // 6 1 1 8 + 0xFEAAA000, // 6 1 2 7 + 0xFEAAA000, // 6 1 3 6 + 0xFEAAA000, // 6 1 4 5 + 0xFEAAA000, // 6 1 5 4 + 0xFEAAA000, // 6 1 6 3 + 0xFEAAA000, // 6 1 7 2 + 0xFEAAA000, // 6 1 8 1 + 0xFAAAA000, // 6 2 0 8 + 0xFAAAA000, // 6 2 1 7 + 0xFAAAA000, // 6 2 2 6 + 0xFAAAA000, // 6 2 3 5 + 0xFAAAA000, // 6 2 4 4 + 0xFAAAA000, // 6 2 5 3 + 0xFAAAA000, // 6 2 6 2 + 0xFAAAA000, // 6 2 7 1 + 0xEAAAA000, // 6 3 0 
7 + 0xEAAAA000, // 6 3 1 6 + 0xEAAAA000, // 6 3 2 5 + 0xEAAAA000, // 6 3 3 4 + 0xEAAAA000, // 6 3 4 3 + 0xEAAAA000, // 6 3 5 2 + 0xEAAAA000, // 6 3 6 1 + 0xAAAAA000, // 6 4 0 6 + 0xAAAAA000, // 6 4 1 5 + 0xAAAAA000, // 6 4 2 4 + 0xAAAAA000, // 6 4 3 3 + 0xAAAAA000, // 6 4 4 2 + 0xAAAAA000, // 6 4 5 1 + 0xAAAAA000, // 6 5 0 5 + 0xAAAAA000, // 6 5 1 4 + 0xAAAAA000, // 6 5 2 3 + 0xAAAAA000, // 6 5 3 2 + 0xAAAAA000, // 6 5 4 1 + 0xAAAAA000, // 6 6 0 4 + 0xAAAAA000, // 6 6 1 3 + 0xAAAAA000, // 6 6 2 2 + 0xAAAAA000, // 6 6 3 1 + 0xAAAAA000, // 6 7 0 3 + 0xAAAAA000, // 6 7 1 2 + 0xAAAAA000, // 6 7 2 1 + 0xAAAAA000, // 6 8 0 2 + 0xAAAAA000, // 6 8 1 1 + 0xAAAAA000, // 6 9 0 1 + 0xFAAA8000, // 7 0 0 9 + 0xFAAA8000, // 7 0 1 8 + 0xFAAA8000, // 7 0 2 7 + 0xFAAA8000, // 7 0 3 6 + 0xFAAA8000, // 7 0 4 5 + 0xFAAA8000, // 7 0 5 4 + 0xFAAA8000, // 7 0 6 3 + 0xFAAA8000, // 7 0 7 2 + 0xFAAA8000, // 7 0 8 1 + 0xEAAA8000, // 7 1 0 8 + 0xEAAA8000, // 7 1 1 7 + 0xEAAA8000, // 7 1 2 6 + 0xEAAA8000, // 7 1 3 5 + 0xEAAA8000, // 7 1 4 4 + 0xEAAA8000, // 7 1 5 3 + 0xEAAA8000, // 7 1 6 2 + 0xEAAA8000, // 7 1 7 1 + 0xAAAA8000, // 7 2 0 7 + 0xAAAA8000, // 7 2 1 6 + 0xAAAA8000, // 7 2 2 5 + 0xAAAA8000, // 7 2 3 4 + 0xAAAA8000, // 7 2 4 3 + 0xAAAA8000, // 7 2 5 2 + 0xAAAA8000, // 7 2 6 1 + 0xAAAA8000, // 7 3 0 6 + 0xAAAA8000, // 7 3 1 5 + 0xAAAA8000, // 7 3 2 4 + 0xAAAA8000, // 7 3 3 3 + 0xAAAA8000, // 7 3 4 2 + 0xAAAA8000, // 7 3 5 1 + 0xAAAA8000, // 7 4 0 5 + 0xAAAA8000, // 7 4 1 4 + 0xAAAA8000, // 7 4 2 3 + 0xAAAA8000, // 7 4 3 2 + 0xAAAA8000, // 7 4 4 1 + 0xAAAA8000, // 7 5 0 4 + 0xAAAA8000, // 7 5 1 3 + 0xAAAA8000, // 7 5 2 2 + 0xAAAA8000, // 7 5 3 1 + 0xAAAA8000, // 7 6 0 3 + 0xAAAA8000, // 7 6 1 2 + 0xAAAA8000, // 7 6 2 1 + 0xAAAA8000, // 7 7 0 2 + 0xAAAA8000, // 7 7 1 1 + 0xAAAA8000, // 7 8 0 1 + 0xAAAA0000, // 8 0 0 8 + 0xAAAA0000, // 8 0 1 7 + 0xAAAA0000, // 8 0 2 6 + 0xAAAA0000, // 8 0 3 5 + 0xAAAA0000, // 8 0 4 4 + 0xAAAA0000, // 8 0 5 3 + 0xAAAA0000, // 8 0 6 2 + 0xAAAA0000, // 8 0 7 1 + 0xAAAA0000, // 8 1 0 7 + 0xAAAA0000, // 8 1 1 6 + 0xAAAA0000, // 8 1 2 5 + 0xAAAA0000, // 8 1 3 4 + 0xAAAA0000, // 8 1 4 3 + 0xAAAA0000, // 8 1 5 2 + 0xAAAA0000, // 8 1 6 1 + 0xAAAA0000, // 8 2 0 6 + 0xAAAA0000, // 8 2 1 5 + 0xAAAA0000, // 8 2 2 4 + 0xAAAA0000, // 8 2 3 3 + 0xAAAA0000, // 8 2 4 2 + 0xAAAA0000, // 8 2 5 1 + 0xAAAA0000, // 8 3 0 5 + 0xAAAA0000, // 8 3 1 4 + 0xAAAA0000, // 8 3 2 3 + 0xAAAA0000, // 8 3 3 2 + 0xAAAA0000, // 8 3 4 1 + 0xAAAA0000, // 8 4 0 4 + 0xAAAA0000, // 8 4 1 3 + 0xAAAA0000, // 8 4 2 2 + 0xAAAA0000, // 8 4 3 1 + 0xAAAA0000, // 8 5 0 3 + 0xAAAA0000, // 8 5 1 2 + 0xAAAA0000, // 8 5 2 1 + 0xAAAA0000, // 8 6 0 2 + 0xAAAA0000, // 8 6 1 1 + 0xAAAA0000, // 8 7 0 1 + 0xAAA80000, // 9 0 0 7 + 0xAAA80000, // 9 0 1 6 + 0xAAA80000, // 9 0 2 5 + 0xAAA80000, // 9 0 3 4 + 0xAAA80000, // 9 0 4 3 + 0xAAA80000, // 9 0 5 2 + 0xAAA80000, // 9 0 6 1 + 0xAAA80000, // 9 1 0 6 + 0xAAA80000, // 9 1 1 5 + 0xAAA80000, // 9 1 2 4 + 0xAAA80000, // 9 1 3 3 + 0xAAA80000, // 9 1 4 2 + 0xAAA80000, // 9 1 5 1 + 0xAAA80000, // 9 2 0 5 + 0xAAA80000, // 9 2 1 4 + 0xAAA80000, // 9 2 2 3 + 0xAAA80000, // 9 2 3 2 + 0xAAA80000, // 9 2 4 1 + 0xAAA80000, // 9 3 0 4 + 0xAAA80000, // 9 3 1 3 + 0xAAA80000, // 9 3 2 2 + 0xAAA80000, // 9 3 3 1 + 0xAAA80000, // 9 4 0 3 + 0xAAA80000, // 9 4 1 2 + 0xAAA80000, // 9 4 2 1 + 0xAAA80000, // 9 5 0 2 + 0xAAA80000, // 9 5 1 1 + 0xAAA80000, // 9 6 0 1 + 0xAAA00000, // 10 0 0 6 + 0xAAA00000, // 10 0 1 5 + 0xAAA00000, // 10 0 2 4 + 0xAAA00000, // 10 0 3 3 + 0xAAA00000, // 10 0 4 2 + 0xAAA00000, // 10 0 
5 1 + 0xAAA00000, // 10 1 0 5 + 0xAAA00000, // 10 1 1 4 + 0xAAA00000, // 10 1 2 3 + 0xAAA00000, // 10 1 3 2 + 0xAAA00000, // 10 1 4 1 + 0xAAA00000, // 10 2 0 4 + 0xAAA00000, // 10 2 1 3 + 0xAAA00000, // 10 2 2 2 + 0xAAA00000, // 10 2 3 1 + 0xAAA00000, // 10 3 0 3 + 0xAAA00000, // 10 3 1 2 + 0xAAA00000, // 10 3 2 1 + 0xAAA00000, // 10 4 0 2 + 0xAAA00000, // 10 4 1 1 + 0xAAA00000, // 10 5 0 1 + 0xAA800000, // 11 0 0 5 + 0xAA800000, // 11 0 1 4 + 0xAA800000, // 11 0 2 3 + 0xAA800000, // 11 0 3 2 + 0xAA800000, // 11 0 4 1 + 0xAA800000, // 11 1 0 4 + 0xAA800000, // 11 1 1 3 + 0xAA800000, // 11 1 2 2 + 0xAA800000, // 11 1 3 1 + 0xAA800000, // 11 2 0 3 + 0xAA800000, // 11 2 1 2 + 0xAA800000, // 11 2 2 1 + 0xAA800000, // 11 3 0 2 + 0xAA800000, // 11 3 1 1 + 0xAA800000, // 11 4 0 1 + 0xAA000000, // 12 0 0 4 + 0xAA000000, // 12 0 1 3 + 0xAA000000, // 12 0 2 2 + 0xAA000000, // 12 0 3 1 + 0xAA000000, // 12 1 0 3 + 0xAA000000, // 12 1 1 2 + 0xAA000000, // 12 1 2 1 + 0xAA000000, // 12 2 0 2 + 0xAA000000, // 12 2 1 1 + 0xAA000000, // 12 3 0 1 + 0xA8000000, // 13 0 0 3 + 0xA8000000, // 13 0 1 2 + 0xA8000000, // 13 0 2 1 + 0xA8000000, // 13 1 0 2 + 0xA8000000, // 13 1 1 1 + 0xA8000000, // 13 2 0 1 + 0xA0000000, // 14 0 0 2 + 0xA0000000, // 14 0 1 1 + 0xA0000000, // 14 1 0 1 + 0x80000000, // 15 0 0 1 + 0x80000000, + 0x80000000, + 0x80000000, + 0x80000000, + 0x80000000, + 0x80000000, + 0x80000000, + 0x80000000, + 0x80000000, + 0x80000000, + 0x80000000, + 0x80000000, + 0x80000000, + 0x80000000, + 0x80000000, + 0x80000000, + 0x80000000, + 0x80000000, + 0x80000000, + 0x80000000, + 0x80000000, + 0x80000000, + 0x80000000, + 0x80000000, +}; + Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/cuda/Bitmaps.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/cuda/Bitmaps.h +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/cuda/Bitmaps.h @@ -1,1119 +0,0 @@ - - -/* -static void doPrecomputation() -{ - uint bitmaps[1024]; - - int indices[16]; - int num = 0; - - // Compute bitmaps with 3 clusters: - - // first cluster [0,i) is at the start - for( int m = 0; m < 16; ++m ) - { - indices[m] = 0; - } - const int imax = 15; - for( int i = imax; i >= 0; --i ) - { - // second cluster [i,j) is half along - for( int m = i; m < 16; ++m ) - { - indices[m] = 2; - } - const int jmax = ( i == 0 ) ? 15 : 16; - for( int j = jmax; j >= i; --j ) - { - // last cluster [j,k) is at the end - if( j < 16 ) - { - indices[j] = 1; - } - - uint bitmap = 0; - - for(int p = 0; p < 16; p++) { - bitmap |= indices[p] << (p * 2); - } - - bitmaps[num] = bitmap; - - num++; - } - } - nvDebugCheck(num == 151); - - // Align to 160. - for(int i = 0; i < 9; i++) - { - bitmaps[num] = 0x555AA000; - num++; - } - nvDebugCheck(num == 160); - - // Append bitmaps with 4 clusters: - - // first cluster [0,i) is at the start - for( int m = 0; m < 16; ++m ) - { - indices[m] = 0; - } - for( int i = imax; i >= 0; --i ) - { - // second cluster [i,j) is one third along - for( int m = i; m < 16; ++m ) - { - indices[m] = 2; - } - const int jmax = ( i == 0 ) ? 15 : 16; - for( int j = jmax; j >= i; --j ) - { - // third cluster [j,k) is two thirds along - for( int m = j; m < 16; ++m ) - { - indices[m] = 3; - } - - int kmax = ( j == 0 ) ? 
15 : 16; - for( int k = kmax; k >= j; --k ) - { - // last cluster [k,n) is at the end - if( k < 16 ) - { - indices[k] = 1; - } - - uint bitmap = 0; - - bool hasThree = false; - for(int p = 0; p < 16; p++) { - bitmap |= indices[p] << (p * 2); - - if (indices[p] == 3) hasThree = true; - } - - if (hasThree) { - bitmaps[num] = bitmap; - num++; - } - } - } - } - nvDebugCheck(num == 975); - - // Align to 1024. - for(int i = 0; i < 49; i++) - { - bitmaps[num] = 0x555AA000; - num++; - } - - nvDebugCheck(num == 1024); - - printf("uint bitmaps[992] =\n{\n"); - for (int i = 0; i < 992; i++) - { - printf("\t0x%.8X,\n", bitmaps[i]); - } - printf("};\n"); -} -*/ - - -const static uint s_bitmapTable[992] = -{ - 0x80000000, - 0x40000000, - 0xA0000000, - 0x60000000, - 0x50000000, - 0xA8000000, - 0x68000000, - 0x58000000, - 0x54000000, - 0xAA000000, - 0x6A000000, - 0x5A000000, - 0x56000000, - 0x55000000, - 0xAA800000, - 0x6A800000, - 0x5A800000, - 0x56800000, - 0x55800000, - 0x55400000, - 0xAAA00000, - 0x6AA00000, - 0x5AA00000, - 0x56A00000, - 0x55A00000, - 0x55600000, - 0x55500000, - 0xAAA80000, - 0x6AA80000, - 0x5AA80000, - 0x56A80000, - 0x55A80000, - 0x55680000, - 0x55580000, - 0x55540000, - 0xAAAA0000, - 0x6AAA0000, - 0x5AAA0000, - 0x56AA0000, - 0x55AA0000, - 0x556A0000, - 0x555A0000, - 0x55560000, - 0x55550000, - 0xAAAA8000, - 0x6AAA8000, - 0x5AAA8000, - 0x56AA8000, - 0x55AA8000, - 0x556A8000, - 0x555A8000, - 0x55568000, - 0x55558000, - 0x55554000, - 0xAAAAA000, - 0x6AAAA000, - 0x5AAAA000, - 0x56AAA000, - 0x55AAA000, - 0x556AA000, - 0x555AA000, - 0x5556A000, - 0x5555A000, - 0x55556000, - 0x55555000, - 0xAAAAA800, - 0x6AAAA800, - 0x5AAAA800, - 0x56AAA800, - 0x55AAA800, - 0x556AA800, - 0x555AA800, - 0x5556A800, - 0x5555A800, - 0x55556800, - 0x55555800, - 0x55555400, - 0xAAAAAA00, - 0x6AAAAA00, - 0x5AAAAA00, - 0x56AAAA00, - 0x55AAAA00, - 0x556AAA00, - 0x555AAA00, - 0x5556AA00, - 0x5555AA00, - 0x55556A00, - 0x55555A00, - 0x55555600, - 0x55555500, - 0xAAAAAA80, - 0x6AAAAA80, - 0x5AAAAA80, - 0x56AAAA80, - 0x55AAAA80, - 0x556AAA80, - 0x555AAA80, - 0x5556AA80, - 0x5555AA80, - 0x55556A80, - 0x55555A80, - 0x55555680, - 0x55555580, - 0x55555540, - 0xAAAAAAA0, - 0x6AAAAAA0, - 0x5AAAAAA0, - 0x56AAAAA0, - 0x55AAAAA0, - 0x556AAAA0, - 0x555AAAA0, - 0x5556AAA0, - 0x5555AAA0, - 0x55556AA0, - 0x55555AA0, - 0x555556A0, - 0x555555A0, - 0x55555560, - 0x55555550, - 0xAAAAAAA8, - 0x6AAAAAA8, - 0x5AAAAAA8, - 0x56AAAAA8, - 0x55AAAAA8, - 0x556AAAA8, - 0x555AAAA8, - 0x5556AAA8, - 0x5555AAA8, - 0x55556AA8, - 0x55555AA8, - 0x555556A8, - 0x555555A8, - 0x55555568, - 0x55555558, - 0x55555554, - 0x6AAAAAAA, - 0x5AAAAAAA, - 0x56AAAAAA, - 0x55AAAAAA, - 0x556AAAAA, - 0x555AAAAA, - 0x5556AAAA, - 0x5555AAAA, - 0x55556AAA, - 0x55555AAA, - 0x555556AA, - 0x555555AA, - 0x5555556A, - 0x5555555A, - 0x55555556, - 0x55555555, - 0x55555555, - 0x55555555, - 0x55555555, - 0x55555555, - 0x55555555, - 0x55555555, - 0x55555555, - 0x55555555, - 0x55555555, - 0xC0000000, - 0xE0000000, - 0xF0000000, - 0x70000000, - 0xE8000000, - 0xF8000000, - 0x78000000, - 0xFC000000, - 0x7C000000, - 0x5C000000, - 0xEA000000, - 0xFA000000, - 0x7A000000, - 0xFE000000, - 0x7E000000, - 0x5E000000, - 0xFF000000, - 0x7F000000, - 0x5F000000, - 0x57000000, - 0xEA800000, - 0xFA800000, - 0x7A800000, - 0xFE800000, - 0x7E800000, - 0x5E800000, - 0xFF800000, - 0x7F800000, - 0x5F800000, - 0x57800000, - 0xFFC00000, - 0x7FC00000, - 0x5FC00000, - 0x57C00000, - 0x55C00000, - 0xEAA00000, - 0xFAA00000, - 0x7AA00000, - 0xFEA00000, - 0x7EA00000, - 0x5EA00000, - 0xFFA00000, - 0x7FA00000, - 
0x5FA00000, - 0x57A00000, - 0xFFE00000, - 0x7FE00000, - 0x5FE00000, - 0x57E00000, - 0x55E00000, - 0xFFF00000, - 0x7FF00000, - 0x5FF00000, - 0x57F00000, - 0x55F00000, - 0x55700000, - 0xEAA80000, - 0xFAA80000, - 0x7AA80000, - 0xFEA80000, - 0x7EA80000, - 0x5EA80000, - 0xFFA80000, - 0x7FA80000, - 0x5FA80000, - 0x57A80000, - 0xFFE80000, - 0x7FE80000, - 0x5FE80000, - 0x57E80000, - 0x55E80000, - 0xFFF80000, - 0x7FF80000, - 0x5FF80000, - 0x57F80000, - 0x55F80000, - 0x55780000, - 0xFFFC0000, - 0x7FFC0000, - 0x5FFC0000, - 0x57FC0000, - 0x55FC0000, - 0x557C0000, - 0x555C0000, - 0xEAAA0000, - 0xFAAA0000, - 0x7AAA0000, - 0xFEAA0000, - 0x7EAA0000, - 0x5EAA0000, - 0xFFAA0000, - 0x7FAA0000, - 0x5FAA0000, - 0x57AA0000, - 0xFFEA0000, - 0x7FEA0000, - 0x5FEA0000, - 0x57EA0000, - 0x55EA0000, - 0xFFFA0000, - 0x7FFA0000, - 0x5FFA0000, - 0x57FA0000, - 0x55FA0000, - 0x557A0000, - 0xFFFE0000, - 0x7FFE0000, - 0x5FFE0000, - 0x57FE0000, - 0x55FE0000, - 0x557E0000, - 0x555E0000, - 0xFFFF0000, - 0x7FFF0000, - 0x5FFF0000, - 0x57FF0000, - 0x55FF0000, - 0x557F0000, - 0x555F0000, - 0x55570000, - 0xEAAA8000, - 0xFAAA8000, - 0x7AAA8000, - 0xFEAA8000, - 0x7EAA8000, - 0x5EAA8000, - 0xFFAA8000, - 0x7FAA8000, - 0x5FAA8000, - 0x57AA8000, - 0xFFEA8000, - 0x7FEA8000, - 0x5FEA8000, - 0x57EA8000, - 0x55EA8000, - 0xFFFA8000, - 0x7FFA8000, - 0x5FFA8000, - 0x57FA8000, - 0x55FA8000, - 0x557A8000, - 0xFFFE8000, - 0x7FFE8000, - 0x5FFE8000, - 0x57FE8000, - 0x55FE8000, - 0x557E8000, - 0x555E8000, - 0xFFFF8000, - 0x7FFF8000, - 0x5FFF8000, - 0x57FF8000, - 0x55FF8000, - 0x557F8000, - 0x555F8000, - 0x55578000, - 0xFFFFC000, - 0x7FFFC000, - 0x5FFFC000, - 0x57FFC000, - 0x55FFC000, - 0x557FC000, - 0x555FC000, - 0x5557C000, - 0x5555C000, - 0xEAAAA000, - 0xFAAAA000, - 0x7AAAA000, - 0xFEAAA000, - 0x7EAAA000, - 0x5EAAA000, - 0xFFAAA000, - 0x7FAAA000, - 0x5FAAA000, - 0x57AAA000, - 0xFFEAA000, - 0x7FEAA000, - 0x5FEAA000, - 0x57EAA000, - 0x55EAA000, - 0xFFFAA000, - 0x7FFAA000, - 0x5FFAA000, - 0x57FAA000, - 0x55FAA000, - 0x557AA000, - 0xFFFEA000, - 0x7FFEA000, - 0x5FFEA000, - 0x57FEA000, - 0x55FEA000, - 0x557EA000, - 0x555EA000, - 0xFFFFA000, - 0x7FFFA000, - 0x5FFFA000, - 0x57FFA000, - 0x55FFA000, - 0x557FA000, - 0x555FA000, - 0x5557A000, - 0xFFFFE000, - 0x7FFFE000, - 0x5FFFE000, - 0x57FFE000, - 0x55FFE000, - 0x557FE000, - 0x555FE000, - 0x5557E000, - 0x5555E000, - 0xFFFFF000, - 0x7FFFF000, - 0x5FFFF000, - 0x57FFF000, - 0x55FFF000, - 0x557FF000, - 0x555FF000, - 0x5557F000, - 0x5555F000, - 0x55557000, - 0xEAAAA800, - 0xFAAAA800, - 0x7AAAA800, - 0xFEAAA800, - 0x7EAAA800, - 0x5EAAA800, - 0xFFAAA800, - 0x7FAAA800, - 0x5FAAA800, - 0x57AAA800, - 0xFFEAA800, - 0x7FEAA800, - 0x5FEAA800, - 0x57EAA800, - 0x55EAA800, - 0xFFFAA800, - 0x7FFAA800, - 0x5FFAA800, - 0x57FAA800, - 0x55FAA800, - 0x557AA800, - 0xFFFEA800, - 0x7FFEA800, - 0x5FFEA800, - 0x57FEA800, - 0x55FEA800, - 0x557EA800, - 0x555EA800, - 0xFFFFA800, - 0x7FFFA800, - 0x5FFFA800, - 0x57FFA800, - 0x55FFA800, - 0x557FA800, - 0x555FA800, - 0x5557A800, - 0xFFFFE800, - 0x7FFFE800, - 0x5FFFE800, - 0x57FFE800, - 0x55FFE800, - 0x557FE800, - 0x555FE800, - 0x5557E800, - 0x5555E800, - 0xFFFFF800, - 0x7FFFF800, - 0x5FFFF800, - 0x57FFF800, - 0x55FFF800, - 0x557FF800, - 0x555FF800, - 0x5557F800, - 0x5555F800, - 0x55557800, - 0xFFFFFC00, - 0x7FFFFC00, - 0x5FFFFC00, - 0x57FFFC00, - 0x55FFFC00, - 0x557FFC00, - 0x555FFC00, - 0x5557FC00, - 0x5555FC00, - 0x55557C00, - 0x55555C00, - 0xEAAAAA00, - 0xFAAAAA00, - 0x7AAAAA00, - 0xFEAAAA00, - 0x7EAAAA00, - 0x5EAAAA00, - 0xFFAAAA00, - 0x7FAAAA00, - 0x5FAAAA00, - 0x57AAAA00, - 0xFFEAAA00, 
- 0x7FEAAA00, - 0x5FEAAA00, - 0x57EAAA00, - 0x55EAAA00, - 0xFFFAAA00, - 0x7FFAAA00, - 0x5FFAAA00, - 0x57FAAA00, - 0x55FAAA00, - 0x557AAA00, - 0xFFFEAA00, - 0x7FFEAA00, - 0x5FFEAA00, - 0x57FEAA00, - 0x55FEAA00, - 0x557EAA00, - 0x555EAA00, - 0xFFFFAA00, - 0x7FFFAA00, - 0x5FFFAA00, - 0x57FFAA00, - 0x55FFAA00, - 0x557FAA00, - 0x555FAA00, - 0x5557AA00, - 0xFFFFEA00, - 0x7FFFEA00, - 0x5FFFEA00, - 0x57FFEA00, - 0x55FFEA00, - 0x557FEA00, - 0x555FEA00, - 0x5557EA00, - 0x5555EA00, - 0xFFFFFA00, - 0x7FFFFA00, - 0x5FFFFA00, - 0x57FFFA00, - 0x55FFFA00, - 0x557FFA00, - 0x555FFA00, - 0x5557FA00, - 0x5555FA00, - 0x55557A00, - 0xFFFFFE00, - 0x7FFFFE00, - 0x5FFFFE00, - 0x57FFFE00, - 0x55FFFE00, - 0x557FFE00, - 0x555FFE00, - 0x5557FE00, - 0x5555FE00, - 0x55557E00, - 0x55555E00, - 0xFFFFFF00, - 0x7FFFFF00, - 0x5FFFFF00, - 0x57FFFF00, - 0x55FFFF00, - 0x557FFF00, - 0x555FFF00, - 0x5557FF00, - 0x5555FF00, - 0x55557F00, - 0x55555F00, - 0x55555700, - 0xEAAAAA80, - 0xFAAAAA80, - 0x7AAAAA80, - 0xFEAAAA80, - 0x7EAAAA80, - 0x5EAAAA80, - 0xFFAAAA80, - 0x7FAAAA80, - 0x5FAAAA80, - 0x57AAAA80, - 0xFFEAAA80, - 0x7FEAAA80, - 0x5FEAAA80, - 0x57EAAA80, - 0x55EAAA80, - 0xFFFAAA80, - 0x7FFAAA80, - 0x5FFAAA80, - 0x57FAAA80, - 0x55FAAA80, - 0x557AAA80, - 0xFFFEAA80, - 0x7FFEAA80, - 0x5FFEAA80, - 0x57FEAA80, - 0x55FEAA80, - 0x557EAA80, - 0x555EAA80, - 0xFFFFAA80, - 0x7FFFAA80, - 0x5FFFAA80, - 0x57FFAA80, - 0x55FFAA80, - 0x557FAA80, - 0x555FAA80, - 0x5557AA80, - 0xFFFFEA80, - 0x7FFFEA80, - 0x5FFFEA80, - 0x57FFEA80, - 0x55FFEA80, - 0x557FEA80, - 0x555FEA80, - 0x5557EA80, - 0x5555EA80, - 0xFFFFFA80, - 0x7FFFFA80, - 0x5FFFFA80, - 0x57FFFA80, - 0x55FFFA80, - 0x557FFA80, - 0x555FFA80, - 0x5557FA80, - 0x5555FA80, - 0x55557A80, - 0xFFFFFE80, - 0x7FFFFE80, - 0x5FFFFE80, - 0x57FFFE80, - 0x55FFFE80, - 0x557FFE80, - 0x555FFE80, - 0x5557FE80, - 0x5555FE80, - 0x55557E80, - 0x55555E80, - 0xFFFFFF80, - 0x7FFFFF80, - 0x5FFFFF80, - 0x57FFFF80, - 0x55FFFF80, - 0x557FFF80, - 0x555FFF80, - 0x5557FF80, - 0x5555FF80, - 0x55557F80, - 0x55555F80, - 0x55555780, - 0xFFFFFFC0, - 0x7FFFFFC0, - 0x5FFFFFC0, - 0x57FFFFC0, - 0x55FFFFC0, - 0x557FFFC0, - 0x555FFFC0, - 0x5557FFC0, - 0x5555FFC0, - 0x55557FC0, - 0x55555FC0, - 0x555557C0, - 0x555555C0, - 0xEAAAAAA0, - 0xFAAAAAA0, - 0x7AAAAAA0, - 0xFEAAAAA0, - 0x7EAAAAA0, - 0x5EAAAAA0, - 0xFFAAAAA0, - 0x7FAAAAA0, - 0x5FAAAAA0, - 0x57AAAAA0, - 0xFFEAAAA0, - 0x7FEAAAA0, - 0x5FEAAAA0, - 0x57EAAAA0, - 0x55EAAAA0, - 0xFFFAAAA0, - 0x7FFAAAA0, - 0x5FFAAAA0, - 0x57FAAAA0, - 0x55FAAAA0, - 0x557AAAA0, - 0xFFFEAAA0, - 0x7FFEAAA0, - 0x5FFEAAA0, - 0x57FEAAA0, - 0x55FEAAA0, - 0x557EAAA0, - 0x555EAAA0, - 0xFFFFAAA0, - 0x7FFFAAA0, - 0x5FFFAAA0, - 0x57FFAAA0, - 0x55FFAAA0, - 0x557FAAA0, - 0x555FAAA0, - 0x5557AAA0, - 0xFFFFEAA0, - 0x7FFFEAA0, - 0x5FFFEAA0, - 0x57FFEAA0, - 0x55FFEAA0, - 0x557FEAA0, - 0x555FEAA0, - 0x5557EAA0, - 0x5555EAA0, - 0xFFFFFAA0, - 0x7FFFFAA0, - 0x5FFFFAA0, - 0x57FFFAA0, - 0x55FFFAA0, - 0x557FFAA0, - 0x555FFAA0, - 0x5557FAA0, - 0x5555FAA0, - 0x55557AA0, - 0xFFFFFEA0, - 0x7FFFFEA0, - 0x5FFFFEA0, - 0x57FFFEA0, - 0x55FFFEA0, - 0x557FFEA0, - 0x555FFEA0, - 0x5557FEA0, - 0x5555FEA0, - 0x55557EA0, - 0x55555EA0, - 0xFFFFFFA0, - 0x7FFFFFA0, - 0x5FFFFFA0, - 0x57FFFFA0, - 0x55FFFFA0, - 0x557FFFA0, - 0x555FFFA0, - 0x5557FFA0, - 0x5555FFA0, - 0x55557FA0, - 0x55555FA0, - 0x555557A0, - 0xFFFFFFE0, - 0x7FFFFFE0, - 0x5FFFFFE0, - 0x57FFFFE0, - 0x55FFFFE0, - 0x557FFFE0, - 0x555FFFE0, - 0x5557FFE0, - 0x5555FFE0, - 0x55557FE0, - 0x55555FE0, - 0x555557E0, - 0x555555E0, - 0xFFFFFFF0, - 0x7FFFFFF0, - 0x5FFFFFF0, - 0x57FFFFF0, - 
0x55FFFFF0, - 0x557FFFF0, - 0x555FFFF0, - 0x5557FFF0, - 0x5555FFF0, - 0x55557FF0, - 0x55555FF0, - 0x555557F0, - 0x555555F0, - 0x55555570, - 0xEAAAAAA8, - 0xFAAAAAA8, - 0x7AAAAAA8, - 0xFEAAAAA8, - 0x7EAAAAA8, - 0x5EAAAAA8, - 0xFFAAAAA8, - 0x7FAAAAA8, - 0x5FAAAAA8, - 0x57AAAAA8, - 0xFFEAAAA8, - 0x7FEAAAA8, - 0x5FEAAAA8, - 0x57EAAAA8, - 0x55EAAAA8, - 0xFFFAAAA8, - 0x7FFAAAA8, - 0x5FFAAAA8, - 0x57FAAAA8, - 0x55FAAAA8, - 0x557AAAA8, - 0xFFFEAAA8, - 0x7FFEAAA8, - 0x5FFEAAA8, - 0x57FEAAA8, - 0x55FEAAA8, - 0x557EAAA8, - 0x555EAAA8, - 0xFFFFAAA8, - 0x7FFFAAA8, - 0x5FFFAAA8, - 0x57FFAAA8, - 0x55FFAAA8, - 0x557FAAA8, - 0x555FAAA8, - 0x5557AAA8, - 0xFFFFEAA8, - 0x7FFFEAA8, - 0x5FFFEAA8, - 0x57FFEAA8, - 0x55FFEAA8, - 0x557FEAA8, - 0x555FEAA8, - 0x5557EAA8, - 0x5555EAA8, - 0xFFFFFAA8, - 0x7FFFFAA8, - 0x5FFFFAA8, - 0x57FFFAA8, - 0x55FFFAA8, - 0x557FFAA8, - 0x555FFAA8, - 0x5557FAA8, - 0x5555FAA8, - 0x55557AA8, - 0xFFFFFEA8, - 0x7FFFFEA8, - 0x5FFFFEA8, - 0x57FFFEA8, - 0x55FFFEA8, - 0x557FFEA8, - 0x555FFEA8, - 0x5557FEA8, - 0x5555FEA8, - 0x55557EA8, - 0x55555EA8, - 0xFFFFFFA8, - 0x7FFFFFA8, - 0x5FFFFFA8, - 0x57FFFFA8, - 0x55FFFFA8, - 0x557FFFA8, - 0x555FFFA8, - 0x5557FFA8, - 0x5555FFA8, - 0x55557FA8, - 0x55555FA8, - 0x555557A8, - 0xFFFFFFE8, - 0x7FFFFFE8, - 0x5FFFFFE8, - 0x57FFFFE8, - 0x55FFFFE8, - 0x557FFFE8, - 0x555FFFE8, - 0x5557FFE8, - 0x5555FFE8, - 0x55557FE8, - 0x55555FE8, - 0x555557E8, - 0x555555E8, - 0xFFFFFFF8, - 0x7FFFFFF8, - 0x5FFFFFF8, - 0x57FFFFF8, - 0x55FFFFF8, - 0x557FFFF8, - 0x555FFFF8, - 0x5557FFF8, - 0x5555FFF8, - 0x55557FF8, - 0x55555FF8, - 0x555557F8, - 0x555555F8, - 0x55555578, - 0xFFFFFFFC, - 0x7FFFFFFC, - 0x5FFFFFFC, - 0x57FFFFFC, - 0x55FFFFFC, - 0x557FFFFC, - 0x555FFFFC, - 0x5557FFFC, - 0x5555FFFC, - 0x55557FFC, - 0x55555FFC, - 0x555557FC, - 0x555555FC, - 0x5555557C, - 0x5555555C, - 0xEAAAAAAA, - 0xFAAAAAAA, - 0x7AAAAAAA, - 0xFEAAAAAA, - 0x7EAAAAAA, - 0x5EAAAAAA, - 0xFFAAAAAA, - 0x7FAAAAAA, - 0x5FAAAAAA, - 0x57AAAAAA, - 0xFFEAAAAA, - 0x7FEAAAAA, - 0x5FEAAAAA, - 0x57EAAAAA, - 0x55EAAAAA, - 0xFFFAAAAA, - 0x7FFAAAAA, - 0x5FFAAAAA, - 0x57FAAAAA, - 0x55FAAAAA, - 0x557AAAAA, - 0xFFFEAAAA, - 0x7FFEAAAA, - 0x5FFEAAAA, - 0x57FEAAAA, - 0x55FEAAAA, - 0x557EAAAA, - 0x555EAAAA, - 0xFFFFAAAA, - 0x7FFFAAAA, - 0x5FFFAAAA, - 0x57FFAAAA, - 0x55FFAAAA, - 0x557FAAAA, - 0x555FAAAA, - 0x5557AAAA, - 0xFFFFEAAA, - 0x7FFFEAAA, - 0x5FFFEAAA, - 0x57FFEAAA, - 0x55FFEAAA, - 0x557FEAAA, - 0x555FEAAA, - 0x5557EAAA, - 0x5555EAAA, - 0xFFFFFAAA, - 0x7FFFFAAA, - 0x5FFFFAAA, - 0x57FFFAAA, - 0x55FFFAAA, - 0x557FFAAA, - 0x555FFAAA, - 0x5557FAAA, - 0x5555FAAA, - 0x55557AAA, - 0xFFFFFEAA, - 0x7FFFFEAA, - 0x5FFFFEAA, - 0x57FFFEAA, - 0x55FFFEAA, - 0x557FFEAA, - 0x555FFEAA, - 0x5557FEAA, - 0x5555FEAA, - 0x55557EAA, - 0x55555EAA, - 0xFFFFFFAA, - 0x7FFFFFAA, - 0x5FFFFFAA, - 0x57FFFFAA, - 0x55FFFFAA, - 0x557FFFAA, - 0x555FFFAA, - 0x5557FFAA, - 0x5555FFAA, - 0x55557FAA, - 0x55555FAA, - 0x555557AA, - 0xFFFFFFEA, - 0x7FFFFFEA, - 0x5FFFFFEA, - 0x57FFFFEA, - 0x55FFFFEA, - 0x557FFFEA, - 0x555FFFEA, - 0x5557FFEA, - 0x5555FFEA, - 0x55557FEA, - 0x55555FEA, - 0x555557EA, - 0x555555EA, - 0xFFFFFFFA, - 0x7FFFFFFA, - 0x5FFFFFFA, - 0x57FFFFFA, - 0x55FFFFFA, - 0x557FFFFA, - 0x555FFFFA, - 0x5557FFFA, - 0x5555FFFA, - 0x55557FFA, - 0x55555FFA, - 0x555557FA, - 0x555555FA, - 0x5555557A, - 0xFFFFFFFE, - 0x7FFFFFFE, - 0x5FFFFFFE, - 0x57FFFFFE, - 0x55FFFFFE, - 0x557FFFFE, - 0x555FFFFE, - 0x5557FFFE, - 0x5555FFFE, - 0x55557FFE, - 0x55555FFE, - 0x555557FE, - 0x555555FE, - 0x5555557E, - 0x5555555E, - 0x7FFFFFFF, - 0x5FFFFFFF, - 0x57FFFFFF, - 0x55FFFFFF, 
- 0x557FFFFF, - 0x555FFFFF, - 0x5557FFFF, - 0x5555FFFF, - 0x55557FFF, - 0x55555FFF, - 0x555557FF, - 0x555555FF, - 0x5555557F, - 0x5555555F, - 0x55555557, - 0x55555557, - 0x55555557, - 0x55555557, - 0x55555557, - 0x55555557, - 0x55555557, - 0x55555557, - 0x55555557, - 0x55555557, - 0x55555557, - 0x55555557, - 0x55555557, - 0x55555557, - 0x55555557, - 0x55555557, - 0x55555557, - 0x55555557, -}; Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/cuda/CompressKernel.cu =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/cuda/CompressKernel.cu +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/cuda/CompressKernel.cu @@ -1,1122 +1,2022 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. - -#include -#include -#include -#include - -#include "CudaMath.h" - -#include "../SingleColorLookup.h" - -#define NUM_THREADS 64 // Number of threads per block. - -#if __DEVICE_EMULATION__ -#define __debugsync() __syncthreads() -#else -#define __debugsync() -#endif - -typedef unsigned char uchar; -typedef unsigned short ushort; -typedef unsigned int uint; - -template -__device__ inline void swap(T & a, T & b) -{ - T tmp = a; - a = b; - b = tmp; -} - -__constant__ float3 kColorMetric = { 1.0f, 1.0f, 1.0f }; -__constant__ float3 kColorMetricSqr = { 1.0f, 1.0f, 1.0f }; - - - -//////////////////////////////////////////////////////////////////////////////// -// Sort colors -//////////////////////////////////////////////////////////////////////////////// -__device__ void sortColors(const float * values, int * cmp) -{ - int tid = threadIdx.x; - -#if 1 - cmp[tid] = (values[0] < values[tid]); - cmp[tid] += (values[1] < values[tid]); - cmp[tid] += (values[2] < values[tid]); - cmp[tid] += (values[3] < values[tid]); - cmp[tid] += (values[4] < values[tid]); - cmp[tid] += (values[5] < values[tid]); - cmp[tid] += (values[6] < values[tid]); - cmp[tid] += (values[7] < values[tid]); - cmp[tid] += (values[8] < values[tid]); - cmp[tid] += (values[9] < values[tid]); - cmp[tid] += (values[10] < values[tid]); - cmp[tid] += (values[11] < values[tid]); - cmp[tid] += (values[12] < values[tid]); - cmp[tid] += (values[13] < values[tid]); - cmp[tid] += (values[14] < values[tid]); - cmp[tid] += (values[15] < values[tid]); - - // Resolve elements with the same index. 
- if (tid > 0 && cmp[tid] == cmp[0]) ++cmp[tid]; - if (tid > 1 && cmp[tid] == cmp[1]) ++cmp[tid]; - if (tid > 2 && cmp[tid] == cmp[2]) ++cmp[tid]; - if (tid > 3 && cmp[tid] == cmp[3]) ++cmp[tid]; - if (tid > 4 && cmp[tid] == cmp[4]) ++cmp[tid]; - if (tid > 5 && cmp[tid] == cmp[5]) ++cmp[tid]; - if (tid > 6 && cmp[tid] == cmp[6]) ++cmp[tid]; - if (tid > 7 && cmp[tid] == cmp[7]) ++cmp[tid]; - if (tid > 8 && cmp[tid] == cmp[8]) ++cmp[tid]; - if (tid > 9 && cmp[tid] == cmp[9]) ++cmp[tid]; - if (tid > 10 && cmp[tid] == cmp[10]) ++cmp[tid]; - if (tid > 11 && cmp[tid] == cmp[11]) ++cmp[tid]; - if (tid > 12 && cmp[tid] == cmp[12]) ++cmp[tid]; - if (tid > 13 && cmp[tid] == cmp[13]) ++cmp[tid]; - if (tid > 14 && cmp[tid] == cmp[14]) ++cmp[tid]; -#else - - cmp[tid] = 0; - - #pragma unroll - for (int i = 0; i < 16; i++) - { - cmp[tid] += (values[i] < values[tid]); - } - - // Resolve elements with the same index. - #pragma unroll - for (int i = 0; i < 15; i++) - { - if (tid > 0 && cmp[tid] == cmp[i]) ++cmp[tid]; - } -#endif -} - - -//////////////////////////////////////////////////////////////////////////////// -// Load color block to shared mem -//////////////////////////////////////////////////////////////////////////////// -__device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sums[16], int xrefs[16], int * sameColor) -{ - const int bid = blockIdx.x; - const int idx = threadIdx.x; - - __shared__ float dps[16]; - - if (idx < 16) - { - // Read color and copy to shared mem. - uint c = image[(bid) * 16 + idx]; - - colors[idx].z = ((c >> 0) & 0xFF) * (1.0f / 255.0f); - colors[idx].y = ((c >> 8) & 0xFF) * (1.0f / 255.0f); - colors[idx].x = ((c >> 16) & 0xFF) * (1.0f / 255.0f); - - // No need to synchronize, 16 < warp size. -#if __DEVICE_EMULATION__ - } __debugsync(); if (idx < 16) { -#endif - - // Sort colors along the best fit line. - colorSums(colors, sums); - float3 axis = bestFitLine(colors, sums[0], kColorMetric); - - *sameColor = (axis == make_float3(0, 0, 0)); - - dps[idx] = dot(colors[idx], axis); - -#if __DEVICE_EMULATION__ - } __debugsync(); if (idx < 16) { -#endif - - sortColors(dps, xrefs); - - float3 tmp = colors[idx]; - colors[xrefs[idx]] = tmp; - } -} - -__device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sums[16], float weights[16], int xrefs[16], int * sameColor) -{ - const int bid = blockIdx.x; - const int idx = threadIdx.x; - - __shared__ float3 rawColors[16]; - __shared__ float dps[16]; - - if (idx < 16) - { - // Read color and copy to shared mem. - uint c = image[(bid) * 16 + idx]; - - rawColors[idx].z = ((c >> 0) & 0xFF) * (1.0f / 255.0f); - rawColors[idx].y = ((c >> 8) & 0xFF) * (1.0f / 255.0f); - rawColors[idx].x = ((c >> 16) & 0xFF) * (1.0f / 255.0f); - weights[idx] = (((c >> 24) & 0xFF) + 1) * (1.0f / 256.0f); - - colors[idx] = rawColors[idx] * weights[idx]; - - - // No need to synchronize, 16 < warp size. -#if __DEVICE_EMULATION__ - } __debugsync(); if (idx < 16) { -#endif - - // Sort colors along the best fit line. - colorSums(colors, sums); - float3 axis = bestFitLine(colors, sums[0], kColorMetric); - - *sameColor = (axis == make_float3(0, 0, 0)); - - // Single color compressor needs unweighted colors. 
- if (*sameColor) colors[idx] = rawColors[idx]; - - dps[idx] = dot(rawColors[idx], axis); - -#if __DEVICE_EMULATION__ - } __debugsync(); if (idx < 16) { -#endif - - sortColors(dps, xrefs); - - float3 tmp = colors[idx]; - colors[xrefs[idx]] = tmp; - - float w = weights[idx]; - weights[xrefs[idx]] = w; - } -} - - -//////////////////////////////////////////////////////////////////////////////// -// Round color to RGB565 and expand -//////////////////////////////////////////////////////////////////////////////// -inline __device__ float3 roundAndExpand565(float3 v, ushort * w) -{ - v.x = rintf(__saturatef(v.x) * 31.0f); - v.y = rintf(__saturatef(v.y) * 63.0f); - v.z = rintf(__saturatef(v.z) * 31.0f); - *w = ((ushort)v.x << 11) | ((ushort)v.y << 5) | (ushort)v.z; - v.x *= 0.03227752766457f; // approximate integer bit expansion. - v.y *= 0.01583151765563f; - v.z *= 0.03227752766457f; - return v; -} - - -//////////////////////////////////////////////////////////////////////////////// -// Evaluate permutations -//////////////////////////////////////////////////////////////////////////////// -__device__ float evalPermutation4(const float3 * colors, uint permutation, ushort * start, ushort * end) -{ - // Compute endpoints using least squares. - float alpha2_sum = 0.0f; - float beta2_sum = 0.0f; - float alphabeta_sum = 0.0f; - float3 alphax_sum = make_float3(0.0f, 0.0f, 0.0f); - float3 betax_sum = make_float3(0.0f, 0.0f, 0.0f); - - // Compute alpha & beta for this permutation. - for (int i = 0; i < 16; i++) - { - const uint bits = permutation >> (2*i); - - float beta = (bits & 1); - if (bits & 2) beta = (1 + beta) / 3.0f; - float alpha = 1.0f - beta; - - alpha2_sum += alpha * alpha; - beta2_sum += beta * beta; - alphabeta_sum += alpha * beta; - alphax_sum += alpha * colors[i]; - betax_sum += beta * colors[i]; - } - - const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); - - float3 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor; - float3 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor; - - // Round a, b to the closest 5-6-5 color and expand... - a = roundAndExpand565(a, start); - b = roundAndExpand565(b, end); - - // compute the error - float3 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum); - - return dot(e, kColorMetricSqr); -} - -__device__ float evalPermutation3(const float3 * colors, uint permutation, ushort * start, ushort * end) -{ - // Compute endpoints using least squares. - float alpha2_sum = 0.0f; - float beta2_sum = 0.0f; - float alphabeta_sum = 0.0f; - float3 alphax_sum = make_float3(0.0f, 0.0f, 0.0f); - float3 betax_sum = make_float3(0.0f, 0.0f, 0.0f); - - // Compute alpha & beta for this permutation. - for (int i = 0; i < 16; i++) - { - const uint bits = permutation >> (2*i); - - float beta = (bits & 1); - if (bits & 2) beta = 0.5f; - float alpha = 1.0f - beta; - - alpha2_sum += alpha * alpha; - beta2_sum += beta * beta; - alphabeta_sum += alpha * beta; - alphax_sum += alpha * colors[i]; - betax_sum += beta * colors[i]; - } - - const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); - - float3 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor; - float3 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor; - - // Round a, b to the closest 5-6-5 color and expand... 
- a = roundAndExpand565(a, start); - b = roundAndExpand565(b, end); - - // compute the error - float3 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum); - - return dot(e, kColorMetricSqr); -} - -__constant__ const float alphaTable4[4] = { 9.0f, 0.0f, 6.0f, 3.0f }; -__constant__ const float alphaTable3[4] = { 4.0f, 0.0f, 2.0f, 2.0f }; -__constant__ const uint prods4[4] = { 0x090000,0x000900,0x040102,0x010402 }; -__constant__ const uint prods3[4] = { 0x040000,0x000400,0x040101,0x010401 }; - -__device__ float evalPermutation4(const float3 * colors, float3 color_sum, uint permutation, ushort * start, ushort * end) -{ - // Compute endpoints using least squares. - float3 alphax_sum = make_float3(0.0f, 0.0f, 0.0f); - uint akku = 0; - - // Compute alpha & beta for this permutation. - #pragma unroll - for (int i = 0; i < 16; i++) - { - const uint bits = permutation >> (2*i); - - alphax_sum += alphaTable4[bits & 3] * colors[i]; - akku += prods4[bits & 3]; - } - - float alpha2_sum = float(akku >> 16); - float beta2_sum = float((akku >> 8) & 0xff); - float alphabeta_sum = float(akku & 0xff); - float3 betax_sum = 9.0f * color_sum - alphax_sum; - - const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); - - float3 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor; - float3 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor; - - // Round a, b to the closest 5-6-5 color and expand... - a = roundAndExpand565(a, start); - b = roundAndExpand565(b, end); - - // compute the error - float3 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum); - - return (1.0f / 9.0f) * dot(e, kColorMetricSqr); -} - -__device__ float evalPermutation3(const float3 * colors, float3 color_sum, uint permutation, ushort * start, ushort * end) -{ - // Compute endpoints using least squares. - float3 alphax_sum = make_float3(0.0f, 0.0f, 0.0f); - uint akku = 0; - - // Compute alpha & beta for this permutation. - #pragma unroll - for (int i = 0; i < 16; i++) - { - const uint bits = permutation >> (2*i); - - alphax_sum += alphaTable3[bits & 3] * colors[i]; - akku += prods3[bits & 3]; - } - - float alpha2_sum = float(akku >> 16); - float beta2_sum = float((akku >> 8) & 0xff); - float alphabeta_sum = float(akku & 0xff); - float3 betax_sum = 4.0f * color_sum - alphax_sum; - - const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); - - float3 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor; - float3 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor; - - // Round a, b to the closest 5-6-5 color and expand... - a = roundAndExpand565(a, start); - b = roundAndExpand565(b, end); - - // compute the error - float3 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum); - - return (1.0f / 4.0f) * dot(e, kColorMetricSqr); -} - -__device__ float evalPermutation4(const float3 * colors, const float * weights, float3 color_sum, uint permutation, ushort * start, ushort * end) -{ - // Compute endpoints using least squares. - float alpha2_sum = 0.0f; - float beta2_sum = 0.0f; - float alphabeta_sum = 0.0f; - float3 alphax_sum = make_float3(0.0f, 0.0f, 0.0f); - - // Compute alpha & beta for this permutation. 
- for (int i = 0; i < 16; i++) - { - const uint bits = permutation >> (2*i); - - float beta = (bits & 1); - if (bits & 2) beta = (1 + beta) / 3.0f; - float alpha = 1.0f - beta; - - alpha2_sum += alpha * alpha * weights[i]; - beta2_sum += beta * beta * weights[i]; - alphabeta_sum += alpha * beta * weights[i]; - alphax_sum += alpha * colors[i]; - } - - float3 betax_sum = color_sum - alphax_sum; - - const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); - - float3 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor; - float3 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor; - - // Round a, b to the closest 5-6-5 color and expand... - a = roundAndExpand565(a, start); - b = roundAndExpand565(b, end); - - // compute the error - float3 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum); - - return dot(e, kColorMetricSqr); -} - -/* -__device__ float evalPermutation3(const float3 * colors, const float * weights, uint permutation, ushort * start, ushort * end) -{ - // Compute endpoints using least squares. - float alpha2_sum = 0.0f; - float beta2_sum = 0.0f; - float alphabeta_sum = 0.0f; - float3 alphax_sum = make_float3(0.0f, 0.0f, 0.0f); - - // Compute alpha & beta for this permutation. - for (int i = 0; i < 16; i++) - { - const uint bits = permutation >> (2*i); - - float beta = (bits & 1); - if (bits & 2) beta = 0.5f; - float alpha = 1.0f - beta; - - alpha2_sum += alpha * alpha * weights[i]; - beta2_sum += beta * beta * weights[i]; - alphabeta_sum += alpha * beta * weights[i]; - alphax_sum += alpha * colors[i]; - } - - float3 betax_sum = color_sum - alphax_sum; - - const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); - - float3 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor; - float3 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor; - - // Round a, b to the closest 5-6-5 color and expand... - a = roundAndExpand565(a, start); - b = roundAndExpand565(b, end); - - // compute the error - float3 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum); - - return dot(e, kColorMetricSqr); -} -*/ - - -//////////////////////////////////////////////////////////////////////////////// -// Evaluate all permutations -//////////////////////////////////////////////////////////////////////////////// -__device__ void evalAllPermutations(const float3 * colors, float3 colorSum, const uint * permutations, ushort & bestStart, ushort & bestEnd, uint & bestPermutation, float * errors) -{ - const int idx = threadIdx.x; - - float bestError = FLT_MAX; - - __shared__ uint s_permutations[160]; - - for(int i = 0; i < 16; i++) - { - int pidx = idx + NUM_THREADS * i; - if (pidx >= 992) break; - - ushort start, end; - uint permutation = permutations[pidx]; - if (pidx < 160) s_permutations[pidx] = permutation; - - float error = evalPermutation4(colors, colorSum, permutation, &start, &end); - - if (error < bestError) - { - bestError = error; - bestPermutation = permutation; - bestStart = start; - bestEnd = end; - } - } - - if (bestStart < bestEnd) - { - swap(bestEnd, bestStart); - bestPermutation ^= 0x55555555; // Flip indices. 
- } - - for(int i = 0; i < 3; i++) - { - int pidx = idx + NUM_THREADS * i; - if (pidx >= 160) break; - - ushort start, end; - uint permutation = s_permutations[pidx]; - float error = evalPermutation3(colors, colorSum, permutation, &start, &end); - - if (error < bestError) - { - bestError = error; - bestPermutation = permutation; - bestStart = start; - bestEnd = end; - - if (bestStart > bestEnd) - { - swap(bestEnd, bestStart); - bestPermutation ^= (~bestPermutation >> 1) & 0x55555555; // Flip indices. - } - } - } - - errors[idx] = bestError; -} - -/* -__device__ void evalAllPermutations(const float3 * colors, const float * weights, const uint * permutations, ushort & bestStart, ushort & bestEnd, uint & bestPermutation, float * errors) -{ - const int idx = threadIdx.x; - - float bestError = FLT_MAX; - - __shared__ uint s_permutations[160]; - - for(int i = 0; i < 16; i++) - { - int pidx = idx + NUM_THREADS * i; - if (pidx >= 992) break; - - ushort start, end; - uint permutation = permutations[pidx]; - if (pidx < 160) s_permutations[pidx] = permutation; - - float error = evalPermutation4(colors, weights, permutation, &start, &end); - - if (error < bestError) - { - bestError = error; - bestPermutation = permutation; - bestStart = start; - bestEnd = end; - } - } - - if (bestStart < bestEnd) - { - swap(bestEnd, bestStart); - bestPermutation ^= 0x55555555; // Flip indices. - } - - for(int i = 0; i < 3; i++) - { - int pidx = idx + NUM_THREADS * i; - if (pidx >= 160) break; - - ushort start, end; - uint permutation = s_permutations[pidx]; - float error = evalPermutation3(colors, weights, permutation, &start, &end); - - if (error < bestError) - { - bestError = error; - bestPermutation = permutation; - bestStart = start; - bestEnd = end; - - if (bestStart > bestEnd) - { - swap(bestEnd, bestStart); - bestPermutation ^= (~bestPermutation >> 1) & 0x55555555; // Flip indices. - } - } - } - - errors[idx] = bestError; -} -*/ - -__device__ void evalLevel4Permutations(const float3 * colors, float3 colorSum, const uint * permutations, ushort & bestStart, ushort & bestEnd, uint & bestPermutation, float * errors) -{ - const int idx = threadIdx.x; - - float bestError = FLT_MAX; - - for(int i = 0; i < 16; i++) - { - int pidx = idx + NUM_THREADS * i; - if (pidx >= 992) break; - - ushort start, end; - uint permutation = permutations[pidx]; - - float error = evalPermutation4(colors, colorSum, permutation, &start, &end); - - if (error < bestError) - { - bestError = error; - bestPermutation = permutation; - bestStart = start; - bestEnd = end; - } - } - - if (bestStart < bestEnd) - { - swap(bestEnd, bestStart); - bestPermutation ^= 0x55555555; // Flip indices. - } - - errors[idx] = bestError; -} - -__device__ void evalLevel4Permutations(const float3 * colors, const float * weights, float3 colorSum, const uint * permutations, ushort & bestStart, ushort & bestEnd, uint & bestPermutation, float * errors) -{ - const int idx = threadIdx.x; - - float bestError = FLT_MAX; - - for(int i = 0; i < 16; i++) - { - int pidx = idx + NUM_THREADS * i; - if (pidx >= 992) break; - - ushort start, end; - uint permutation = permutations[pidx]; - - float error = evalPermutation4(colors, weights, colorSum, permutation, &start, &end); - - if (error < bestError) - { - bestError = error; - bestPermutation = permutation; - bestStart = start; - bestEnd = end; - } - } - - if (bestStart < bestEnd) - { - swap(bestEnd, bestStart); - bestPermutation ^= 0x55555555; // Flip indices. 
- } - - errors[idx] = bestError; -} - - -//////////////////////////////////////////////////////////////////////////////// -// Find index with minimum error -//////////////////////////////////////////////////////////////////////////////// -__device__ int findMinError(float * errors) -{ - const int idx = threadIdx.x; - - __shared__ int indices[NUM_THREADS]; - indices[idx] = idx; - -#if __DEVICE_EMULATION__ - for(int d = NUM_THREADS/2; d > 0; d >>= 1) - { - __syncthreads(); - - if (idx < d) - { - float err0 = errors[idx]; - float err1 = errors[idx + d]; - - if (err1 < err0) { - errors[idx] = err1; - indices[idx] = indices[idx + d]; - } - } - } - -#else - for(int d = NUM_THREADS/2; d > 32; d >>= 1) - { - __syncthreads(); - - if (idx < d) - { - float err0 = errors[idx]; - float err1 = errors[idx + d]; - - if (err1 < err0) { - errors[idx] = err1; - indices[idx] = indices[idx + d]; - } - } - } - - __syncthreads(); - - // unroll last 6 iterations - if (idx < 32) - { - if (errors[idx + 32] < errors[idx]) { - errors[idx] = errors[idx + 32]; - indices[idx] = indices[idx + 32]; - } - if (errors[idx + 16] < errors[idx]) { - errors[idx] = errors[idx + 16]; - indices[idx] = indices[idx + 16]; - } - if (errors[idx + 8] < errors[idx]) { - errors[idx] = errors[idx + 8]; - indices[idx] = indices[idx + 8]; - } - if (errors[idx + 4] < errors[idx]) { - errors[idx] = errors[idx + 4]; - indices[idx] = indices[idx + 4]; - } - if (errors[idx + 2] < errors[idx]) { - errors[idx] = errors[idx + 2]; - indices[idx] = indices[idx + 2]; - } - if (errors[idx + 1] < errors[idx]) { - errors[idx] = errors[idx + 1]; - indices[idx] = indices[idx + 1]; - } - } -#endif - - __syncthreads(); - - return indices[0]; -} - - -//////////////////////////////////////////////////////////////////////////////// -// Save DXT block -//////////////////////////////////////////////////////////////////////////////// -__device__ void saveBlockDXT1(ushort start, ushort end, uint permutation, int xrefs[16], uint2 * result) -{ - const int bid = blockIdx.x; - - if (start == end) - { - permutation = 0; - } - - // Reorder permutation. - uint indices = 0; - for(int i = 0; i < 16; i++) - { - int ref = xrefs[i]; - indices |= ((permutation >> (2 * ref)) & 3) << (2 * i); - } - - // Write endpoints. - result[bid].x = (end << 16) | start; - - // Write palette indices. 
- result[bid].y = indices; -} - -__device__ void saveSingleColorBlockDXT1(float3 color, uint2 * result) -{ - const int bid = blockIdx.x; - - int r = color.x * 255; - int g = color.y * 255; - int b = color.z * 255; - - ushort color0 = (OMatch5[r][0] << 11) | (OMatch6[g][0] << 5) | OMatch5[b][0]; - ushort color1 = (OMatch5[r][1] << 11) | (OMatch6[g][1] << 5) | OMatch5[b][1]; - - if (color0 < color1) - { - result[bid].x = (color0 << 16) | color1; - result[bid].y = 0xffffffff; - } - else - { - result[bid].x = (color1 << 16) | color0; - result[bid].y = 0xaaaaaaaa; - } -} - - -//////////////////////////////////////////////////////////////////////////////// -// Compress color block -//////////////////////////////////////////////////////////////////////////////// -__global__ void compressDXT1(const uint * permutations, const uint * image, uint2 * result) -{ - __shared__ float3 colors[16]; - __shared__ float3 sums[16]; - __shared__ int xrefs[16]; - __shared__ int sameColor; - - loadColorBlock(image, colors, sums, xrefs, &sameColor); - - __syncthreads(); - - if (sameColor) - { - if (threadIdx.x == 0) saveSingleColorBlockDXT1(colors[0], result); - return; - } - - ushort bestStart, bestEnd; - uint bestPermutation; - - __shared__ float errors[NUM_THREADS]; - - evalAllPermutations(colors, sums[0], permutations, bestStart, bestEnd, bestPermutation, errors); - - // Use a parallel reduction to find minimum error. - const int minIdx = findMinError(errors); - - // Only write the result of the winner thread. - if (threadIdx.x == minIdx) - { - saveBlockDXT1(bestStart, bestEnd, bestPermutation, xrefs, result); - } -} - -__global__ void compressLevel4DXT1(const uint * permutations, const uint * image, uint2 * result) -{ - __shared__ float3 colors[16]; - __shared__ float3 sums[16]; - __shared__ int xrefs[16]; - __shared__ int sameColor; - - loadColorBlock(image, colors, sums, xrefs, &sameColor); - - __syncthreads(); - - if (sameColor) - { - if (threadIdx.x == 0) saveSingleColorBlockDXT1(colors[0], result); - return; - } - - ushort bestStart, bestEnd; - uint bestPermutation; - - __shared__ float errors[NUM_THREADS]; - - evalLevel4Permutations(colors, sums[0], permutations, bestStart, bestEnd, bestPermutation, errors); - - // Use a parallel reduction to find minimum error. - const int minIdx = findMinError(errors); - - // Only write the result of the winner thread. - if (threadIdx.x == minIdx) - { - saveBlockDXT1(bestStart, bestEnd, bestPermutation, xrefs, result); - } -} - -__global__ void compressWeightedDXT1(const uint * permutations, const uint * image, uint2 * result) -{ - __shared__ float3 colors[16]; - __shared__ float3 sums[16]; - __shared__ float weights[16]; - __shared__ int xrefs[16]; - __shared__ int sameColor; - - loadColorBlock(image, colors, sums, weights, xrefs, &sameColor); - - __syncthreads(); - - if (sameColor) - { - if (threadIdx.x == 0) saveSingleColorBlockDXT1(colors[0], result); - return; - } - - ushort bestStart, bestEnd; - uint bestPermutation; - - __shared__ float errors[NUM_THREADS]; - - evalLevel4Permutations(colors, weights, sums[0], permutations, bestStart, bestEnd, bestPermutation, errors); - - // Use a parallel reduction to find minimum error. - int minIdx = findMinError(errors); - - // Only write the result of the winner thread. 
- if (threadIdx.x == minIdx) - { - saveBlockDXT1(bestStart, bestEnd, bestPermutation, xrefs, result); - } -} - - -/* -__device__ float computeError(const float weights[16], uchar a0, uchar a1) -{ - float palette[6]; - palette[0] = (6.0f/7.0f * a0 + 1.0f/7.0f * a1); - palette[1] = (5.0f/7.0f * a0 + 2.0f/7.0f * a1); - palette[2] = (4.0f/7.0f * a0 + 3.0f/7.0f * a1); - palette[3] = (3.0f/7.0f * a0 + 4.0f/7.0f * a1); - palette[4] = (2.0f/7.0f * a0 + 5.0f/7.0f * a1); - palette[5] = (1.0f/7.0f * a0 + 6.0f/7.0f * a1); - - float total = 0.0f; - - for (uint i = 0; i < 16; i++) - { - float alpha = weights[i]; - - float error = a0 - alpha; - error = min(error, palette[0] - alpha); - error = min(error, palette[1] - alpha); - error = min(error, palette[2] - alpha); - error = min(error, palette[3] - alpha); - error = min(error, palette[4] - alpha); - error = min(error, palette[5] - alpha); - error = min(error, a1 - alpha); - - total += error; - } - - return total; -} - -inline __device__ uchar roundAndExpand(float a) -{ - return rintf(__saturatef(a) * 255.0f); -} -*/ -/* -__device__ void optimizeAlpha8(const float alphas[16], uchar & a0, uchar & a1) -{ - float alpha2_sum = 0; - float beta2_sum = 0; - float alphabeta_sum = 0; - float alphax_sum = 0; - float betax_sum = 0; - - for (int i = 0; i < 16; i++) - { - uint idx = index[i]; - float alpha; - if (idx < 2) alpha = 1.0f - idx; - else alpha = (8.0f - idx) / 7.0f; - - float beta = 1 - alpha; - - alpha2_sum += alpha * alpha; - beta2_sum += beta * beta; - alphabeta_sum += alpha * beta; - alphax_sum += alpha * alphas[i]; - betax_sum += beta * alphas[i]; - } - - const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); - - float a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor; - float b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor; - - a0 = roundAndExpand8(a); - a1 = roundAndExpand8(b); -} -*/ -/* -__device__ void compressAlpha(const float alphas[16], uint4 * result) -{ - const int tid = threadIdx.x; - - // Compress alpha block! - // Brute force approach: - // Try all color pairs: 256*256/2 = 32768, 32768/64 = 512 iterations? - - // Determine min & max alphas - - float A0, A1; - - if (tid < 16) - { - __shared__ uint s_alphas[16]; - - s_alphas[tid] = alphas[tid]; - s_alphas[tid] = min(s_alphas[tid], s_alphas[tid^8]); - s_alphas[tid] = min(s_alphas[tid], s_alphas[tid^4]); - s_alphas[tid] = min(s_alphas[tid], s_alphas[tid^2]); - s_alphas[tid] = min(s_alphas[tid], s_alphas[tid^1]); - A0 = s_alphas[tid]; - - s_alphas[tid] = alphas[tid]; - s_alphas[tid] = max(s_alphas[tid], s_alphas[tid^8]); - s_alphas[tid] = max(s_alphas[tid], s_alphas[tid^4]); - s_alphas[tid] = max(s_alphas[tid], s_alphas[tid^2]); - s_alphas[tid] = max(s_alphas[tid], s_alphas[tid^1]); - A1 = s_alphas[tid]; - } - - __syncthreads(); - - int minIdx = 0; - - if (A1 - A0 > 8) - { - float bestError = FLT_MAX; - - // 64 threads -> 8x8 - // divide [A1-A0] in partitions. - // test endpoints - - for (int i = 0; i < 128; i++) - { - uint idx = (i * NUM_THREADS + tid) * 4; - uchar a0 = idx & 255; - uchar a1 = idx >> 8; - - float error = computeError(alphas, a0, a1); - - if (error < bestError) - { - bestError = error; - A0 = a0; - A1 = a1; - } - } - - __shared__ float errors[NUM_THREADS]; - errors[tid] = bestError; - - // Minimize error. - minIdx = findMinError(errors); - - } - - if (minIdx == tid) - { - // @@ Compute indices. - - // @@ Write alpha block. 
- } -} - -__global__ void compressDXT5(const uint * permutations, const uint * image, uint4 * result) -{ - __shared__ float3 colors[16]; - __shared__ float3 sums[16]; - __shared__ float weights[16]; - __shared__ int xrefs[16]; - - loadColorBlock(image, colors, sums, weights, xrefs); - - __syncthreads(); - - compressAlpha(weights, result); - - ushort bestStart, bestEnd; - uint bestPermutation; - - __shared__ float errors[NUM_THREADS]; - - evalLevel4Permutations(colors, weights, sums[0], permutations, bestStart, bestEnd, bestPermutation, errors); - - // Use a parallel reduction to find minimum error. - int minIdx = findMinError(errors); - - // Only write the result of the winner thread. - if (threadIdx.x == minIdx) - { - saveBlockDXT1(bestStart, bestEnd, bestPermutation, xrefs, (uint2 *)result); - } -} -*/ - -//////////////////////////////////////////////////////////////////////////////// -// Setup kernel -//////////////////////////////////////////////////////////////////////////////// - -extern "C" void setupCompressKernel(const float weights[3]) -{ - // Set constants. - cudaMemcpyToSymbol(kColorMetric, weights, sizeof(float) * 3, 0); - - float weightsSqr[3]; - weightsSqr[0] = weights[0] * weights[0]; - weightsSqr[1] = weights[1] * weights[1]; - weightsSqr[2] = weights[2] * weights[2]; - - cudaMemcpyToSymbol(kColorMetricSqr, weightsSqr, sizeof(float) * 3, 0); -} - - -//////////////////////////////////////////////////////////////////////////////// -// Launch kernel -//////////////////////////////////////////////////////////////////////////////// - -extern "C" void compressKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps) -{ - compressDXT1<<>>(d_bitmaps, d_data, (uint2 *)d_result); -} - -extern "C" void compressKernelDXT1_Level4(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps) -{ - compressLevel4DXT1<<>>(d_bitmaps, d_data, (uint2 *)d_result); -} - -extern "C" void compressWeightedKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps) -{ - compressWeightedDXT1<<>>(d_bitmaps, d_data, (uint2 *)d_result); -} +// Copyright (c) 2009-2011 Ignacio Castano +// Copyright (c) 2007-2009 NVIDIA Corporation -- Ignacio Castano +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +#include +#include // FLT_MAX + +#include "CudaMath.h" + + +#define NUM_THREADS 64 // Number of threads per block. 
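
Background note on this hunk: the long hexadecimal tables earlier in the patch (the one added above and the s_bitmapTable being removed from cuda/Bitmaps.h) each store candidate cluster assignments for a 4x4 texel block, packed as 16 two-bit selectors per 32-bit word; the kernels in this file unpack them with (permutation >> (2*i)) & 3 and turn each selector into an interpolation weight. The following is a minimal host-side sketch of that decoding, not part of the patch; the function name decodePermutation4 is illustrative only.

#include <cstdint>
#include <cstdio>

// Decode one 32-bit candidate word into 16 per-texel weights, mirroring the
// (bits & 1) / (bits & 2) logic of evalPermutation4()/blockError4():
// selector 0 -> 0, 1 -> 1, 2 -> 1/3, 3 -> 2/3, i.e. the weight given to the
// second endpoint when a candidate assignment is evaluated.
static void decodePermutation4(uint32_t permutation, float beta[16])
{
    for (int i = 0; i < 16; ++i)
    {
        uint32_t bits = (permutation >> (2 * i)) & 3u;
        float b = float(bits & 1u);            // 0 or 1
        if (bits & 2u) b = (1.0f + b) / 3.0f;  // 1/3 or 2/3
        beta[i] = b;
    }
}

int main()
{
    float beta[16];
    decodePermutation4(0xAAAA8000u, beta); // one entry of the table above
    for (int i = 0; i < 16; ++i)
        std::printf("texel %2d: beta = %.3f\n", i, beta[i]);
    return 0;
}
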
+ +typedef unsigned char uchar; +typedef unsigned short ushort; +typedef unsigned int uint; + +template +__device__ inline void swap(T & a, T & b) +{ + T tmp = a; + a = b; + b = tmp; +} + +__constant__ uchar OMatch5[256][2]; +__constant__ uchar OMatch6[256][2]; + +__constant__ float3 kColorMetric = { 1.0f, 1.0f, 1.0f }; +__constant__ float3 kColorMetricSqr = { 1.0f, 1.0f, 1.0f }; + +// Some kernels read the input through texture. +texture tex; + + +//////////////////////////////////////////////////////////////////////////////// +// Color helpers +//////////////////////////////////////////////////////////////////////////////// + +__device__ inline uint float_to_u8(float value) +{ + return min(max(__float2int_rn((255 * value + 0.5f) / (1.0f + 1.0f/255.0f)), 0), 255); +} + +__device__ inline uint float_to_u6(float value) +{ + return min(max(__float2int_rn((63 * value + 0.5f) / (1.0f + 1.0f/63.0f)), 0), 63); +} + +__device__ inline uint float_to_u5(float value) +{ + return min(max(__float2int_rn((31 * value + 0.5f) / (1.0f + 1.0f/31.0f)), 0), 31); +} + +__device__ inline float u8_to_float(uint value) +{ + return __saturatef(__uint2float_rn(value) / 255.0f); + //return (value) / 255.0f; +} + +__device__ float3 color32ToFloat3(uint c) +{ + float3 color; + color.z = u8_to_float((c >> 0) & 0xFF); + color.y = u8_to_float((c >> 8) & 0xFF); + color.x = u8_to_float((c >> 16) & 0xFF); + return color; +} + +__device__ int3 color16ToInt3(ushort c) +{ + int3 color; + + color.z = ((c >> 0) & 0x1F); + color.z = (color.z << 3) | (color.z >> 2); + + color.y = ((c >> 5) & 0x3F); + color.y = (color.y << 2) | (color.y >> 4); + + color.x = ((c >> 11) & 0x1F); + color.x = (color.x << 3) | (color.x >> 2); + + return color; +} + +__device__ float3 color16ToFloat3(ushort c) +{ + int3 color = color16ToInt3(c); + return make_float3(color.x, color.y, color.z) * (1.0f / 255.0f); +} + +__device__ int3 float3ToInt3(float3 c) +{ + return make_int3(c.x * 255, c.y * 255, c.z * 255); +} + +__device__ float3 int3ToFloat3(int3 c) +{ + return make_float3(float_to_u8(c.x), float_to_u8(c.y), float_to_u8(c.z)); +} + + +__device__ int colorDistance(int3 c0, int3 c1) +{ + int dx = c0.x-c1.x; + int dy = c0.y-c1.y; + int dz = c0.z-c1.z; + return __mul24(dx, dx) + __mul24(dy, dy) + __mul24(dz, dz); +} + + +//////////////////////////////////////////////////////////////////////////////// +// Round color to RGB565 and expand +//////////////////////////////////////////////////////////////////////////////// + + +#if 0 +__device__ inline uint float_to_u8(float value) +{ + //uint result; + //asm("cvt.sat.rni.u8.f32 %0, %1;" : "=r" (result) : "f" (value)); + //return result; + //return __float2uint_rn(__saturatef(value) * 255.0f); + + int result = __float2int_rn((255 * value + 0.5f) / (1.0f + 1.0f/255.0f)); + result = max(result, 0); + result = min(result, 255); + return result; +} + +__device__ inline float u8_to_float(uint value) +{ + //float result; + //asm("cvt.sat.rn.f32.u8 %0, %1;" : "=f" (result) : "r" (value)); // this is wrong! 
+ //return result; + return __saturatef(__uint2float_rn(value) / 255.0f); +} + +inline __device__ float3 roundAndExpand565(float3 v, ushort * w) +{ + uint x = float_to_u8(v.x) >> 3; + uint y = float_to_u8(v.y) >> 2; + uint z = float_to_u8(v.z) >> 3; + *w = (x << 11) | (y << 5) | z; + v.x = u8_to_float((x << 3) | (x >> 2)); + v.y = u8_to_float((y << 2) | (y >> 4)); + v.z = u8_to_float((z << 3) | (z >> 2)); +// v.x = u8_to_float(x) * 255.0f / 31.0f; +// v.y = u8_to_float(y) * 255.0f / 63.0f; +// v.z = u8_to_float(z) * 255.0f / 31.0f; + return v; +} +#else + +inline __device__ float3 roundAndExpand565(float3 v, ushort * w) +{ + uint x = __float2uint_rn(__saturatef(v.x) * 31.0f); + uint y = __float2uint_rn(__saturatef(v.y) * 63.0f); + uint z = __float2uint_rn(__saturatef(v.z) * 31.0f); + + //uint x = float_to_u5(v.x); + //uint y = float_to_u6(v.y); + //uint z = float_to_u5(v.z); + + *w = (x << 11) | (y << 5) | z; + + v.x = __uint2float_rn(x) * 1.0f / 31.0f; + v.y = __uint2float_rn(y) * 1.0f / 63.0f; + v.z = __uint2float_rn(z) * 1.0f / 31.0f; + + //v.x = u8_to_float((x << 3) | (x >> 2)); + //v.y = u8_to_float((y << 2) | (y >> 4)); + //v.z = u8_to_float((z << 3) | (z >> 2)); + + return v; +} +#endif +inline __device__ float2 roundAndExpand56(float2 v, ushort * w) +{ + uint x = __float2uint_rn(__saturatef(v.x) * 31.0f); + uint y = __float2uint_rn(__saturatef(v.y) * 63.0f); + *w = (x << 11) | (y << 5); + v.x = __uint2float_rn(x) * 1.0f / 31.0f; + v.y = __uint2float_rn(y) * 1.0f / 63.0f; + return v; +} + +inline __device__ float2 roundAndExpand88(float2 v, ushort * w) +{ + uint x = __float2uint_rn(__saturatef(v.x) * 255.0f); + uint y = __float2uint_rn(__saturatef(v.y) * 255.0f); + *w = (x << 8) | y; + v.x = __uint2float_rn(x) * 1.0f / 255.0f; + v.y = __uint2float_rn(y) * 1.0f / 255.0f; + return v; +} + + +//////////////////////////////////////////////////////////////////////////////// +// Block errors +//////////////////////////////////////////////////////////////////////////////// + +__device__ float3 blockError4(const float3 * colors, uint permutation, float3 a, float3 b) +{ + float3 error = make_float3(0.0f, 0.0f, 0.0f); + + for (int i = 0; i < 16; i++) + { + const uint bits = permutation >> (2*i); + + float beta = (bits & 1); + if (bits & 2) beta = (1 + beta) / 3.0f; + float alpha = 1.0f - beta; + + float3 diff = colors[i] - (a*alpha + b*beta); + + error += diff*diff; + } + + return error; +} + +__device__ float3 blockError4(const float3 * colors, uint permutation, ushort c0, ushort c1) +{ + float3 error = make_float3(0.0f, 0.0f, 0.0f); + + int3 color0 = color16ToInt3(c0); + int3 color1 = color16ToInt3(c1); + + for (int i = 0; i < 16; i++) + { + const uint bits = permutation >> (2*i); + + int beta = (bits & 1); + if (bits & 2) beta = (1 + beta); + float alpha = 3 - beta; + + int3 color; + color.x = (color0.x * alpha + color1.x * beta) / 3; + color.y = (color0.y * alpha + color1.y * beta) / 3; + color.z = (color0.z * alpha + color1.z * beta) / 3; + + float3 diff = colors[i] - int3ToFloat3(color); + + error += diff*diff; + } + + return error; +} + + +__device__ float3 blockError3(const float3 * colors, uint permutation, float3 a, float3 b) +{ + float3 error = make_float3(0.0f, 0.0f, 0.0f); + + for (int i = 0; i < 16; i++) + { + const uint bits = permutation >> (2*i); + + float beta = (bits & 1); + if (bits & 2) beta = 0.5f; + float alpha = 1.0f - beta; + + float3 diff = colors[i] - (a*alpha + b*beta); + + error += diff*diff; + } + + return error; +} + + 
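
The sortColors() routines below compute a rank for each of the 16 projected values (the count of strictly smaller values) and then break ties in favour of later elements, so the ranks form a permutation of 0..15 that the loadColorBlock*() functions use to scatter colors along the best-fit axis. A sequential sketch of the same rank sort follows; it is illustrative only (rankSort16 is not a function in this patch) and plays all 16 threads of the per-thread kernel version in a plain loop.

#include <cstdio>

// Sequential rendition of the rank sort: each value's rank is the number of
// strictly smaller values; equal values are then disambiguated by bumping
// the rank of later elements, exactly as the kernel does per thread.
static void rankSort16(const float values[16], int ranks[16])
{
    for (int tid = 0; tid < 16; ++tid)
    {
        int rank = 0;
        for (int i = 0; i < 16; ++i)
            rank += (values[i] < values[tid]);
        ranks[tid] = rank;
    }
    // Tie resolution: compare against earlier (already final) ranks and bump
    // on collision, so duplicates still receive distinct ranks.
    for (int tid = 0; tid < 16; ++tid)
        for (int i = 0; i < 15; ++i)
            if (tid > i && ranks[tid] == ranks[i])
                ++ranks[tid];
}

int main()
{
    const float dps[16] = { 0.2f, 0.7f, 0.2f, 0.9f, 0.1f, 0.5f, 0.5f, 0.3f,
                            0.8f, 0.2f, 0.6f, 0.4f, 0.0f, 1.0f, 0.5f, 0.3f };
    int xrefs[16];
    rankSort16(dps, xrefs);
    for (int i = 0; i < 16; ++i)
        std::printf("value %.2f -> rank %d\n", dps[i], xrefs[i]);
    return 0;
}
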
+//////////////////////////////////////////////////////////////////////////////// +// Sort colors +//////////////////////////////////////////////////////////////////////////////// + +// @@ Experimental code to avoid duplicate colors for faster compression. +// We could first sort along the best fit line and only compare colors that have the same projection. +// The hardest part is to maintain the indices to map packed/sorted colors to the input colors. +// We also need to update several functions that assume the number of colors is fixed to 16. +// And compute different bit maps for the different color counts. +// This is a fairly high amount of work. +__device__ int packColors(float3 * values, float * weights, int * ranks) +{ + const int tid = threadIdx.x; + + __shared__ int count; + count = 0; + + bool alive = true; + + // Append this + for (int i = 0; i < 16; i++) + { + // One thread leads on each iteration. + if (tid == i) { + + // If thread alive, then append element. + if (alive) { + values[count] = values[i]; + weights[count] = weights[i]; + count++; + } + + // Otherwise update weight. + else { + weights[ranks[i]] += weights[i]; + } + } + + // Kill all threads that have the same element and record rank. + if (values[i] == values[tid]) { + alive = false; + ranks[tid] = count - 1; + } + } + + return count; +} + + +__device__ void sortColors(const float * values, int * ranks) +{ + const int tid = threadIdx.x; + + int rank = 0; + + #pragma unroll + for (int i = 0; i < 16; i++) + { + rank += (values[i] < values[tid]); + } + + ranks[tid] = rank; + + // Resolve elements with the same index. + #pragma unroll + for (int i = 0; i < 15; i++) + { + if ((tid > i) & (ranks[tid] == ranks[i])) ++ranks[tid]; + } +} + +__device__ void sortColors(const float * values, int * ranks, int count) +{ + const int tid = threadIdx.x; + + int rank = 0; + + #pragma unroll + for (int i = 0; i < count; i++) + { + rank += (values[i] < values[tid]); + } + + ranks[tid] = rank; + + // Resolve elements with the same index. + #pragma unroll + for (int i = 0; i < count-1; i++) + { + if ((tid > i) & (ranks[tid] == ranks[i])) ++ranks[tid]; + } +} + + + +//////////////////////////////////////////////////////////////////////////////// +// Load color block to shared mem +//////////////////////////////////////////////////////////////////////////////// + +__device__ void loadColorBlockTex(uint firstBlock, uint blockWidth, float3 colors[16], float3 sums[16], int xrefs[16], int * sameColor) +{ + const int bid = blockIdx.x; + const int idx = threadIdx.x; + + __shared__ float dps[16]; + + if (idx < 16) + { + float x = 4 * ((firstBlock + bid) % blockWidth) + idx % 4; // @@ Avoid mod and div by using 2D grid? + float y = 4 * ((firstBlock + bid) / blockWidth) + idx / 4; + + // Read color and copy to shared mem. + float4 c = tex2D(tex, x, y); + + colors[idx].x = c.z; + colors[idx].y = c.y; + colors[idx].z = c.x; + + // Sort colors along the best fit line. 
+ colorSums(colors, sums); + float3 axis = bestFitLine(colors, sums[0], kColorMetric); + + *sameColor = (axis == make_float3(0, 0, 0)); + + dps[idx] = dot(colors[idx], axis); + + sortColors(dps, xrefs); + + float3 tmp = colors[idx]; + colors[xrefs[idx]] = tmp; + } +} + +/* +__device__ void loadColorBlockTex(uint firstBlock, uint w, float3 colors[16], float3 sums[16], float weights[16], int xrefs[16], int * sameColor) +{ + const int bid = blockIdx.x; + const int idx = threadIdx.x; + + __shared__ float dps[16]; + + if (idx < 16) + { + float x = 4 * ((firstBlock + bid) % w) + idx % 4; // @@ Avoid mod and div by using 2D grid? + float y = 4 * ((firstBlock + bid) / w) + idx / 4; + + // Read color and copy to shared mem. + float4 c = tex2D(tex, x, y); + + colors[idx].x = c.z; + colors[idx].y = c.y; + colors[idx].z = c.x; + weights[idx] = 1; + + int count = packColors(colors, weights); + if (idx < count) + { + // Sort colors along the best fit line. + colorSums(colors, sums); + float3 axis = bestFitLine(colors, sums[0], kColorMetric); + + *sameColor = (axis == make_float3(0, 0, 0)); + + dps[idx] = dot(colors[idx], axis); + + sortColors(dps, xrefs); + + float3 tmp = colors[idx]; + colors[xrefs[idx]] = tmp; + } + } +} +*/ + +__device__ void loadColorBlockTex(uint firstBlock, uint width, float3 colors[16], float3 sums[16], float weights[16], int xrefs[16], int * sameColor) +{ + const int bid = blockIdx.x; + const int idx = threadIdx.x; + + __shared__ float3 rawColors[16]; + __shared__ float dps[16]; + + if (idx < 16) + { + float x = 4 * ((firstBlock + bid) % width) + idx % 4; // @@ Avoid mod and div by using 2D grid? + float y = 4 * ((firstBlock + bid) / width) + idx / 4; + + // Read color and copy to shared mem. + float4 c = tex2D(tex, x, y); + + rawColors[idx].x = c.z; + rawColors[idx].y = c.y; + rawColors[idx].z = c.x; + weights[idx] = c.w; + + colors[idx] = rawColors[idx] * weights[idx]; + + // Sort colors along the best fit line. + colorSums(colors, sums); + float3 axis = bestFitLine(colors, sums[0], kColorMetric); + + *sameColor = (axis == make_float3(0, 0, 0)); + + // Single color compressor needs unweighted colors. + if (*sameColor) colors[idx] = rawColors[idx]; + + dps[idx] = dot(colors[idx], axis); + + sortColors(dps, xrefs); + + float3 tmp = colors[idx]; + float w = weights[idx]; + colors[xrefs[idx]] = tmp; + weights[xrefs[idx]] = w; + } +} + +__device__ void loadColorBlock(const uint * image, float2 colors[16], float2 sums[16], int xrefs[16], int * sameColor) +{ + const int bid = blockIdx.x; + const int idx = threadIdx.x; + + __shared__ float dps[16]; + + if (idx < 16) + { + // Read color and copy to shared mem. + uint c = image[(bid) * 16 + idx]; + + colors[idx].y = ((c >> 8) & 0xFF) * (1.0f / 255.0f); + colors[idx].x = ((c >> 16) & 0xFF) * (1.0f / 255.0f); + + // Sort colors along the best fit line. + colorSums(colors, sums); + float2 axis = bestFitLine(colors, sums[0]); + + *sameColor = (axis == make_float2(0, 0)); + + dps[idx] = dot(colors[idx], axis); + + sortColors(dps, xrefs); + + float2 tmp = colors[idx]; + colors[xrefs[idx]] = tmp; + } +} + + +//////////////////////////////////////////////////////////////////////////////// +// Evaluate permutations +//////////////////////////////////////////////////////////////////////////////// +__device__ float evalPermutation4(const float3 * colors, uint permutation, ushort * start, ushort * end) +{ + // Compute endpoints using least squares. 
+ float alpha2_sum = 0.0f; + float beta2_sum = 0.0f; + float alphabeta_sum = 0.0f; + float3 alphax_sum = make_float3(0.0f, 0.0f, 0.0f); + float3 betax_sum = make_float3(0.0f, 0.0f, 0.0f); + + // Compute alpha & beta for this permutation. + for (int i = 0; i < 16; i++) + { + const uint bits = permutation >> (2*i); + + float beta = (bits & 1); + if (bits & 2) beta = (1 + beta) / 3.0f; + float alpha = 1.0f - beta; + + alpha2_sum += alpha * alpha; + beta2_sum += beta * beta; + alphabeta_sum += alpha * beta; + alphax_sum += alpha * colors[i]; + betax_sum += beta * colors[i]; + } + + const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); + + float3 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor; + float3 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor; + + // Round a, b to the closest 5-6-5 color and expand... + a = roundAndExpand565(a, start); + b = roundAndExpand565(b, end); + + // compute the error + float3 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum); + + return dot(e, kColorMetricSqr); +} + +__device__ float evalPermutation3(const float3 * colors, uint permutation, ushort * start, ushort * end) +{ + // Compute endpoints using least squares. + float alpha2_sum = 0.0f; + float beta2_sum = 0.0f; + float alphabeta_sum = 0.0f; + float3 alphax_sum = make_float3(0.0f, 0.0f, 0.0f); + float3 betax_sum = make_float3(0.0f, 0.0f, 0.0f); + + // Compute alpha & beta for this permutation. + for (int i = 0; i < 16; i++) + { + const uint bits = permutation >> (2*i); + + float beta = (bits & 1); + if (bits & 2) beta = 0.5f; + float alpha = 1.0f - beta; + + alpha2_sum += alpha * alpha; + beta2_sum += beta * beta; + alphabeta_sum += alpha * beta; + alphax_sum += alpha * colors[i]; + betax_sum += beta * colors[i]; + } + + const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); + + float3 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor; + float3 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor; + + // Round a, b to the closest 5-6-5 color and expand... + a = roundAndExpand565(a, start); + b = roundAndExpand565(b, end); + + // compute the error + float3 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum); + + return dot(e, kColorMetricSqr); +} + +__constant__ const float alphaTable4[4] = { 9.0f, 0.0f, 6.0f, 3.0f }; +__constant__ const float alphaTable3[4] = { 4.0f, 0.0f, 2.0f, 2.0f }; +__constant__ const uint prods4[4] = { 0x090000,0x000900,0x040102,0x010402 }; +__constant__ const uint prods3[4] = { 0x040000,0x000400,0x040101,0x010401 }; + +__device__ float evalPermutation4(const float3 * colors, float3 color_sum, uint permutation, ushort * start, ushort * end) +{ + // Compute endpoints using least squares. + float3 alphax_sum = make_float3(0.0f, 0.0f, 0.0f); + uint akku = 0; + + // Compute alpha & beta for this permutation. 
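+    // alphaTable4 holds 9*alpha for each 2-bit code, and each prods4 entry packs
+    // 9*(alpha^2, beta^2, alpha*beta) into one byte apiece, so a single integer
+    // add into akku accumulates all three scalar sums at once (16 texels * 9 = 144
+    // still fits in a byte). The factor of 9 is divided back out in the final error.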
+ #pragma unroll + for (int i = 0; i < 16; i++) + { + const uint bits = permutation >> (2*i); + + alphax_sum += alphaTable4[bits & 3] * colors[i]; + akku += prods4[bits & 3]; + } + + float alpha2_sum = float(akku >> 16); + float beta2_sum = float((akku >> 8) & 0xff); + float alphabeta_sum = float(akku & 0xff); + float3 betax_sum = 9.0f * color_sum - alphax_sum; + + const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); + + float3 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor; + float3 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor; + + // Round a, b to the closest 5-6-5 color and expand... + a = roundAndExpand565(a, start); + b = roundAndExpand565(b, end); + + // compute the error + float3 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum); + + //float3 e = blockError4(colors, permutation, *start, *end); + + return (1.0f / 9.0f) * dot(e, kColorMetricSqr); +} + +__device__ float evalPermutation3(const float3 * colors, float3 color_sum, uint permutation, ushort * start, ushort * end) +{ + // Compute endpoints using least squares. + float3 alphax_sum = make_float3(0.0f, 0.0f, 0.0f); + uint akku = 0; + + // Compute alpha & beta for this permutation. + #pragma unroll + for (int i = 0; i < 16; i++) + { + const uint bits = permutation >> (2*i); + + alphax_sum += alphaTable3[bits & 3] * colors[i]; + akku += prods3[bits & 3]; + } + + float alpha2_sum = float(akku >> 16); + float beta2_sum = float((akku >> 8) & 0xff); + float alphabeta_sum = float(akku & 0xff); + float3 betax_sum = 4.0f * color_sum - alphax_sum; + + const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); + + float3 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor; + float3 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor; + + // Round a, b to the closest 5-6-5 color and expand... + a = roundAndExpand565(a, start); + b = roundAndExpand565(b, end); + + // compute the error + float3 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum); + + //float3 e = blockError3(colors, permutation, a, b); + + return (1.0f / 4.0f) * dot(e, kColorMetricSqr); +} + +__device__ float evalPermutation4(const float3 * colors, const float * weights, float3 color_sum, uint permutation, ushort * start, ushort * end) +{ + // Compute endpoints using least squares. + float alpha2_sum = 0.0f; + float beta2_sum = 0.0f; + float alphabeta_sum = 0.0f; + float3 alphax_sum = make_float3(0.0f, 0.0f, 0.0f); + + // Compute alpha & beta for this permutation. + for (int i = 0; i < 16; i++) + { + const uint bits = permutation >> (2*i); + + float beta = (bits & 1); + if (bits & 2) beta = (1 + beta) / 3.0f; + float alpha = 1.0f - beta; + + alpha2_sum += alpha * alpha * weights[i]; + beta2_sum += beta * beta * weights[i]; + alphabeta_sum += alpha * beta * weights[i]; + alphax_sum += alpha * colors[i]; + } + + float3 betax_sum = color_sum - alphax_sum; + + const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); + + float3 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor; + float3 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor; + + // Round a, b to the closest 5-6-5 color and expand... 
+ a = roundAndExpand565(a, start); + b = roundAndExpand565(b, end); + + // compute the error + float3 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum); + + return dot(e, kColorMetricSqr); +} + +/* +__device__ float evalPermutation3(const float3 * colors, const float * weights, uint permutation, ushort * start, ushort * end) +{ + // Compute endpoints using least squares. + float alpha2_sum = 0.0f; + float beta2_sum = 0.0f; + float alphabeta_sum = 0.0f; + float3 alphax_sum = make_float3(0.0f, 0.0f, 0.0f); + + // Compute alpha & beta for this permutation. + for (int i = 0; i < 16; i++) + { + const uint bits = permutation >> (2*i); + + float beta = (bits & 1); + if (bits & 2) beta = 0.5f; + float alpha = 1.0f - beta; + + alpha2_sum += alpha * alpha * weights[i]; + beta2_sum += beta * beta * weights[i]; + alphabeta_sum += alpha * beta * weights[i]; + alphax_sum += alpha * colors[i]; + } + + float3 betax_sum = color_sum - alphax_sum; + + const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); + + float3 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor; + float3 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor; + + // Round a, b to the closest 5-6-5 color and expand... + a = roundAndExpand565(a, start); + b = roundAndExpand565(b, end); + + // compute the error + float3 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum); + + return dot(e, kColorMetricSqr); +} +*/ + +__device__ float evalPermutation4(const float2 * colors, float2 color_sum, uint permutation, ushort * start, ushort * end) +{ + // Compute endpoints using least squares. + float2 alphax_sum = make_float2(0.0f, 0.0f); + uint akku = 0; + + // Compute alpha & beta for this permutation. + #pragma unroll + for (int i = 0; i < 16; i++) + { + const uint bits = permutation >> (2*i); + + alphax_sum += alphaTable4[bits & 3] * colors[i]; + akku += prods4[bits & 3]; + } + + float alpha2_sum = float(akku >> 16); + float beta2_sum = float((akku >> 8) & 0xff); + float alphabeta_sum = float(akku & 0xff); + float2 betax_sum = 9.0f * color_sum - alphax_sum; + + const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); + + float2 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor; + float2 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor; + + // Round a, b to the closest 5-6 color and expand... + a = roundAndExpand56(a, start); + b = roundAndExpand56(b, end); + + // compute the error + float2 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum); + + return (1.0f / 9.0f) * (e.x + e.y); +} + +__device__ float evalPermutation3(const float2 * colors, float2 color_sum, uint permutation, ushort * start, ushort * end) +{ + // Compute endpoints using least squares. + float2 alphax_sum = make_float2(0.0f, 0.0f); + uint akku = 0; + + // Compute alpha & beta for this permutation. 
+ #pragma unroll + for (int i = 0; i < 16; i++) + { + const uint bits = permutation >> (2*i); + + alphax_sum += alphaTable3[bits & 3] * colors[i]; + akku += prods3[bits & 3]; + } + + float alpha2_sum = float(akku >> 16); + float beta2_sum = float((akku >> 8) & 0xff); + float alphabeta_sum = float(akku & 0xff); + float2 betax_sum = 4.0f * color_sum - alphax_sum; + + const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); + + float2 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor; + float2 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor; + + // Round a, b to the closest 5-6 color and expand... + a = roundAndExpand56(a, start); + b = roundAndExpand56(b, end); + + // compute the error + float2 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum); + + return (1.0f / 4.0f) * (e.x + e.y); +} + +__device__ float evalPermutationCTX(const float2 * colors, float2 color_sum, uint permutation, ushort * start, ushort * end) +{ + // Compute endpoints using least squares. + float2 alphax_sum = make_float2(0.0f, 0.0f); + uint akku = 0; + + // Compute alpha & beta for this permutation. + #pragma unroll + for (int i = 0; i < 16; i++) + { + const uint bits = permutation >> (2*i); + + alphax_sum += alphaTable4[bits & 3] * colors[i]; + akku += prods4[bits & 3]; + } + + float alpha2_sum = float(akku >> 16); + float beta2_sum = float((akku >> 8) & 0xff); + float alphabeta_sum = float(akku & 0xff); + float2 betax_sum = 9.0f * color_sum - alphax_sum; + + const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); + + float2 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor; + float2 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor; + + // Round a, b to the closest 8-8 color and expand... + a = roundAndExpand88(a, start); + b = roundAndExpand88(b, end); + + // compute the error + float2 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum); + + return (1.0f / 9.0f) * (e.x + e.y); +} + + +//////////////////////////////////////////////////////////////////////////////// +// Evaluate all permutations +//////////////////////////////////////////////////////////////////////////////// +__device__ void evalAllPermutations(const float3 * colors, float3 colorSum, const uint * permutations, ushort & bestStart, ushort & bestEnd, uint & bestPermutation, float * errors) +{ + const int idx = threadIdx.x; + + float bestError = FLT_MAX; + + __shared__ uint s_permutations[160]; + + for(int i = 0; i < 16; i++) + { + int pidx = idx + NUM_THREADS * i; + if (pidx >= 992) break; + + ushort start, end; + uint permutation = permutations[pidx]; + if (pidx < 160) s_permutations[pidx] = permutation; + + float error = evalPermutation4(colors, colorSum, permutation, &start, &end); + + if (error < bestError) + { + bestError = error; + bestPermutation = permutation; + bestStart = start; + bestEnd = end; + } + } + + if (bestStart < bestEnd) + { + swap(bestEnd, bestStart); + bestPermutation ^= 0x55555555; // Flip indices. 
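+        // XOR with 0x55555555 toggles the low bit of every 2-bit code, exchanging
+        // 0<->1 and 2<->3, which re-targets the indices after the endpoint swap.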
+ } + + for(int i = 0; i < 3; i++) + { + int pidx = idx + NUM_THREADS * i; + if (pidx >= 160) break; + + ushort start, end; + uint permutation = s_permutations[pidx]; + float error = evalPermutation3(colors, colorSum, permutation, &start, &end); + + if (error < bestError) + { + bestError = error; + bestPermutation = permutation; + bestStart = start; + bestEnd = end; + + if (bestStart > bestEnd) + { + swap(bestEnd, bestStart); + bestPermutation ^= (~bestPermutation >> 1) & 0x55555555; // Flip indices. + } + } + } + + errors[idx] = bestError; +} + +/* +__device__ void evalAllPermutations(const float3 * colors, const float * weights, const uint * permutations, ushort & bestStart, ushort & bestEnd, uint & bestPermutation, float * errors) +{ + const int idx = threadIdx.x; + + float bestError = FLT_MAX; + + __shared__ uint s_permutations[160]; + + for(int i = 0; i < 16; i++) + { + int pidx = idx + NUM_THREADS * i; + if (pidx >= 992) break; + + ushort start, end; + uint permutation = permutations[pidx]; + if (pidx < 160) s_permutations[pidx] = permutation; + + float error = evalPermutation4(colors, weights, permutation, &start, &end); + + if (error < bestError) + { + bestError = error; + bestPermutation = permutation; + bestStart = start; + bestEnd = end; + } + } + + if (bestStart < bestEnd) + { + swap(bestEnd, bestStart); + bestPermutation ^= 0x55555555; // Flip indices. + } + + for(int i = 0; i < 3; i++) + { + int pidx = idx + NUM_THREADS * i; + if (pidx >= 160) break; + + ushort start, end; + uint permutation = s_permutations[pidx]; + float error = evalPermutation3(colors, weights, permutation, &start, &end); + + if (error < bestError) + { + bestError = error; + bestPermutation = permutation; + bestStart = start; + bestEnd = end; + + if (bestStart > bestEnd) + { + swap(bestEnd, bestStart); + bestPermutation ^= (~bestPermutation >> 1) & 0x55555555; // Flip indices. + } + } + } + + errors[idx] = bestError; +} +*/ + +__device__ void evalAllPermutations(const float2 * colors, float2 colorSum, const uint * permutations, ushort & bestStart, ushort & bestEnd, uint & bestPermutation, float * errors) +{ + const int idx = threadIdx.x; + + float bestError = FLT_MAX; + + __shared__ uint s_permutations[160]; + + for(int i = 0; i < 16; i++) + { + int pidx = idx + NUM_THREADS * i; + if (pidx >= 992) break; + + ushort start, end; + uint permutation = permutations[pidx]; + if (pidx < 160) s_permutations[pidx] = permutation; + + float error = evalPermutation4(colors, colorSum, permutation, &start, &end); + + if (error < bestError) + { + bestError = error; + bestPermutation = permutation; + bestStart = start; + bestEnd = end; + } + } + + if (bestStart < bestEnd) + { + swap(bestEnd, bestStart); + bestPermutation ^= 0x55555555; // Flip indices. + } + + for(int i = 0; i < 3; i++) + { + int pidx = idx + NUM_THREADS * i; + if (pidx >= 160) break; + + ushort start, end; + uint permutation = s_permutations[pidx]; + float error = evalPermutation3(colors, colorSum, permutation, &start, &end); + + if (error < bestError) + { + bestError = error; + bestPermutation = permutation; + bestStart = start; + bestEnd = end; + + if (bestStart > bestEnd) + { + swap(bestEnd, bestStart); + bestPermutation ^= (~bestPermutation >> 1) & 0x55555555; // Flip indices. 
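+                // In the 3-color mode only codes 0 and 1 name the endpoints, so the
+                // low bit is toggled only where the high bit is clear: 0<->1 are
+                // exchanged while codes 2 and 3 are left untouched.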
+ } + } + } + + errors[idx] = bestError; +} + +__device__ void evalLevel4Permutations(const float3 * colors, float3 colorSum, const uint * permutations, ushort & bestStart, ushort & bestEnd, uint & bestPermutation, float * errors) +{ + const int idx = threadIdx.x; + + float bestError = FLT_MAX; + + for(int i = 0; i < 16; i++) + { + int pidx = idx + NUM_THREADS * i; + if (pidx >= 992) break; + + ushort start, end; + uint permutation = permutations[pidx]; + + float error = evalPermutation4(colors, colorSum, permutation, &start, &end); + + if (error < bestError) + { + bestError = error; + bestPermutation = permutation; + bestStart = start; + bestEnd = end; + } + } + + if (bestStart < bestEnd) + { + swap(bestEnd, bestStart); + bestPermutation ^= 0x55555555; // Flip indices. + } + + errors[idx] = bestError; +} + +__device__ void evalLevel4Permutations(const float3 * colors, const float * weights, float3 colorSum, const uint * permutations, ushort & bestStart, ushort & bestEnd, uint & bestPermutation, float * errors) +{ + const int idx = threadIdx.x; + + float bestError = FLT_MAX; + + for(int i = 0; i < 16; i++) + { + int pidx = idx + NUM_THREADS * i; + if (pidx >= 992) break; + + ushort start, end; + uint permutation = permutations[pidx]; + + float error = evalPermutation4(colors, weights, colorSum, permutation, &start, &end); + + if (error < bestError) + { + bestError = error; + bestPermutation = permutation; + bestStart = start; + bestEnd = end; + } + } + + if (bestStart < bestEnd) + { + swap(bestEnd, bestStart); + bestPermutation ^= 0x55555555; // Flip indices. + } + + errors[idx] = bestError; +} + +__device__ void evalAllPermutationsCTX(const float2 * colors, float2 colorSum, const uint * permutations, ushort & bestStart, ushort & bestEnd, uint & bestPermutation, float * errors) +{ + const int idx = threadIdx.x; + + float bestError = FLT_MAX; + + for(int i = 0; i < 16; i++) + { + int pidx = idx + NUM_THREADS * i; + if (pidx >= 704) break; + + ushort start, end; + uint permutation = permutations[pidx]; + + float error = evalPermutationCTX(colors, colorSum, permutation, &start, &end); + + if (error < bestError) + { + bestError = error; + bestPermutation = permutation; + bestStart = start; + bestEnd = end; + } + } + + if (bestStart < bestEnd) + { + swap(bestEnd, bestStart); + bestPermutation ^= 0x55555555; // Flip indices. 
+ } + + errors[idx] = bestError; +} + + +//////////////////////////////////////////////////////////////////////////////// +// Find index with minimum error +//////////////////////////////////////////////////////////////////////////////// +__device__ int findMinError(float * errors) +{ + const int idx = threadIdx.x; + + __shared__ int indices[NUM_THREADS]; + indices[idx] = idx; + + for(int d = NUM_THREADS/2; d > 32; d >>= 1) + { + __syncthreads(); + + if (idx < d) + { + float err0 = errors[idx]; + float err1 = errors[idx + d]; + + if (err1 < err0) { + errors[idx] = err1; + indices[idx] = indices[idx + d]; + } + } + } + + __syncthreads(); + + // unroll last 6 iterations + if (idx < 32) + { + if (errors[idx + 32] < errors[idx]) { + errors[idx] = errors[idx + 32]; + indices[idx] = indices[idx + 32]; + } + if (errors[idx + 16] < errors[idx]) { + errors[idx] = errors[idx + 16]; + indices[idx] = indices[idx + 16]; + } + if (errors[idx + 8] < errors[idx]) { + errors[idx] = errors[idx + 8]; + indices[idx] = indices[idx + 8]; + } + if (errors[idx + 4] < errors[idx]) { + errors[idx] = errors[idx + 4]; + indices[idx] = indices[idx + 4]; + } + if (errors[idx + 2] < errors[idx]) { + errors[idx] = errors[idx + 2]; + indices[idx] = indices[idx + 2]; + } + if (errors[idx + 1] < errors[idx]) { + errors[idx] = errors[idx + 1]; + indices[idx] = indices[idx + 1]; + } + } + + __syncthreads(); + + return indices[0]; +} + + +//////////////////////////////////////////////////////////////////////////////// +// Save DXT block +//////////////////////////////////////////////////////////////////////////////// +__device__ void saveBlockDXT1(ushort start, ushort end, uint permutation, int xrefs[16], uint2 * result) +{ + const int bid = blockIdx.x; + + if (start == end) + { + permutation = 0; + } + + // Reorder permutation. + uint indices = 0; + for(int i = 0; i < 16; i++) + { + int ref = xrefs[i]; + indices |= ((permutation >> (2 * ref)) & 3) << (2 * i); + } + + // Write endpoints. + result[bid].x = (end << 16) | start; + + // Write palette indices. + result[bid].y = indices; +} + +__device__ void saveBlockDXT1_Parallel(uint endpoints, float3 colors[16], int xrefs[16], uint * result) +{ + const int tid = threadIdx.x; + const int bid = blockIdx.x; + + if (tid < 16) + { + int3 color = float3ToInt3(colors[xrefs[tid]]); + + ushort endpoint0 = endpoints & 0xFFFF; + ushort endpoint1 = endpoints >> 16; + + int3 palette[4]; + palette[0] = color16ToInt3(endpoint0); + palette[1] = color16ToInt3(endpoint1); + + int d0 = colorDistance(palette[0], color); + int d1 = colorDistance(palette[1], color); + + uint index; + if (endpoint0 > endpoint1) + { + palette[2].x = (2 * palette[0].x + palette[1].x) / 3; + palette[2].y = (2 * palette[0].y + palette[1].y) / 3; + palette[2].z = (2 * palette[0].z + palette[1].z) / 3; + + palette[3].x = (2 * palette[1].x + palette[0].x) / 3; + palette[3].y = (2 * palette[1].y + palette[0].y) / 3; + palette[3].z = (2 * palette[1].z + palette[0].z) / 3; + + int d2 = colorDistance(palette[2], color); + int d3 = colorDistance(palette[3], color); + + // Compute the index that best fit color. 
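+            // The four palette entries lie evenly spaced along the segment from
+            // endpoint0 to endpoint1, so the comparisons below recover the nearest
+            // entry's 2-bit index without branching.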
+ uint b0 = d0 > d3; + uint b1 = d1 > d2; + uint b2 = d0 > d2; + uint b3 = d1 > d3; + uint b4 = d2 > d3; + + uint x0 = b1 & b2; + uint x1 = b0 & b3; + uint x2 = b0 & b4; + + index = (x2 | ((x0 | x1) << 1)); + } + else { + palette[2].x = (palette[0].x + palette[1].x) / 2; + palette[2].y = (palette[0].y + palette[1].y) / 2; + palette[2].z = (palette[0].z + palette[1].z) / 2; + + int d2 = colorDistance(palette[2], color); + + index = 0; + if (d1 < d0 && d1 < d2) index = 1; + else if (d2 < d0) index = 2; + } + + __shared__ uint indices[16]; + + indices[tid] = index << (2 * tid); + if (tid < 8) indices[tid] |= indices[tid+8]; + if (tid < 4) indices[tid] |= indices[tid+4]; + if (tid < 2) indices[tid] |= indices[tid+2]; + if (tid < 1) indices[tid] |= indices[tid+1]; + + if (tid < 2) { + result[2 * bid + tid] = tid == 0 ? endpoints : indices[0]; + } + } +} + +__device__ void saveBlockDXT1_Parallel(uint endpoints, uint permutation, int xrefs[16], uint * result) +{ + const int tid = threadIdx.x; + const int bid = blockIdx.x; + + if (tid < 16) + { + // Reorder permutation. + uint index = ((permutation >> (2 * xrefs[tid])) & 3) << (2 * tid); + __shared__ uint indices[16]; + + indices[tid] = index; + if (tid < 8) indices[tid] |= indices[tid+8]; + if (tid < 4) indices[tid] |= indices[tid+4]; + if (tid < 2) indices[tid] |= indices[tid+2]; + if (tid < 1) indices[tid] |= indices[tid+1]; + + if (tid < 2) { + result[2 * bid + tid] = tid == 0 ? endpoints : indices[0]; + } + } +} + + +__device__ void saveBlockCTX1(ushort start, ushort end, uint permutation, int xrefs[16], uint2 * result) +{ + saveBlockDXT1(start, end, permutation, xrefs, result); +} + +__device__ void saveSingleColorBlockDXT1(float3 color, uint2 * result) +{ + const int bid = blockIdx.x; + + int r = color.x * 255; + int g = color.y * 255; + int b = color.z * 255; + + ushort color0 = (OMatch5[r][0] << 11) | (OMatch6[g][0] << 5) | OMatch5[b][0]; + ushort color1 = (OMatch5[r][1] << 11) | (OMatch6[g][1] << 5) | OMatch5[b][1]; + + if (color0 < color1) + { + result[bid].x = (color0 << 16) | color1; + result[bid].y = 0xffffffff; + } + else + { + result[bid].x = (color1 << 16) | color0; + result[bid].y = 0xaaaaaaaa; + } +} + +__device__ void saveSingleColorBlockDXT1(float2 color, uint2 * result) +{ + const int bid = blockIdx.x; + + int r = color.x * 255; + int g = color.y * 255; + + ushort color0 = (OMatch5[r][0] << 11) | (OMatch6[g][0] << 5); + ushort color1 = (OMatch5[r][1] << 11) | (OMatch6[g][1] << 5); + + if (color0 < color1) + { + result[bid].x = (color0 << 16) | color1; + result[bid].y = 0xffffffff; + } + else + { + result[bid].x = (color1 << 16) | color0; + result[bid].y = 0xaaaaaaaa; + } +} + +__device__ void saveSingleColorBlockCTX1(float2 color, uint2 * result) +{ + const int bid = blockIdx.x; + + int r = color.x * 255; + int g = color.y * 255; + + ushort color0 = (r << 8) | (g); + + result[bid].x = (color0 << 16) | color0; + result[bid].y = 0x00000000; +} + + +//////////////////////////////////////////////////////////////////////////////// +// Compress color block +//////////////////////////////////////////////////////////////////////////////// + +__global__ void compressDXT1(uint firstBlock, uint blockWidth, const uint * permutations, uint2 * result) +{ + __shared__ float3 colors[16]; + __shared__ float3 sums[16]; + __shared__ int xrefs[16]; + __shared__ int sameColor; + + loadColorBlockTex(firstBlock, blockWidth, colors, sums, xrefs, &sameColor); + + __syncthreads(); + + if (sameColor) + { + if (threadIdx.x == 0) 
saveSingleColorBlockDXT1(colors[0], result); + return; + } + + ushort bestStart, bestEnd; + uint bestPermutation; + + __shared__ float errors[NUM_THREADS]; + evalAllPermutations(colors, sums[0], permutations, bestStart, bestEnd, bestPermutation, errors); + + // Use a parallel reduction to find minimum error. + const int minIdx = findMinError(errors); + + __shared__ uint s_bestEndPoints; + //__shared__ uint s_bestPermutation; + + // Only write the result of the winner thread. + if (threadIdx.x == minIdx) + { + s_bestEndPoints = (bestEnd << 16) | bestStart; + //s_bestPermutation = (bestStart != bestEnd) ? bestPermutation : 0; + } + + __syncthreads(); + + saveBlockDXT1_Parallel(s_bestEndPoints, colors, xrefs, (uint *)result); + //saveBlockDXT1_Parallel(s_bestEndPoints, s_bestPermutation, xrefs, (uint *)result); +} + + +__global__ void compressLevel4DXT1(uint firstBlock, uint blockWidth, const uint * permutations, uint2 * result) +{ + __shared__ float3 colors[16]; + __shared__ float3 sums[16]; + __shared__ int xrefs[16]; + __shared__ int sameColor; + + loadColorBlockTex(firstBlock, blockWidth, colors, sums, xrefs, &sameColor); + + __syncthreads(); + + if (sameColor) + { + if (threadIdx.x == 0) saveSingleColorBlockDXT1(colors[0], result); + return; + } + + ushort bestStart, bestEnd; + uint bestPermutation; + + __shared__ float errors[NUM_THREADS]; + + evalLevel4Permutations(colors, sums[0], permutations, bestStart, bestEnd, bestPermutation, errors); + + // Use a parallel reduction to find minimum error. + const int minIdx = findMinError(errors); + + // Only write the result of the winner thread. + if (threadIdx.x == minIdx) + { + saveBlockDXT1(bestStart, bestEnd, bestPermutation, xrefs, result); + } +} + +__global__ void compressWeightedDXT1(uint firstBlock, uint blockWidth, const uint * permutations, uint2 * result) +{ + __shared__ float3 colors[16]; + __shared__ float3 sums[16]; + __shared__ float weights[16]; + __shared__ int xrefs[16]; + __shared__ int sameColor; + + loadColorBlockTex(firstBlock, blockWidth, colors, sums, weights, xrefs, &sameColor); + + __syncthreads(); + + if (sameColor) + { + if (threadIdx.x == 0) saveSingleColorBlockDXT1(colors[0], result); + return; + } + + ushort bestStart, bestEnd; + uint bestPermutation; + + __shared__ float errors[NUM_THREADS]; + + evalLevel4Permutations(colors, weights, sums[0], permutations, bestStart, bestEnd, bestPermutation, errors); + + // Use a parallel reduction to find minimum error. + int minIdx = findMinError(errors); + + // Only write the result of the winner thread. + if (threadIdx.x == minIdx) + { + saveBlockDXT1(bestStart, bestEnd, bestPermutation, xrefs, result); + } +} + + +__global__ void compressNormalDXT1(const uint * permutations, const uint * image, uint2 * result) +{ + __shared__ float2 colors[16]; + __shared__ float2 sums[16]; + __shared__ int xrefs[16]; + __shared__ int sameColor; + + loadColorBlock(image, colors, sums, xrefs, &sameColor); + + __syncthreads(); + + if (sameColor) + { + if (threadIdx.x == 0) saveSingleColorBlockDXT1(colors[0], result); + return; + } + + ushort bestStart, bestEnd; + uint bestPermutation; + + __shared__ float errors[NUM_THREADS]; + + evalAllPermutations(colors, sums[0], permutations, bestStart, bestEnd, bestPermutation, errors); + + // Use a parallel reduction to find minimum error. + const int minIdx = findMinError(errors); + + // Only write the result of the winner thread. 
+ if (threadIdx.x == minIdx) + { + saveBlockDXT1(bestStart, bestEnd, bestPermutation, xrefs, result); + } +} + +__global__ void compressCTX1(const uint * permutations, const uint * image, uint2 * result) +{ + __shared__ float2 colors[16]; + __shared__ float2 sums[16]; + __shared__ int xrefs[16]; + __shared__ int sameColor; + + loadColorBlock(image, colors, sums, xrefs, &sameColor); + + __syncthreads(); + + if (sameColor) + { + if (threadIdx.x == 0) saveSingleColorBlockCTX1(colors[0], result); + return; + } + + ushort bestStart, bestEnd; + uint bestPermutation; + + __shared__ float errors[NUM_THREADS]; + + evalAllPermutationsCTX(colors, sums[0], permutations, bestStart, bestEnd, bestPermutation, errors); + + // Use a parallel reduction to find minimum error. + const int minIdx = findMinError(errors); + + // Only write the result of the winner thread. + if (threadIdx.x == minIdx) + { + saveBlockCTX1(bestStart, bestEnd, bestPermutation, xrefs, result); + } +} + + +/* +__device__ float computeError(const float weights[16], uchar a0, uchar a1) +{ + float palette[6]; + palette[0] = (6.0f/7.0f * a0 + 1.0f/7.0f * a1); + palette[1] = (5.0f/7.0f * a0 + 2.0f/7.0f * a1); + palette[2] = (4.0f/7.0f * a0 + 3.0f/7.0f * a1); + palette[3] = (3.0f/7.0f * a0 + 4.0f/7.0f * a1); + palette[4] = (2.0f/7.0f * a0 + 5.0f/7.0f * a1); + palette[5] = (1.0f/7.0f * a0 + 6.0f/7.0f * a1); + + float total = 0.0f; + + for (uint i = 0; i < 16; i++) + { + float alpha = weights[i]; + + float error = a0 - alpha; + error = min(error, palette[0] - alpha); + error = min(error, palette[1] - alpha); + error = min(error, palette[2] - alpha); + error = min(error, palette[3] - alpha); + error = min(error, palette[4] - alpha); + error = min(error, palette[5] - alpha); + error = min(error, a1 - alpha); + + total += error; + } + + return total; +} + +inline __device__ uchar roundAndExpand(float a) +{ + return rintf(__saturatef(a) * 255.0f); +} +*/ +/* +__device__ void optimizeAlpha8(const float alphas[16], uchar & a0, uchar & a1) +{ + float alpha2_sum = 0; + float beta2_sum = 0; + float alphabeta_sum = 0; + float alphax_sum = 0; + float betax_sum = 0; + + for (int i = 0; i < 16; i++) + { + uint idx = index[i]; + float alpha; + if (idx < 2) alpha = 1.0f - idx; + else alpha = (8.0f - idx) / 7.0f; + + float beta = 1 - alpha; + + alpha2_sum += alpha * alpha; + beta2_sum += beta * beta; + alphabeta_sum += alpha * beta; + alphax_sum += alpha * alphas[i]; + betax_sum += beta * alphas[i]; + } + + const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); + + float a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor; + float b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor; + + a0 = roundAndExpand8(a); + a1 = roundAndExpand8(b); +} +*/ +/* +__device__ void compressAlpha(const float alphas[16], uint4 * result) +{ + const int tid = threadIdx.x; + + // Compress alpha block! + // Brute force approach: + // Try all color pairs: 256*256/2 = 32768, 32768/64 = 512 iterations? 
+ + // Determine min & max alphas + + float A0, A1; + + if (tid < 16) + { + __shared__ uint s_alphas[16]; + + s_alphas[tid] = alphas[tid]; + s_alphas[tid] = min(s_alphas[tid], s_alphas[tid^8]); + s_alphas[tid] = min(s_alphas[tid], s_alphas[tid^4]); + s_alphas[tid] = min(s_alphas[tid], s_alphas[tid^2]); + s_alphas[tid] = min(s_alphas[tid], s_alphas[tid^1]); + A0 = s_alphas[tid]; + + s_alphas[tid] = alphas[tid]; + s_alphas[tid] = max(s_alphas[tid], s_alphas[tid^8]); + s_alphas[tid] = max(s_alphas[tid], s_alphas[tid^4]); + s_alphas[tid] = max(s_alphas[tid], s_alphas[tid^2]); + s_alphas[tid] = max(s_alphas[tid], s_alphas[tid^1]); + A1 = s_alphas[tid]; + } + + __syncthreads(); + + int minIdx = 0; + + if (A1 - A0 > 8) + { + float bestError = FLT_MAX; + + // 64 threads -> 8x8 + // divide [A1-A0] in partitions. + // test endpoints + + for (int i = 0; i < 128; i++) + { + uint idx = (i * NUM_THREADS + tid) * 4; + uchar a0 = idx & 255; + uchar a1 = idx >> 8; + + float error = computeError(alphas, a0, a1); + + if (error < bestError) + { + bestError = error; + A0 = a0; + A1 = a1; + } + } + + __shared__ float errors[NUM_THREADS]; + errors[tid] = bestError; + + // Minimize error. + minIdx = findMinError(errors); + + } + + if (minIdx == tid) + { + // @@ Compute indices. + + // @@ Write alpha block. + } +} + +__global__ void compressDXT5(const uint * permutations, const uint * image, uint4 * result) +{ + __shared__ float3 colors[16]; + __shared__ float3 sums[16]; + __shared__ float weights[16]; + __shared__ int xrefs[16]; + + loadColorBlock(image, colors, sums, weights, xrefs); + + __syncthreads(); + + compressAlpha(weights, result); + + ushort bestStart, bestEnd; + uint bestPermutation; + + __shared__ float errors[NUM_THREADS]; + + evalLevel4Permutations(colors, weights, sums[0], permutations, bestStart, bestEnd, bestPermutation, errors); + + // Use a parallel reduction to find minimum error. + int minIdx = findMinError(errors); + + // Only write the result of the winner thread. + if (threadIdx.x == minIdx) + { + saveBlockDXT1(bestStart, bestEnd, bestPermutation, xrefs, (uint2 *)result); + } +} +*/ + +/*__device__ void evaluatePalette(uint alpha0, uint alpha1, uint alphas[8]) +{ + alpha[0] = alpha0; + alpha[1] = alpha1; + alpha[2] = (6 * alpha[0] + 1 * alpha[1]) / 7; // bit code 010 + alpha[3] = (5 * alpha[0] + 2 * alpha[1]) / 7; // bit code 011 + alpha[4] = (4 * alpha[0] + 3 * alpha[1]) / 7; // bit code 100 + alpha[5] = (3 * alpha[0] + 4 * alpha[1]) / 7; // bit code 101 + alpha[6] = (2 * alpha[0] + 5 * alpha[1]) / 7; // bit code 110 + alpha[7] = (1 * alpha[0] + 6 * alpha[1]) / 7; // bit code 111 +} + +__device__ uint computeAlphaError(const uint block[16], uint alpha0, uint alpha1, int bestError = INT_MAX) +{ + uint8 alphas[8]; + evaluatePalette(alpha0, alpha1, alphas); + + int totalError = 0; + + for (uint i = 0; i < 16; i++) + { + uint8 alpha = block[i]; + + // @@ It should be possible to do this much faster. + + int minDist = INT_MAX; + for (uint p = 0; p < 8; p++) + { + int dist = alphaDistance(alpha, alphas[p]); + minDist = min(dist, minDist); + } + + + + totalError += minDist; + + if (totalError > bestError) + { + // early out + return totalError; + } + } + + return totalError; +} + + +void compressDXT5A(uint alpha[16]) +{ + // Get min/max alpha. 
+ for (uint i = 0; i < 16; i++) + { + mina = min(mina, alpha[i]); + maxa = max(maxa, alpha[i]); + } + + dxtBlock->alpha0 = maxa; + dxtBlock->alpha1 = mina; + + if (maxa - mina > 8) + { + int besterror = computeAlphaError(rgba, dxtBlock); + int besta0 = maxa; + int besta1 = mina; + + // Expand search space a bit. + const int alphaExpand = 8; + mina = (mina <= alphaExpand) ? 0 : mina - alphaExpand; + maxa = (maxa <= 255-alphaExpand) ? 255 : maxa + alphaExpand; + + for (int a0 = mina+9; a0 < maxa; a0++) + { + for (int a1 = mina; a1 < a0-8; a1++) + { + nvDebugCheck(a0 - a1 > 8); + + dxtBlock->alpha0 = a0; + dxtBlock->alpha1 = a1; + int error = computeAlphaError(rgba, dxtBlock, besterror); + + if (error < besterror) + { + besterror = error; + besta0 = a0; + besta1 = a1; + } + } + } + + dxtBlock->alpha0 = besta0; + dxtBlock->alpha1 = besta1; + } +} + +__global__ void compressDXT5n(uint blockNum, uint2 * d_result) +{ + uint idx = blockIdx.x * 128 + threadIdx.x; + + if (idx >= blockNum) + { + return; + } + + // @@ Ideally we would load the data to shared mem to achieve coalesced global mem access. + // @@ Blocks would require too much shared memory (8k) and limit occupancy. + + // @@ Ideally we should use SIMD processing, multiple threads (4-8) processing the same block. + // That simplifies coalescing, and reduces divergence. + + // @@ Experiment with texture. That's probably the most simple approach. + + uint x[16]; + uint y[16]; + + +} +*/ + + +//////////////////////////////////////////////////////////////////////////////// +// Setup kernel +//////////////////////////////////////////////////////////////////////////////// + +extern "C" void setupOMatchTables(const void * OMatch5Src, size_t OMatch5Size, const void * OMatch6Src, size_t OMatch6Size) +{ + // Init single color lookup contant tables. + cudaMemcpyToSymbol(OMatch5, OMatch5Src, OMatch5Size, 0, cudaMemcpyHostToDevice); + cudaMemcpyToSymbol(OMatch6, OMatch6Src, OMatch6Size, 0, cudaMemcpyHostToDevice); +} + +extern "C" void setupCompressKernel(const float weights[3]) +{ + // Set constants. + cudaMemcpyToSymbol(kColorMetric, weights, sizeof(float) * 3, 0); + + float weightsSqr[3]; + weightsSqr[0] = weights[0] * weights[0]; + weightsSqr[1] = weights[1] * weights[1]; + weightsSqr[2] = weights[2] * weights[2]; + + cudaMemcpyToSymbol(kColorMetricSqr, weightsSqr, sizeof(float) * 3, 0); +} + +extern "C" void bindTextureToArray(cudaArray * d_data) +{ + // Setup texture + tex.normalized = false; + tex.filterMode = cudaFilterModePoint; + tex.addressMode[0] = cudaAddressModeClamp; + tex.addressMode[1] = cudaAddressModeClamp; + + cudaBindTextureToArray(tex, d_data); +} + + + +//////////////////////////////////////////////////////////////////////////////// +// Launch kernel +//////////////////////////////////////////////////////////////////////////////// + +// DXT1 compressors: +extern "C" void compressKernelDXT1(uint firstBlock, uint blockNum, uint blockWidth, uint * d_result, uint * d_bitmaps) +{ + compressDXT1<<>>(firstBlock, blockWidth, d_bitmaps, (uint2 *)d_result); +} + +extern "C" void compressKernelDXT1_Level4(uint firstBlock, uint blockNum, uint blockWidth, uint * d_result, uint * d_bitmaps) +{ + compressLevel4DXT1<<>>(firstBlock, blockWidth, d_bitmaps, (uint2 *)d_result); +} + +extern "C" void compressWeightedKernelDXT1(uint firstBlock, uint blockNum, uint blockWidth, uint * d_result, uint * d_bitmaps) +{ + compressWeightedDXT1<<>>(firstBlock, blockWidth, d_bitmaps, (uint2 *)d_result); +} + +// @@ DXT1a compressors. 
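The three wrappers above launch one CUDA thread block per 4x4 texel block, and the device scratch buffers (see CudaContext later in this patch) hold at most MAX_BLOCKS blocks, so a caller walks a large image in chunks. A minimal host-side sketch of that dispatch pattern, assuming the extern "C" declarations and the MAX_BLOCKS constant from this patch are visible; the function and variable names are illustrative:

    #include <algorithm>
    #include <cuda_runtime.h>

    // Illustrative chunked dispatch over every 4x4 block of an image that has
    // already been uploaded and bound to the compression texture.
    void compressAllBlocksDXT1(nv::CudaContext & ctx, unsigned int blockCount,
                               unsigned int blockWidth, unsigned char * output)
    {
        unsigned int first = 0;
        while (first != blockCount)
        {
            // Never launch more blocks than the scratch buffers can hold.
            const unsigned int count = std::min(blockCount - first, MAX_BLOCKS);

            // One CUDA thread block per texel block; results land in ctx.result.
            compressKernelDXT1(first, count, blockWidth, ctx.result, ctx.bitmapTable);

            // Each DXT1 block is 8 bytes; copy this chunk back to the host buffer.
            cudaMemcpy(output + first * 8, ctx.result, count * 8, cudaMemcpyDeviceToHost);

            first += count;
        }
    }

The DXT3/DXT5 paths follow the same chunking but interleave CPU-side alpha-block compression for each chunk, as the compressDXT3/compressDXT5 implementations in this patch do.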
+ + +// @@ DXT3 compressors: +extern "C" void compressKernelDXT3(uint firstBlock, uint blockNum, uint blockWidth, uint * d_result, uint * d_bitmaps) +{ + //compressDXT3<<>>(firstBlock, blockWidth, d_bitmaps, (uint2 *)d_result); +} + +extern "C" void compressWeightedKernelDXT3(uint firstBlock, uint blockNum, uint blockWidth, uint * d_result, uint * d_bitmaps) +{ + //compressWeightedDXT3<<>>(firstBlock, blockWidth, d_bitmaps, (uint2 *)d_result); +} + + +// @@ DXT5 compressors. +extern "C" void compressKernelDXT5(uint firstBlock, uint blockNum, uint w, uint * d_result, uint * d_bitmaps) +{ + //compressDXT5<<>>(firstBlock, w, d_bitmaps, (uint2 *)d_result); +} + +extern "C" void compressWeightedKernelDXT5(uint firstBlock, uint blockNum, uint w, uint * d_result, uint * d_bitmaps) +{ + //compressWeightedDXT5<<>>(firstBlock, w, d_bitmaps, (uint2 *)d_result); +} + + + + + +/* +extern "C" void compressNormalKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps) +{ + compressNormalDXT1<<>>(d_bitmaps, d_data, (uint2 *)d_result); +} + +extern "C" void compressKernelCTX1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps) +{ + compressCTX1<<>>(d_bitmaps, d_data, (uint2 *)d_result); +} +*/ +/* +extern "C" void compressKernelDXT5n(uint blockNum, cudaArray * d_data, uint * d_result) +{ +// compressDXT5n<<>>(blockNum, (uint2 *)d_result); +} +*/ Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/cuda/ConvolveKernel.cu =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/cuda/ConvolveKernel.cu +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/cuda/ConvolveKernel.cu @@ -1,4 +1,5 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano +// Copyright (c) 2009-2011 Ignacio Castano +// Copyright (c) 2007-2009 NVIDIA Corporation -- Ignacio Castano // // Permission is hereby granted, free of charge, to any person // obtaining a copy of this software and associated documentation Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/cuda/CudaCompressDXT.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/cuda/CudaCompressDXT.h +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/cuda/CudaCompressDXT.h @@ -1,61 +0,0 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. 
- -#ifndef NV_TT_CUDACOMPRESSDXT_H -#define NV_TT_CUDACOMPRESSDXT_H - -#include -#include - -namespace nv -{ - class Image; - - class CudaCompressor - { - public: - CudaCompressor(); - ~CudaCompressor(); - - bool isValid() const; - - void setImage(const Image * image, nvtt::AlphaMode alphaMode); - - void compressDXT1(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); - void compressDXT3(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); - void compressDXT5(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); - - private: - - uint * m_bitmapTable; - uint * m_data; - uint * m_result; - - const Image * m_image; - nvtt::AlphaMode m_alphaMode; - }; - -} // nv namespace - - -#endif // NV_TT_CUDAUTILS_H Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/cuda/CudaCompressDXT.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/cuda/CudaCompressDXT.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/cuda/CudaCompressDXT.cpp @@ -1,380 +0,0 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "CudaCompressDXT.h" -#include "CudaUtils.h" - - -#if defined HAVE_CUDA -#include -#endif - -#include -#include - -using namespace nv; -using namespace nvtt; - -#if defined HAVE_CUDA - -#define MAX_BLOCKS 8192U // 32768, 65535 - - -extern "C" void setupCompressKernel(const float weights[3]); -extern "C" void compressKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps); -extern "C" void compressKernelDXT1_Level4(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps); -extern "C" void compressWeightedKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps); - -#include "Bitmaps.h" // @@ Rename to BitmapTable.h - -// Convert linear image to block linear. 
-static void convertToBlockLinear(const Image * image, uint * blockLinearImage) -{ - const uint w = (image->width() + 3) / 4; - const uint h = (image->height() + 3) / 4; - - for(uint by = 0; by < h; by++) { - for(uint bx = 0; bx < w; bx++) { - const uint bw = min(image->width() - bx * 4, 4U); - const uint bh = min(image->height() - by * 4, 4U); - - for (uint i = 0; i < 16; i++) { - const int x = (i % 4) % bw; - const int y = (i / 4) % bh; - blockLinearImage[(by * w + bx) * 16 + i] = image->pixel(bx * 4 + x, by * 4 + y).u; - } - } - } -} - -#endif - - -CudaCompressor::CudaCompressor() : m_bitmapTable(NULL), m_data(NULL), m_result(NULL) -{ -#if defined HAVE_CUDA - // Allocate and upload bitmaps. - cudaMalloc((void**) &m_bitmapTable, 992 * sizeof(uint)); - if (m_bitmapTable != NULL) - { - cudaMemcpy(m_bitmapTable, s_bitmapTable, 992 * sizeof(uint), cudaMemcpyHostToDevice); - } - - // Allocate scratch buffers. - cudaMalloc((void**) &m_data, MAX_BLOCKS * 64U); - cudaMalloc((void**) &m_result, MAX_BLOCKS * 8U); -#endif -} - -CudaCompressor::~CudaCompressor() -{ -#if defined HAVE_CUDA - // Free device mem allocations. - cudaFree(m_data); - cudaFree(m_result); - cudaFree(m_bitmapTable); -#endif -} - -bool CudaCompressor::isValid() const -{ -#if defined HAVE_CUDA - if (cudaGetLastError() != cudaSuccess) - { - return false; - } -#endif - return m_data != NULL && m_result != NULL && m_bitmapTable != NULL; -} - -// @@ This code is very repetitive and needs to be cleaned up. - -void CudaCompressor::setImage(const Image * image, nvtt::AlphaMode alphaMode) -{ - m_image = image; - m_alphaMode = alphaMode; -} - -/// Compress image using CUDA. -void CudaCompressor::compressDXT1(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) -{ - nvDebugCheck(cuda::isHardwarePresent()); -#if defined HAVE_CUDA - - // Image size in blocks. - const uint w = (m_image->width() + 3) / 4; - const uint h = (m_image->height() + 3) / 4; - - uint imageSize = w * h * 16 * sizeof(Color32); - uint * blockLinearImage = (uint *) ::malloc(imageSize); - convertToBlockLinear(m_image, blockLinearImage); // @@ Do this in parallel with the GPU, or in the GPU! - - const uint blockNum = w * h; - const uint compressedSize = blockNum * 8; - - clock_t start = clock(); - - setupCompressKernel(compressionOptions.colorWeight.ptr()); - - // TODO: Add support for multiple GPUs. - uint bn = 0; - while(bn != blockNum) - { - uint count = min(blockNum - bn, MAX_BLOCKS); - - cudaMemcpy(m_data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice); - - // Launch kernel. - compressKernelDXT1(count, m_data, m_result, m_bitmapTable); - - // Check for errors. - cudaError_t err = cudaGetLastError(); - if (err != cudaSuccess) - { - nvDebug("CUDA Error: %s\n", cudaGetErrorString(err)); - - if (outputOptions.errorHandler != NULL) - { - outputOptions.errorHandler->error(Error_CudaError); - } - } - - // Copy result to host, overwrite swizzled image. - cudaMemcpy(blockLinearImage, m_result, count * 8, cudaMemcpyDeviceToHost); - - // Output result. - if (outputOptions.outputHandler != NULL) - { - outputOptions.outputHandler->writeData(blockLinearImage, count * 8); - } - - bn += count; - } - - clock_t end = clock(); - //printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC); - - free(blockLinearImage); - -#else - if (outputOptions.errorHandler != NULL) - { - outputOptions.errorHandler->error(Error_CudaError); - } -#endif -} - - -/// Compress image using CUDA. 
-void CudaCompressor::compressDXT3(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) -{ - nvDebugCheck(cuda::isHardwarePresent()); -#if defined HAVE_CUDA - - // Image size in blocks. - const uint w = (m_image->width() + 3) / 4; - const uint h = (m_image->height() + 3) / 4; - - uint imageSize = w * h * 16 * sizeof(Color32); - uint * blockLinearImage = (uint *) ::malloc(imageSize); - convertToBlockLinear(m_image, blockLinearImage); - - const uint blockNum = w * h; - const uint compressedSize = blockNum * 8; - - AlphaBlockDXT3 * alphaBlocks = NULL; - alphaBlocks = (AlphaBlockDXT3 *)::malloc(min(compressedSize, MAX_BLOCKS * 8U)); - - setupCompressKernel(compressionOptions.colorWeight.ptr()); - - clock_t start = clock(); - - uint bn = 0; - while(bn != blockNum) - { - uint count = min(blockNum - bn, MAX_BLOCKS); - - cudaMemcpy(m_data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice); - - // Launch kernel. - if (m_alphaMode == AlphaMode_Transparency) - { - compressWeightedKernelDXT1(count, m_data, m_result, m_bitmapTable); - } - else - { - compressKernelDXT1_Level4(count, m_data, m_result, m_bitmapTable); - } - - // Compress alpha in parallel with the GPU. - for (uint i = 0; i < count; i++) - { - ColorBlock rgba(blockLinearImage + (bn + i) * 16); - OptimalCompress::compressDXT3A(rgba, alphaBlocks + i); - } - - // Check for errors. - cudaError_t err = cudaGetLastError(); - if (err != cudaSuccess) - { - nvDebug("CUDA Error: %s\n", cudaGetErrorString(err)); - - if (outputOptions.errorHandler != NULL) - { - outputOptions.errorHandler->error(Error_CudaError); - } - } - - // Copy result to host, overwrite swizzled image. - cudaMemcpy(blockLinearImage, m_result, count * 8, cudaMemcpyDeviceToHost); - - // Output result. - if (outputOptions.outputHandler != NULL) - { - for (uint i = 0; i < count; i++) - { - outputOptions.outputHandler->writeData(alphaBlocks + i, 8); - outputOptions.outputHandler->writeData(blockLinearImage + i * 2, 8); - } - } - - bn += count; - } - - clock_t end = clock(); - //printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC); - - free(alphaBlocks); - free(blockLinearImage); - -#else - if (outputOptions.errorHandler != NULL) - { - outputOptions.errorHandler->error(Error_CudaError); - } -#endif -} - - -/// Compress image using CUDA. -void CudaCompressor::compressDXT5(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) -{ - nvDebugCheck(cuda::isHardwarePresent()); -#if defined HAVE_CUDA - - // Image size in blocks. - const uint w = (m_image->width() + 3) / 4; - const uint h = (m_image->height() + 3) / 4; - - uint imageSize = w * h * 16 * sizeof(Color32); - uint * blockLinearImage = (uint *) ::malloc(imageSize); - convertToBlockLinear(m_image, blockLinearImage); - - const uint blockNum = w * h; - const uint compressedSize = blockNum * 8; - - AlphaBlockDXT5 * alphaBlocks = NULL; - alphaBlocks = (AlphaBlockDXT5 *)::malloc(min(compressedSize, MAX_BLOCKS * 8U)); - - setupCompressKernel(compressionOptions.colorWeight.ptr()); - - clock_t start = clock(); - - uint bn = 0; - while(bn != blockNum) - { - uint count = min(blockNum - bn, MAX_BLOCKS); - - cudaMemcpy(m_data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice); - - // Launch kernel. 
- if (m_alphaMode == AlphaMode_Transparency) - { - compressWeightedKernelDXT1(count, m_data, m_result, m_bitmapTable); - } - else - { - compressKernelDXT1_Level4(count, m_data, m_result, m_bitmapTable); - } - - // Compress alpha in parallel with the GPU. - for (uint i = 0; i < count; i++) - { - ColorBlock rgba(blockLinearImage + (bn + i) * 16); - QuickCompress::compressDXT5A(rgba, alphaBlocks + i); - } - - // Check for errors. - cudaError_t err = cudaGetLastError(); - if (err != cudaSuccess) - { - nvDebug("CUDA Error: %s\n", cudaGetErrorString(err)); - - if (outputOptions.errorHandler != NULL) - { - outputOptions.errorHandler->error(Error_CudaError); - } - } - - // Copy result to host, overwrite swizzled image. - cudaMemcpy(blockLinearImage, m_result, count * 8, cudaMemcpyDeviceToHost); - - // Output result. - if (outputOptions.outputHandler != NULL) - { - for (uint i = 0; i < count; i++) - { - outputOptions.outputHandler->writeData(alphaBlocks + i, 8); - outputOptions.outputHandler->writeData(blockLinearImage + i * 2, 8); - } - } - - bn += count; - } - - clock_t end = clock(); - //printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC); - - free(alphaBlocks); - free(blockLinearImage); - -#else - if (outputOptions.errorHandler != NULL) - { - outputOptions.errorHandler->error(Error_CudaError); - } -#endif -} - - Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/cuda/CudaCompressorDXT.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/cuda/CudaCompressorDXT.h +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/cuda/CudaCompressorDXT.h @@ -0,0 +1,113 @@ +// Copyright (c) 2009-2011 Ignacio Castano +// Copyright (c) 2007-2009 NVIDIA Corporation -- Ignacio Castano +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +#ifndef NV_TT_CUDACOMPRESSORDXT_H +#define NV_TT_CUDACOMPRESSORDXT_H + +#include "nvtt/nvtt.h" +#include "nvtt/Compressor.h" // CompressorInterface + +struct cudaArray; + +namespace nv +{ + class CudaContext + { + public: + CudaContext(); + ~CudaContext(); + + bool isValid() const; + + public: + // Device pointers. 
+ uint * bitmapTable; + uint * bitmapTableCTX; + uint * data; + uint * result; + }; + +#if defined HAVE_CUDA + + struct CudaCompressor : public CompressorInterface + { + CudaCompressor(CudaContext & ctx); + + virtual void compress(nvtt::AlphaMode alphaMode, uint w, uint h, uint d, const float * data, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); + + virtual void setup(cudaArray * image, const nvtt::CompressionOptions::Private & compressionOptions) = 0; + virtual void compressBlocks(uint first, uint count, uint w, uint h, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) = 0; + virtual uint blockSize() const = 0; + + protected: + CudaContext & m_ctx; + }; + + struct CudaCompressorDXT1 : public CudaCompressor + { + CudaCompressorDXT1(CudaContext & ctx) : CudaCompressor(ctx) {} + + virtual void setup(cudaArray * image, const nvtt::CompressionOptions::Private & compressionOptions); + virtual void compressBlocks(uint first, uint count, uint w, uint h, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); + virtual uint blockSize() const { return 8; }; + }; + + /*struct CudaCompressorDXT1n : public CudaCompressor + { + virtual void setup(const CompressionOptions::Private & compressionOptions); + virtual void compressBlocks(uint blockCount, const void * input, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) = 0; + virtual uint blockSize() const { return 8; }; + };*/ + + struct CudaCompressorDXT3 : public CudaCompressor + { + CudaCompressorDXT3(CudaContext & ctx) : CudaCompressor(ctx) {} + + virtual void setup(cudaArray * image, const nvtt::CompressionOptions::Private & compressionOptions); + virtual void compressBlocks(uint first, uint count, uint w, uint h, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); + virtual uint blockSize() const { return 16; }; + }; + + struct CudaCompressorDXT5 : public CudaCompressor + { + CudaCompressorDXT5(CudaContext & ctx) : CudaCompressor(ctx) {} + + virtual void setup(cudaArray * image, const nvtt::CompressionOptions::Private & compressionOptions); + virtual void compressBlocks(uint first, uint count, uint w, uint h, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); + virtual uint blockSize() const { return 16; }; + }; + + /*struct CudaCompressorCXT1 : public CudaCompressor + { + virtual void setup(const CompressionOptions::Private & compressionOptions); + virtual void compressBlocks(uint blockCount, const void * input, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) = 0; + virtual uint blockSize() const { return 8; }; + };*/ + +#endif // defined HAVE_CUDA + +} // nv namespace + + +#endif // NV_TT_CUDAUTILS_H Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/cuda/CudaCompressorDXT.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/cuda/CudaCompressorDXT.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/cuda/CudaCompressorDXT.cpp @@ -0,0 +1,608 @@ +// Copyright (c) 2009-2011 Ignacio Castano +// Copyright (c) 2007-2009 NVIDIA Corporation -- Ignacio Castano +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and 
associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +#include "CudaCompressorDXT.h" +#include "CudaUtils.h" + +#include "nvcore/Debug.h" +#include "nvmath/Color.h" +#include "nvmath/Vector.inl" +#include "nvimage/Image.h" +#include "nvimage/ColorBlock.h" +#include "nvimage/BlockDXT.h" +#include "nvtt/CompressionOptions.h" +#include "nvtt/OutputOptions.h" +#include "nvtt/QuickCompressDXT.h" +#include "nvtt/OptimalCompressDXT.h" + +#include +#include + +#if defined HAVE_CUDA +#include + +#define MAX_BLOCKS 8192U // 32768, 65535 // @@ Limit number of blocks on slow devices to prevent hitting the watchdog timer. + +extern "C" void setupOMatchTables(const void * OMatch5Src, size_t OMatch5Size, const void * OMatch6Src, size_t OMatch6Size); +extern "C" void setupCompressKernel(const float weights[3]); +extern "C" void bindTextureToArray(cudaArray * d_data); + +extern "C" void compressKernelDXT1(uint firstBlock, uint blockNum, uint w, uint * d_result, uint * d_bitmaps); +extern "C" void compressKernelDXT1_Level4(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps); +extern "C" void compressWeightedKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps); +extern "C" void compressKernelDXT3(uint firstBlock, uint blockNum, uint w, uint * d_result, uint * d_bitmaps); +//extern "C" void compressNormalKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps); +//extern "C" void compressKernelCTX1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps); + +#include "BitmapTable.h" +#include "nvtt/SingleColorLookup.h" + +#endif + +using namespace nv; +using namespace nvtt; + + +CudaContext::CudaContext() : + bitmapTable(NULL), + bitmapTableCTX(NULL), + data(NULL), + result(NULL) +{ +#if defined HAVE_CUDA + // Allocate and upload bitmaps. + cudaMalloc((void**) &bitmapTable, 992 * sizeof(uint)); + if (bitmapTable != NULL) + { + cudaMemcpy(bitmapTable, s_bitmapTable, 992 * sizeof(uint), cudaMemcpyHostToDevice); + } + + cudaMalloc((void**) &bitmapTableCTX, 704 * sizeof(uint)); + if (bitmapTableCTX != NULL) + { + cudaMemcpy(bitmapTableCTX, s_bitmapTableCTX, 704 * sizeof(uint), cudaMemcpyHostToDevice); + } + + // Allocate scratch buffers. + cudaMalloc((void**) &data, MAX_BLOCKS * 64U); + cudaMalloc((void**) &result, MAX_BLOCKS * 8U); + + // Init single color lookup contant tables. + setupOMatchTables(OMatch5, sizeof(OMatch5), OMatch6, sizeof(OMatch6)); +#endif +} + +CudaContext::~CudaContext() +{ +#if defined HAVE_CUDA + // Free device mem allocations. 
+ cudaFree(bitmapTableCTX); + cudaFree(bitmapTable); + cudaFree(data); + cudaFree(result); +#endif +} + +bool CudaContext::isValid() const +{ +#if defined HAVE_CUDA + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) + { + nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(err)); + return false; + } +#endif + return bitmapTable != NULL && bitmapTableCTX != NULL && data != NULL && result != NULL; +} + + +#if defined HAVE_CUDA + +CudaCompressor::CudaCompressor(CudaContext & ctx) : m_ctx(ctx) +{ + +} + +void CudaCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, uint d, const float * data, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) +{ + nvDebugCheck(d == 1); + nvDebugCheck(cuda::isHardwarePresent()); + +#if defined HAVE_CUDA + + // Allocate image as a cuda array. + const uint count = w * h; + Color32 * tmp = malloc(count); + for (uint i = 0; i < count; i++) { + tmp[i].r = uint8(clamp(data[i + count*0], 0.0f, 1.0f) * 255); + tmp[i].g = uint8(clamp(data[i + count*1], 0.0f, 1.0f) * 255); + tmp[i].b = uint8(clamp(data[i + count*2], 0.0f, 1.0f) * 255); + tmp[i].a = uint8(clamp(data[i + count*3], 0.0f, 1.0f) * 255); + } + + cudaArray * d_image; + cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(8, 8, 8, 8, cudaChannelFormatKindUnsigned); + cudaMallocArray(&d_image, &channelDesc, w, h); + + cudaMemcpyToArray(d_image, 0, 0, tmp, count * sizeof(Color32), cudaMemcpyHostToDevice); + + free(tmp); + + // To avoid the copy we could keep the data in floating point format, but the channels are not interleaved like the kernel expects. + /* + cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 32, 32, 32, cudaChannelFormatKindFloat); + cudaMallocArray(&d_image, &channelDesc, w, h); + + const int imageSize = w * h * sizeof(float) * 4; + cudaMemcpyToArray(d_image, 0, 0, data, imageSize, cudaMemcpyHostToDevice); + */ + + // Image size in blocks. + const uint bw = (w + 3) / 4; + const uint bh = (h + 3) / 4; + const uint bs = blockSize(); + const uint blockNum = bw * bh; + //const uint compressedSize = blockNum * bs; + + void * h_result = ::malloc(min(blockNum, MAX_BLOCKS) * bs); + + setup(d_image, compressionOptions); + + // Timer timer; + // timer.start(); + + uint bn = 0; + while (bn != blockNum) + { + uint count = min(blockNum - bn, MAX_BLOCKS); + + compressBlocks(bn, count, bw, bh, alphaMode, compressionOptions, h_result); + + // Check for errors. + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) + { + //nvDebug("CUDA Error: %s\n", cudaGetErrorString(err)); + outputOptions.error(Error_CudaError); + } + + // Output result. + outputOptions.writeData(h_result, count * bs); + + bn += count; + } + + //timer.stop(); + //printf("\rCUDA time taken: %.3f seconds\n", timer.elapsed() / CLOCKS_PER_SEC); + + free(h_result); + cudaFreeArray(d_image); + +#else + outputOptions.error(Error_CudaError); +#endif +} + +#if defined HAVE_CUDA + +void CudaCompressorDXT1::setup(cudaArray * image, const nvtt::CompressionOptions::Private & compressionOptions) +{ + setupCompressKernel(compressionOptions.colorWeight.ptr()); + bindTextureToArray(image); +} + +void CudaCompressorDXT1::compressBlocks(uint first, uint count, uint bw, uint bh, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) +{ + // Launch kernel. + compressKernelDXT1(first, count, bw, m_ctx.result, m_ctx.bitmapTable); + + // Copy result to host. 
+ cudaMemcpy(output, m_ctx.result, count * 8, cudaMemcpyDeviceToHost); +} + + +void CudaCompressorDXT3::setup(cudaArray * image, const nvtt::CompressionOptions::Private & compressionOptions) +{ + setupCompressKernel(compressionOptions.colorWeight.ptr()); + bindTextureToArray(image); +} + +void CudaCompressorDXT3::compressBlocks(uint first, uint count, uint bw, uint bh, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) +{ + // Launch kernel. + compressKernelDXT3(first, count, bw, m_ctx.result, m_ctx.bitmapTable); + + // Copy result to host. + cudaMemcpy(output, m_ctx.result, count * 16, cudaMemcpyDeviceToHost); +} + + +void CudaCompressorDXT5::setup(cudaArray * image, const nvtt::CompressionOptions::Private & compressionOptions) +{ + setupCompressKernel(compressionOptions.colorWeight.ptr()); + bindTextureToArray(image); +} + +void CudaCompressorDXT5::compressBlocks(uint first, uint count, uint bw, uint bh, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) +{ + /*// Launch kernel. + compressKernelDXT5(first, count, bw, m_ctx.result, m_ctx.bitmapTable); + + // Copy result to host. + cudaMemcpy(output, m_ctx.result, count * 16, cudaMemcpyDeviceToHost);*/ + + // Launch kernel. + if (alphaMode == AlphaMode_Transparency) + { + // compressWeightedKernelDXT1(first, count, bw, m_ctx.result, m_ctx.bitmapTable); + } + else + { + // compressKernelDXT1_Level4(first, count, w, m_ctx.result, m_ctx.bitmapTable); + } + + // Compress alpha in parallel with the GPU. + for (uint i = 0; i < count; i++) + { + //ColorBlock rgba(blockLinearImage + (first + i) * 16); + //OptimalCompress::compressDXT3A(rgba, alphaBlocks + i); + } + + // Copy result to host. + cudaMemcpy(output, m_ctx.result, count * 8, cudaMemcpyDeviceToHost); + + // @@ Interleave color and alpha blocks. + +} + +#endif // defined HAVE_CUDA + + + + +// @@ This code is very repetitive and needs to be cleaned up. + +#if 0 + + +/* +// Convert linear image to block linear. +static void convertToBlockLinear(const Image * image, uint * blockLinearImage) +{ + const uint w = (image->width() + 3) / 4; + const uint h = (image->height() + 3) / 4; + + for(uint by = 0; by < h; by++) { + for(uint bx = 0; bx < w; bx++) { + const uint bw = min(image->width() - bx * 4, 4U); + const uint bh = min(image->height() - by * 4, 4U); + + for (uint i = 0; i < 16; i++) { + const int x = (i % 4) % bw; + const int y = (i / 4) % bh; + blockLinearImage[(by * w + bx) * 16 + i] = image->pixel(bx * 4 + x, by * 4 + y).u; + } + } + } +} +*/ + + +/// Compress image using CUDA. +void CudaCompressor::compressDXT3(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) +{ + nvDebugCheck(cuda::isHardwarePresent()); +#if defined HAVE_CUDA + + // Image size in blocks. 
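The batching pattern above is worth spelling out: CudaCompressor::compress() splits the image into 4x4 blocks and feeds them to compressBlocks() in chunks of at most MAX_BLOCKS per kernel launch, so a single launch stays short enough not to trip the driver watchdog timer, and each chunk then emits count * blockSize() bytes (8 for DXT1/BC1, 16 for DXT3 and DXT5). A minimal host-only sketch of that arithmetic, with a made-up image size, not part of the patch:

#include <algorithm>
#include <cstdio>

int main()
{
    const unsigned MAX_BLOCKS = 8192U;   // same limit as in CudaCompressorDXT.cpp above
    const unsigned w = 1024, h = 768;    // example image size (assumption)
    const unsigned bw = (w + 3) / 4;     // width in 4x4 blocks
    const unsigned bh = (h + 3) / 4;     // height in 4x4 blocks
    const unsigned blockNum = bw * bh;
    const unsigned bs = 8;               // blockSize() for DXT1; 16 for DXT3/DXT5

    unsigned bn = 0;
    while (bn != blockNum)
    {
        const unsigned count = std::min(blockNum - bn, MAX_BLOCKS);
        // compressBlocks(bn, count, bw, bh, ...) runs here and the compressed
        // chunk is handed to outputOptions.writeData(h_result, count * bs).
        std::printf("batch at block %u: %u blocks, %u bytes\n", bn, count, count * bs);
        bn += count;
    }
}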
+ const uint w = (m_image->width() + 3) / 4; + const uint h = (m_image->height() + 3) / 4; + + uint imageSize = w * h * 16 * sizeof(Color32); + uint * blockLinearImage = (uint *) malloc(imageSize); + convertToBlockLinear(m_image, blockLinearImage); + + const uint blockNum = w * h; + const uint compressedSize = blockNum * 8; + + AlphaBlockDXT3 * alphaBlocks = NULL; + alphaBlocks = (AlphaBlockDXT3 *)malloc(min(compressedSize, MAX_BLOCKS * 8U)); + + setupCompressKernel(compressionOptions.colorWeight.ptr()); + + clock_t start = clock(); + + uint bn = 0; + while(bn != blockNum) + { + uint count = min(blockNum - bn, MAX_BLOCKS); + + cudaMemcpy(m_ctx.data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice); + + // Launch kernel. + if (m_alphaMode == AlphaMode_Transparency) + { + compressWeightedKernelDXT1(count, m_ctx.data, m_ctx.result, m_ctx.bitmapTable); + } + else + { + compressKernelDXT1_Level4(count, m_ctx.data, m_ctx.result, m_ctx.bitmapTable); + } + + // Compress alpha in parallel with the GPU. + for (uint i = 0; i < count; i++) + { + ColorBlock rgba(blockLinearImage + (bn + i) * 16); + OptimalCompress::compressDXT3A(rgba, alphaBlocks + i); + } + + // Check for errors. + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) + { + nvDebug("CUDA Error: %s\n", cudaGetErrorString(err)); + outputOptions.error(Error_CudaError); + } + + // Copy result to host, overwrite swizzled image. + cudaMemcpy(blockLinearImage, m_ctx.result, count * 8, cudaMemcpyDeviceToHost); + + // Output result. + for (uint i = 0; i < count; i++) + { + outputOptions.writeData(alphaBlocks + i, 8); + outputOptions.writeData(blockLinearImage + i * 2, 8); + } + + bn += count; + } + + clock_t end = clock(); + //printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC); + + free(alphaBlocks); + free(blockLinearImage); + +#else + outputOptions.error(Error_CudaError); +#endif +} + + +/// Compress image using CUDA. +void CudaCompressor::compressDXT5(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) +{ + nvDebugCheck(cuda::isHardwarePresent()); +#if defined HAVE_CUDA + + // Image size in blocks. + const uint w = (m_image->width() + 3) / 4; + const uint h = (m_image->height() + 3) / 4; + + uint imageSize = w * h * 16 * sizeof(Color32); + uint * blockLinearImage = (uint *) malloc(imageSize); + convertToBlockLinear(m_image, blockLinearImage); + + const uint blockNum = w * h; + const uint compressedSize = blockNum * 8; + + AlphaBlockDXT5 * alphaBlocks = NULL; + alphaBlocks = (AlphaBlockDXT5 *)malloc(min(compressedSize, MAX_BLOCKS * 8U)); + + setupCompressKernel(compressionOptions.colorWeight.ptr()); + + clock_t start = clock(); + + uint bn = 0; + while(bn != blockNum) + { + uint count = min(blockNum - bn, MAX_BLOCKS); + + cudaMemcpy(m_ctx.data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice); + + // Launch kernel. + if (m_alphaMode == AlphaMode_Transparency) + { + compressWeightedKernelDXT1(count, m_ctx.data, m_ctx.result, m_ctx.bitmapTable); + } + else + { + compressKernelDXT1_Level4(count, m_ctx.data, m_ctx.result, m_ctx.bitmapTable); + } + + // Compress alpha in parallel with the GPU. + for (uint i = 0; i < count; i++) + { + ColorBlock rgba(blockLinearImage + (bn + i) * 16); + QuickCompress::compressDXT5A(rgba, alphaBlocks + i); + } + + // Check for errors. 
+ cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) + { + nvDebug("CUDA Error: %s\n", cudaGetErrorString(err)); + outputOptions.error(Error_CudaError); + } + + // Copy result to host, overwrite swizzled image. + cudaMemcpy(blockLinearImage, m_ctx.result, count * 8, cudaMemcpyDeviceToHost); + + // Output result. + for (uint i = 0; i < count; i++) + { + outputOptions.writeData(alphaBlocks + i, 8); + outputOptions.writeData(blockLinearImage + i * 2, 8); + } + + bn += count; + } + + clock_t end = clock(); + //printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC); + + free(alphaBlocks); + free(blockLinearImage); + +#else + outputOptions.error(Error_CudaError); +#endif +} + + +void CudaCompressor::compressDXT1n(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) +{ + nvDebugCheck(cuda::isHardwarePresent()); +#if defined HAVE_CUDA + + // Image size in blocks. + const uint w = (m_image->width() + 3) / 4; + const uint h = (m_image->height() + 3) / 4; + + uint imageSize = w * h * 16 * sizeof(Color32); + uint * blockLinearImage = (uint *) malloc(imageSize); + convertToBlockLinear(m_image, blockLinearImage); // @@ Do this in parallel with the GPU, or in the GPU! + + const uint blockNum = w * h; + const uint compressedSize = blockNum * 8; + + clock_t start = clock(); + + setupCompressKernel(compressionOptions.colorWeight.ptr()); + + // TODO: Add support for multiple GPUs. + uint bn = 0; + while(bn != blockNum) + { + uint count = min(blockNum - bn, MAX_BLOCKS); + + cudaMemcpy(m_ctx.data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice); + + // Launch kernel. + compressNormalKernelDXT1(count, m_ctx.data, m_ctx.result, m_ctx.bitmapTable); + + // Check for errors. + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) + { + nvDebug("CUDA Error: %s\n", cudaGetErrorString(err)); + outputOptions.error(Error_CudaError); + } + + // Copy result to host, overwrite swizzled image. + cudaMemcpy(blockLinearImage, m_ctx.result, count * 8, cudaMemcpyDeviceToHost); + + // Output result. + outputOptions.writeData(blockLinearImage, count * 8); + + bn += count; + } + + clock_t end = clock(); + //printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC); + + free(blockLinearImage); + +#else + outputOptions.error(Error_CudaError); +#endif +} + + +void CudaCompressor::compressCTX1(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) +{ + nvDebugCheck(cuda::isHardwarePresent()); +#if defined HAVE_CUDA + + // Image size in blocks. + const uint w = (m_image->width() + 3) / 4; + const uint h = (m_image->height() + 3) / 4; + + uint imageSize = w * h * 16 * sizeof(Color32); + uint * blockLinearImage = (uint *) malloc(imageSize); + convertToBlockLinear(m_image, blockLinearImage); // @@ Do this in parallel with the GPU, or in the GPU! + + const uint blockNum = w * h; + const uint compressedSize = blockNum * 8; + + clock_t start = clock(); + + setupCompressKernel(compressionOptions.colorWeight.ptr()); + + // TODO: Add support for multiple GPUs. + uint bn = 0; + while(bn != blockNum) + { + uint count = min(blockNum - bn, MAX_BLOCKS); + + cudaMemcpy(m_ctx.data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice); + + // Launch kernel. + compressKernelCTX1(count, m_ctx.data, m_ctx.result, m_ctx.bitmapTableCTX); + + // Check for errors. 
+ cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) + { + nvDebug("CUDA Error: %s\n", cudaGetErrorString(err)); + + outputOptions.error(Error_CudaError); + } + + // Copy result to host, overwrite swizzled image. + cudaMemcpy(blockLinearImage, m_ctx.result, count * 8, cudaMemcpyDeviceToHost); + + // Output result. + outputOptions.writeData(blockLinearImage, count * 8); + + bn += count; + } + + clock_t end = clock(); + //printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC); + + free(blockLinearImage); + +#else + outputOptions.error(Error_CudaError); +#endif +} + + +void CudaCompressor::compressDXT5n(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) +{ + nvDebugCheck(cuda::isHardwarePresent()); +#if defined HAVE_CUDA + + // @@ TODO + +#else + outputOptions.error(Error_CudaError); +#endif +} + +#endif // 0 + +#endif // defined HAVE_CUDA Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/cuda/CudaMath.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/cuda/CudaMath.h +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/cuda/CudaMath.h @@ -1,260 +1,433 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. - -// Math functions and operators to be used with vector types. 
- -#ifndef CUDAMATH_H -#define CUDAMATH_H - -#include - - -inline __device__ __host__ float3 operator *(float3 a, float3 b) -{ - return make_float3(a.x*b.x, a.y*b.y, a.z*b.z); -} - -inline __device__ __host__ float3 operator *(float f, float3 v) -{ - return make_float3(v.x*f, v.y*f, v.z*f); -} - -inline __device__ __host__ float3 operator *(float3 v, float f) -{ - return make_float3(v.x*f, v.y*f, v.z*f); -} - -inline __device__ __host__ float3 operator +(float3 a, float3 b) -{ - return make_float3(a.x+b.x, a.y+b.y, a.z+b.z); -} - -inline __device__ __host__ void operator +=(float3 & b, float3 a) -{ - b.x += a.x; - b.y += a.y; - b.z += a.z; -} - -inline __device__ __host__ float3 operator -(float3 a, float3 b) -{ - return make_float3(a.x-b.x, a.y-b.y, a.z-b.z); -} - -inline __device__ __host__ void operator -=(float3 & b, float3 a) -{ - b.x -= a.x; - b.y -= a.y; - b.z -= a.z; -} - -inline __device__ __host__ float3 operator /(float3 v, float f) -{ - float inv = 1.0f / f; - return v * inv; -} - -inline __device__ __host__ void operator /=(float3 & b, float f) -{ - float inv = 1.0f / f; - b.x *= inv; - b.y *= inv; - b.z *= inv; -} - -inline __device__ __host__ bool operator ==(float3 a, float3 b) -{ - return a.x == b.x && a.y == b.y && a.z == b.z; -} - -inline __device__ __host__ float dot(float3 a, float3 b) -{ - return a.x * b.x + a.y * b.y + a.z * b.z; -} - -inline __device__ __host__ float dot(float4 a, float4 b) -{ - return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w; -} - -inline __device__ __host__ float clamp(float f, float a, float b) -{ - return max(a, min(f, b)); -} - -inline __device__ __host__ float3 clamp(float3 v, float a, float b) -{ - return make_float3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b)); -} - -inline __device__ __host__ float3 clamp(float3 v, float3 a, float3 b) -{ - return make_float3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z)); -} - - -inline __device__ __host__ float3 normalize(float3 v) -{ - float len = 1.0f / sqrtf(dot(v, v)); - return make_float3(v.x * len, v.y * len, v.z * len); -} - - - - -// Use power method to find the first eigenvector. -// http://www.miislita.com/information-retrieval-tutorial/matrix-tutorial-3-eigenvalues-eigenvectors.html -inline __device__ __host__ float3 firstEigenVector( float matrix[6] ) -{ - // 8 iterations seems to be more than enough. 
- - float3 row0 = make_float3(matrix[0], matrix[1], matrix[2]); - float3 row1 = make_float3(matrix[1], matrix[3], matrix[4]); - float3 row2 = make_float3(matrix[2], matrix[4], matrix[5]); - - float r0 = dot(row0, row0); - float r1 = dot(row1, row1); - float r2 = dot(row2, row2); - - float3 v; - if (r0 > r1 && r0 > r2) v = row0; - else if (r1 > r2) v = row1; - else v = row2; - - //float3 v = make_float3(1.0f, 1.0f, 1.0f); - for(int i = 0; i < 8; i++) { - float x = v.x * matrix[0] + v.y * matrix[1] + v.z * matrix[2]; - float y = v.x * matrix[1] + v.y * matrix[3] + v.z * matrix[4]; - float z = v.x * matrix[2] + v.y * matrix[4] + v.z * matrix[5]; - float m = max(max(x, y), z); - float iv = 1.0f / m; - if (m == 0.0f) iv = 0.0f; - v = make_float3(x*iv, y*iv, z*iv); - } - - return v; -} - -inline __device__ bool singleColor(const float3 * colors) -{ -#if __DEVICE_EMULATION__ - bool sameColor = false; - for (int i = 0; i < 16; i++) - { - sameColor &= (colors[i] == colors[0]); - } - return sameColor; -#else - __shared__ int sameColor[16]; - - const int idx = threadIdx.x; - - sameColor[idx] = (colors[idx] == colors[0]); - sameColor[idx] &= sameColor[idx^8]; - sameColor[idx] &= sameColor[idx^4]; - sameColor[idx] &= sameColor[idx^2]; - sameColor[idx] &= sameColor[idx^1]; - - return sameColor[0]; -#endif -} - -inline __device__ void colorSums(const float3 * colors, float3 * sums) -{ -#if __DEVICE_EMULATION__ - float3 color_sum = make_float3(0.0f, 0.0f, 0.0f); - for (int i = 0; i < 16; i++) - { - color_sum += colors[i]; - } - - for (int i = 0; i < 16; i++) - { - sums[i] = color_sum; - } -#else - - const int idx = threadIdx.x; - - sums[idx] = colors[idx]; - sums[idx] += sums[idx^8]; - sums[idx] += sums[idx^4]; - sums[idx] += sums[idx^2]; - sums[idx] += sums[idx^1]; - -#endif -} - -inline __device__ float3 bestFitLine(const float3 * colors, float3 color_sum, float3 colorMetric) -{ - // Compute covariance matrix of the given colors. -#if __DEVICE_EMULATION__ - float covariance[6] = {0, 0, 0, 0, 0, 0}; - for (int i = 0; i < 16; i++) - { - float3 a = (colors[i] - color_sum * (1.0f / 16.0f)) * colorMetric; - covariance[0] += a.x * a.x; - covariance[1] += a.x * a.y; - covariance[2] += a.x * a.z; - covariance[3] += a.y * a.y; - covariance[4] += a.y * a.z; - covariance[5] += a.z * a.z; - } -#else - - const int idx = threadIdx.x; - - float3 diff = (colors[idx] - color_sum * (1.0f / 16.0f)) * colorMetric; - - // @@ Eliminate two-way bank conflicts here. - // @@ It seems that doing that and unrolling the reduction doesn't help... - __shared__ float covariance[16*6]; - - covariance[6 * idx + 0] = diff.x * diff.x; // 0, 6, 12, 2, 8, 14, 4, 10, 0 - covariance[6 * idx + 1] = diff.x * diff.y; - covariance[6 * idx + 2] = diff.x * diff.z; - covariance[6 * idx + 3] = diff.y * diff.y; - covariance[6 * idx + 4] = diff.y * diff.z; - covariance[6 * idx + 5] = diff.z * diff.z; - - for(int d = 8; d > 0; d >>= 1) - { - if (idx < d) - { - covariance[6 * idx + 0] += covariance[6 * (idx+d) + 0]; - covariance[6 * idx + 1] += covariance[6 * (idx+d) + 1]; - covariance[6 * idx + 2] += covariance[6 * (idx+d) + 2]; - covariance[6 * idx + 3] += covariance[6 * (idx+d) + 3]; - covariance[6 * idx + 4] += covariance[6 * (idx+d) + 4]; - covariance[6 * idx + 5] += covariance[6 * (idx+d) + 5]; - } - } - -#endif - - // Compute first eigen vector. 
- return firstEigenVector(covariance); -} - - -#endif // CUDAMATH_H +// Copyright (c) 2009-2011 Ignacio Castano +// Copyright (c) 2007-2009 NVIDIA Corporation -- Ignacio Castano +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +// Math functions and operators to be used with vector types. + +#ifndef CUDAMATH_H +#define CUDAMATH_H + + + +inline __device__ __host__ float3 operator *(float3 a, float3 b) +{ + return make_float3(a.x*b.x, a.y*b.y, a.z*b.z); +} + +inline __device__ __host__ float3 operator *(float f, float3 v) +{ + return make_float3(v.x*f, v.y*f, v.z*f); +} + +inline __device__ __host__ float3 operator *(float3 v, float f) +{ + return make_float3(v.x*f, v.y*f, v.z*f); +} + +inline __device__ __host__ float3 operator +(float3 a, float3 b) +{ + return make_float3(a.x+b.x, a.y+b.y, a.z+b.z); +} + +inline __device__ __host__ void operator +=(float3 & b, float3 a) +{ + b.x += a.x; + b.y += a.y; + b.z += a.z; +} + +inline __device__ __host__ float3 operator -(float3 a, float3 b) +{ + return make_float3(a.x-b.x, a.y-b.y, a.z-b.z); +} + +inline __device__ __host__ void operator -=(float3 & b, float3 a) +{ + b.x -= a.x; + b.y -= a.y; + b.z -= a.z; +} + +inline __device__ __host__ float3 operator /(float3 v, float f) +{ + float inv = 1.0f / f; + return v * inv; +} + +inline __device__ __host__ void operator /=(float3 & b, float f) +{ + float inv = 1.0f / f; + b.x *= inv; + b.y *= inv; + b.z *= inv; +} + +inline __device__ __host__ bool operator ==(float3 a, float3 b) +{ + return a.x == b.x && a.y == b.y && a.z == b.z; +} + + +// float2 operators +inline __device__ __host__ float2 operator *(float2 a, float2 b) +{ + return make_float2(a.x*b.x, a.y*b.y); +} + +inline __device__ __host__ float2 operator *(float f, float2 v) +{ + return make_float2(v.x*f, v.y*f); +} + +inline __device__ __host__ float2 operator *(float2 v, float f) +{ + return make_float2(v.x*f, v.y*f); +} + +inline __device__ __host__ float2 operator +(float2 a, float2 b) +{ + return make_float2(a.x+b.x, a.y+b.y); +} + +inline __device__ __host__ void operator +=(float2 & b, float2 a) +{ + b.x += a.x; + b.y += a.y; +} + +inline __device__ __host__ float2 operator -(float2 a, float2 b) +{ + return make_float2(a.x-b.x, a.y-b.y); +} + +inline __device__ __host__ void operator -=(float2 & b, float2 a) +{ + b.x -= a.x; + b.y -= a.y; +} + +inline __device__ __host__ float2 operator /(float2 v, float f) +{ + float inv = 1.0f / f; + return v * inv; +} + +inline __device__ __host__ void operator 
/=(float2 & b, float f) +{ + float inv = 1.0f / f; + b.x *= inv; + b.y *= inv; +} + +inline __device__ __host__ bool operator ==(float2 a, float2 b) +{ + return a.x == b.x && a.y == b.y; +} + + +inline __device__ __host__ float dot(float2 a, float2 b) +{ + return a.x * b.x + a.y * b.y; +} + +inline __device__ __host__ float dot(float3 a, float3 b) +{ + return a.x * b.x + a.y * b.y + a.z * b.z; +} + +inline __device__ __host__ float dot(float4 a, float4 b) +{ + return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w; +} + +inline __device__ __host__ float clamp(float f, float a, float b) +{ + return max(a, min(f, b)); +} + +inline __device__ __host__ float3 clamp(float3 v, float a, float b) +{ + return make_float3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b)); +} + +inline __device__ __host__ float3 clamp(float3 v, float3 a, float3 b) +{ + return make_float3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z)); +} + + +inline __device__ __host__ float3 normalize(float3 v) +{ + float len = 1.0f / sqrtf(dot(v, v)); + return make_float3(v.x * len, v.y * len, v.z * len); +} + +inline __device__ __host__ float3 lerp(float3 a, float3 b, float t) +{ + const float s = 1.0f - t; + return make_float3(s * a.x + t * b.x, s * a.y + t * b.y, s * a.z + t * b.z); +} + +inline __device__ __host__ float lengthSquared(float3 a) +{ + return dot(a, a); +} + +inline __device__ __host__ float lengthSquared(float2 a) +{ + return dot(a, a); +} + + +// Use power method to find the first eigenvector. +// http://www.miislita.com/information-retrieval-tutorial/matrix-tutorial-3-eigenvalues-eigenvectors.html +inline __device__ __host__ float3 firstEigenVector( float matrix[6] ) +{ + // 8 iterations seems to be more than enough. + + float3 row0 = make_float3(matrix[0], matrix[1], matrix[2]); + float3 row1 = make_float3(matrix[1], matrix[3], matrix[4]); + float3 row2 = make_float3(matrix[2], matrix[4], matrix[5]); + + float r0 = dot(row0, row0); + float r1 = dot(row1, row1); + float r2 = dot(row2, row2); + + float3 v; + if (r0 > r1 && r0 > r2) v = row0; + else if (r1 > r2) v = row1; + else v = row2; + + //float3 v = make_float3(1.0f, 1.0f, 1.0f); + for(int i = 0; i < 8; i++) { + float x = v.x * matrix[0] + v.y * matrix[1] + v.z * matrix[2]; + float y = v.x * matrix[1] + v.y * matrix[3] + v.z * matrix[4]; + float z = v.x * matrix[2] + v.y * matrix[4] + v.z * matrix[5]; + float m = max(max(x, y), z); + float iv = 1.0f / m; + if (m == 0.0f) iv = 0.0f; + v = make_float3(x*iv, y*iv, z*iv); + } + + return v; +} + + +inline __device__ bool singleColor(const float3 * colors) +{ +#if __DEVICE_EMULATION__ + bool sameColor = false; + for (int i = 0; i < 16; i++) + { + sameColor &= (colors[i] == colors[0]); + } + return sameColor; +#else + __shared__ int sameColor[16]; + + const int idx = threadIdx.x; + + sameColor[idx] = (colors[idx] == colors[0]); + sameColor[idx] &= sameColor[idx^8]; + sameColor[idx] &= sameColor[idx^4]; + sameColor[idx] &= sameColor[idx^2]; + sameColor[idx] &= sameColor[idx^1]; + + return sameColor[0]; +#endif +} + +inline __device__ void colorSums(const float3 * colors, float3 * sums) +{ +#if __DEVICE_EMULATION__ + float3 color_sum = make_float3(0.0f, 0.0f, 0.0f); + for (int i = 0; i < 16; i++) + { + color_sum += colors[i]; + } + + for (int i = 0; i < 16; i++) + { + sums[i] = color_sum; + } +#else + + const int idx = threadIdx.x; + + sums[idx] = colors[idx]; + sums[idx] += sums[idx^8]; + sums[idx] += sums[idx^4]; + sums[idx] += sums[idx^2]; + sums[idx] += sums[idx^1]; + +#endif +} + +inline 
__device__ float3 bestFitLine(const float3 * colors, float3 color_sum, float3 colorMetric) +{ + // Compute covariance matrix of the given colors. +#if __DEVICE_EMULATION__ + float covariance[6] = {0, 0, 0, 0, 0, 0}; + for (int i = 0; i < 16; i++) + { + float3 a = (colors[i] - color_sum * (1.0f / 16.0f)) * colorMetric; + covariance[0] += a.x * a.x; + covariance[1] += a.x * a.y; + covariance[2] += a.x * a.z; + covariance[3] += a.y * a.y; + covariance[4] += a.y * a.z; + covariance[5] += a.z * a.z; + } +#else + + const int idx = threadIdx.x; + + float3 diff = (colors[idx] - color_sum * (1.0f / 16.0f)) * colorMetric; + + // @@ Eliminate two-way bank conflicts here. + // @@ It seems that doing that and unrolling the reduction doesn't help... + __shared__ float covariance[16*6]; + + covariance[6 * idx + 0] = diff.x * diff.x; // 0, 6, 12, 2, 8, 14, 4, 10, 0 + covariance[6 * idx + 1] = diff.x * diff.y; + covariance[6 * idx + 2] = diff.x * diff.z; + covariance[6 * idx + 3] = diff.y * diff.y; + covariance[6 * idx + 4] = diff.y * diff.z; + covariance[6 * idx + 5] = diff.z * diff.z; + + for(int d = 8; d > 0; d >>= 1) + { + if (idx < d) + { + covariance[6 * idx + 0] += covariance[6 * (idx+d) + 0]; + covariance[6 * idx + 1] += covariance[6 * (idx+d) + 1]; + covariance[6 * idx + 2] += covariance[6 * (idx+d) + 2]; + covariance[6 * idx + 3] += covariance[6 * (idx+d) + 3]; + covariance[6 * idx + 4] += covariance[6 * (idx+d) + 4]; + covariance[6 * idx + 5] += covariance[6 * (idx+d) + 5]; + } + } + +#endif + + // Compute first eigen vector. + return firstEigenVector(covariance); +} + + +// @@ For 2D this may not be the most efficient method. It's a quadratic equation, right? +inline __device__ __host__ float2 firstEigenVector2D( float matrix[3] ) +{ + // @@ 8 iterations is probably more than enough. + + const float2 row0 = make_float2(matrix[0], matrix[1]); + const float2 row1 = make_float2(matrix[1], matrix[2]); + + float r0 = lengthSquared(row0); + float r1 = lengthSquared(row1); + + float2 v; + if (r0 > r1) v = row0; + v = row1; + + //float2 v = make_float2(1.0f, 1.0f); + for(int i = 0; i < 8; i++) { + float x = v.x * matrix[0] + v.y * matrix[1]; + float y = v.x * matrix[1] + v.y * matrix[2]; + float m = max(x, y); + float iv = 1.0f / m; + if (m == 0.0f) iv = 0.0f; + v = make_float2(x*iv, y*iv); + } + + return v; +} + +inline __device__ void colorSums(const float2 * colors, float2 * sums) +{ +#if __DEVICE_EMULATION__ + float2 color_sum = make_float2(0.0f, 0.0f); + for (int i = 0; i < 16; i++) + { + color_sum += colors[i]; + } + + for (int i = 0; i < 16; i++) + { + sums[i] = color_sum; + } +#else + + const int idx = threadIdx.x; + + sums[idx] = colors[idx]; + sums[idx] += sums[idx^8]; + sums[idx] += sums[idx^4]; + sums[idx] += sums[idx^2]; + sums[idx] += sums[idx^1]; + +#endif +} + +inline __device__ float2 bestFitLine(const float2 * colors, float2 color_sum) +{ + // Compute covariance matrix of the given colors. 
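For orientation, firstEigenVector() above (and its 2D counterpart firstEigenVector2D()) estimates the dominant eigenvector of the symmetric covariance matrix, stored as six floats {m00, m01, m02, m11, m12, m22}, by eight rounds of power iteration; that eigenvector is the best-fit line direction through the 16 colors of a block. A plain host C++ restatement of the same loop, with a covariance matrix made up for the demo:

#include <algorithm>
#include <cstdio>

int main()
{
    // Example covariance {m00, m01, m02, m11, m12, m22} (made up for the demo).
    const float m[6] = { 4.0f, 1.0f, 0.5f, 3.0f, 0.25f, 1.0f };
    // The kernel seeds v with the largest row; (1, 1, 1), the commented-out
    // alternative in the source, works just as well for this illustration.
    float v[3] = { 1.0f, 1.0f, 1.0f };

    for (int i = 0; i < 8; i++) // "8 iterations seems to be more than enough."
    {
        const float x = v[0] * m[0] + v[1] * m[1] + v[2] * m[2];
        const float y = v[0] * m[1] + v[1] * m[3] + v[2] * m[4];
        const float z = v[0] * m[2] + v[1] * m[4] + v[2] * m[5];
        const float mx = std::max(std::max(x, y), z);
        const float iv = (mx == 0.0f) ? 0.0f : 1.0f / mx; // same zero guard as the kernel
        v[0] = x * iv; v[1] = y * iv; v[2] = z * iv;
    }
    std::printf("dominant eigenvector ~ (%f, %f, %f)\n", v[0], v[1], v[2]);
    return 0;
}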
+#if __DEVICE_EMULATION__ + float covariance[3] = {0, 0, 0}; + for (int i = 0; i < 16; i++) + { + float2 a = (colors[i] - color_sum * (1.0f / 16.0f)); + covariance[0] += a.x * a.x; + covariance[1] += a.x * a.y; + covariance[2] += a.y * a.y; + } +#else + + const int idx = threadIdx.x; + + float2 diff = (colors[idx] - color_sum * (1.0f / 16.0f)); + + __shared__ float covariance[16*3]; + + covariance[3 * idx + 0] = diff.x * diff.x; + covariance[3 * idx + 1] = diff.x * diff.y; + covariance[3 * idx + 2] = diff.y * diff.y; + + for(int d = 8; d > 0; d >>= 1) + { + if (idx < d) + { + covariance[3 * idx + 0] += covariance[3 * (idx+d) + 0]; + covariance[3 * idx + 1] += covariance[3 * (idx+d) + 1]; + covariance[3 * idx + 2] += covariance[3 * (idx+d) + 2]; + } + } + +#endif + + // Compute first eigen vector. + return firstEigenVector2D(covariance); +} + + +#endif // CUDAMATH_H Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/cuda/CudaUtils.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/cuda/CudaUtils.h +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/cuda/CudaUtils.h @@ -1,4 +1,5 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano +// Copyright (c) 2009-2011 Ignacio Castano +// Copyright (c) 2007-2009 NVIDIA Corporation -- Ignacio Castano // // Permission is hereby granted, free of charge, to any person // obtaining a copy of this software and associated documentation @@ -32,10 +33,8 @@ bool isHardwarePresent(); int deviceCount(); int getFastestDevice(); - bool isValidDevice(int i); - - bool initDevice(int * device_ptr); - void exitDevice(); + bool setDevice(int i); + void exit(); }; } // nv namespace Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/cuda/CudaUtils.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/cuda/CudaUtils.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/cuda/CudaUtils.cpp @@ -1,300 +1,239 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. - -#include -#include -#include "CudaUtils.h" - -#if defined HAVE_CUDA -#include -#include -#endif - -using namespace nv; -using namespace cuda; - -/* @@ Move this to win32 utils or somewhere else. 
-#if NV_OS_WIN32 - -#define WINDOWS_LEAN_AND_MEAN -#include - -static bool isWindowsVista() -{ -OSVERSIONINFO osvi; -osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO); - -::GetVersionEx(&osvi); -return osvi.dwMajorVersion >= 6; -} - - -typedef BOOL (WINAPI *LPFN_ISWOW64PROCESS) (HANDLE, PBOOL); - -static bool isWow32() -{ -LPFN_ISWOW64PROCESS fnIsWow64Process = (LPFN_ISWOW64PROCESS)GetProcAddress(GetModuleHandle("kernel32"), "IsWow64Process"); - -BOOL bIsWow64 = FALSE; - -if (NULL != fnIsWow64Process) -{ -if (!fnIsWow64Process(GetCurrentProcess(), &bIsWow64)) -{ -// Assume 32 bits. -return true; -} -} - -return !bIsWow64; -} - -#endif -*/ - - -static bool isCudaDriverAvailable(int version) -{ -#if defined HAVE_CUDA -#if NV_OS_WIN32 - Library nvcuda("nvcuda.dll"); -#else - Library nvcuda(NV_LIBRARY_NAME(cuda)); -#endif - - if (!nvcuda.isValid()) - { - nvDebug("*** CUDA driver not found.\n"); - return false; - } - - if (version >= 2000) - { - void * address = nvcuda.bindSymbol("cuStreamCreate"); - if (address == NULL) { - nvDebug("*** CUDA driver version < 2.0.\n"); - return false; - } - } - - if (version >= 2010) - { - void * address = nvcuda.bindSymbol("cuModuleLoadDataEx"); - if (address == NULL) { - nvDebug("*** CUDA driver version < 2.1.\n"); - return false; - } - } - - if (version >= 2020) - { - typedef CUresult (CUDAAPI * PFCU_DRIVERGETVERSION)(int * version); - - PFCU_DRIVERGETVERSION driverGetVersion = (PFCU_DRIVERGETVERSION)nvcuda.bindSymbol("cuDriverGetVersion"); - if (driverGetVersion == NULL) { - nvDebug("*** CUDA driver version < 2.2.\n"); - return false; - } - - int driverVersion; - CUresult err = driverGetVersion(&driverVersion); - if (err != CUDA_SUCCESS) { - nvDebug("*** Error querying driver version: '%s'.\n", cudaGetErrorString((cudaError_t)err)); - return false; - } - - return driverVersion >= version; - } -#endif // HAVE_CUDA - - return true; -} - - -/// Determine if CUDA is available. -bool nv::cuda::isHardwarePresent() -{ -#if defined HAVE_CUDA - // Make sure that CUDA driver matches CUDA runtime. - if (!isCudaDriverAvailable(CUDART_VERSION)) - { - nvDebug("CUDA driver not available for CUDA runtime %d\n", CUDART_VERSION); - return false; - } - - int count = deviceCount(); - if (count == 1) - { - // Make sure it's not an emulation device. - cudaDeviceProp deviceProp; - cudaGetDeviceProperties(&deviceProp, 0); - - // deviceProp.name != Device Emulation (CPU) - if (deviceProp.major == -1 || deviceProp.minor == -1) - { - return false; - } - } - - // @@ Make sure that warp size == 32 - - // @@ Make sure available GPU is faster than the CPU. - - return count > 0; -#else - return false; -#endif -} - -/// Get number of CUDA enabled devices. -int nv::cuda::deviceCount() -{ -#if defined HAVE_CUDA - int gpuCount = 0; - - cudaError_t result = cudaGetDeviceCount(&gpuCount); - - if (result == cudaSuccess) - { - return gpuCount; - } -#endif - return 0; -} - - -// Make sure device meets requirements: -// - Not an emulation device. -// - Not an integrated device? -// - Faster than CPU. -bool nv::cuda::isValidDevice(int i) -{ -#if defined HAVE_CUDA - - cudaDeviceProp device_properties; - cudaGetDeviceProperties(&device_properties, i); - int gflops = device_properties.multiProcessorCount * device_properties.clockRate; - - if (device_properties.major == -1 || device_properties.minor == -1) { - // Emulation device. - return false; - } - -#if CUDART_VERSION >= 2030 // 2.3 - /*if (device_properties.integrated) - { - // Integrated devices. 
- return false; - }*/ -#endif - - return true; -#else - return false; -#endif -} - -int nv::cuda::getFastestDevice() -{ - int max_gflops_device = -1; -#if defined HAVE_CUDA - int max_gflops = 0; - - const int device_count = deviceCount(); - for (int i = 0; i < device_count; i++) - { - if (isValidDevice(i)) - { - cudaDeviceProp device_properties; - cudaGetDeviceProperties(&device_properties, i); - int gflops = device_properties.multiProcessorCount * device_properties.clockRate; - - if (gflops > max_gflops) - { - max_gflops = gflops; - max_gflops_device = i; - } - } - } -#endif - return max_gflops_device; -} - - -/// Activate the given devices. -bool nv::cuda::initDevice(int * device_ptr) -{ - nvDebugCheck(device_ptr != NULL); -#if defined HAVE_CUDA - -#if CUDART_VERSION >= 2030 // 2.3 - - // Set device flags to yield in order to play nice with other threads and to find out if CUDA was already active. - cudaError_t resul = cudaSetDeviceFlags(cudaDeviceScheduleYield); - -#endif - - int device = getFastestDevice(); - - if (device == -1) - { - // No device is fast enough. - *device_ptr = -1; - return false; - } - - // Select CUDA device. - cudaError_t result = cudaSetDevice(device); - - if (result == cudaErrorSetOnActiveProcess) - { - int device; - result = cudaGetDevice(&device); - - *device_ptr = -1; // No device to cleanup. - return isValidDevice(device); // Return true if device is valid. - } - else if (result != cudaSuccess) - { - nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(result)); - *device_ptr = -1; - return false; - } - - *device_ptr = device; - return true; -#else - return false; -#endif -} - -void nv::cuda::exitDevice() -{ -#if defined HAVE_CUDA - cudaError_t result = cudaThreadExit(); - - if (result != cudaSuccess) { - nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(result)); - } -#endif -} +// Copyright (c) 2009-2011 Ignacio Castano +// Copyright (c) 2007-2009 NVIDIA Corporation -- Ignacio Castano +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +#include "nvcore/Debug.h" +#include "CudaUtils.h" + +#if defined HAVE_CUDA +#include +#include +#endif + +using namespace nv; +using namespace cuda; + +/* @@ Move this to win32 utils or somewhere else. 
+#if NV_OS_WIN32 + +#define WINDOWS_LEAN_AND_MEAN +#include + +static bool isWindowsVista() +{ + OSVERSIONINFO osvi; + osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO); + + ::GetVersionEx(&osvi); + return osvi.dwMajorVersion >= 6; +} + + +typedef BOOL (WINAPI *LPFN_ISWOW64PROCESS) (HANDLE, PBOOL); + +static bool isWow32() +{ + LPFN_ISWOW64PROCESS fnIsWow64Process = (LPFN_ISWOW64PROCESS)GetProcAddress(GetModuleHandle("kernel32"), "IsWow64Process"); + + BOOL bIsWow64 = FALSE; + + if (NULL != fnIsWow64Process) + { + if (!fnIsWow64Process(GetCurrentProcess(), &bIsWow64)) + { + // Assume 32 bits. + return true; + } + } + + return !bIsWow64; +} + +#endif +*/ + + +static bool isCudaDriverAvailable(int version) +{ +#if defined HAVE_CUDA +#if NV_OS_WIN32 + Library nvcuda("nvcuda.dll"); +#else + Library nvcuda(NV_LIBRARY_NAME(cuda)); +#endif + + if (!nvcuda.isValid()) + { + nvDebug("*** CUDA driver not found.\n"); + return false; + } + + if (version >= 2000) + { + void * address = nvcuda.bindSymbol("cuStreamCreate"); + if (address == NULL) { + nvDebug("*** CUDA driver version < 2.0.\n"); + return false; + } + } + + if (version >= 2010) + { + void * address = nvcuda.bindSymbol("cuModuleLoadDataEx"); + if (address == NULL) { + nvDebug("*** CUDA driver version < 2.1.\n"); + return false; + } + } + + if (version >= 2020) + { + typedef CUresult (CUDAAPI * PFCU_DRIVERGETVERSION)(int * version); + + PFCU_DRIVERGETVERSION driverGetVersion = (PFCU_DRIVERGETVERSION)nvcuda.bindSymbol("cuDriverGetVersion"); + if (driverGetVersion == NULL) { + nvDebug("*** CUDA driver version < 2.2.\n"); + return false; + } + + int driverVersion; + CUresult err = driverGetVersion(&driverVersion); + if (err != CUDA_SUCCESS) { + nvDebug("*** Error querying driver version: '%s'.\n", cudaGetErrorString((cudaError_t)err)); + return false; + } + + return driverVersion >= version; + } +#endif // HAVE_CUDA + + return true; +} + + +/// Determine if CUDA is available. +bool nv::cuda::isHardwarePresent() +{ +#if defined HAVE_CUDA + // Make sure that CUDA driver matches CUDA runtime. + if (!isCudaDriverAvailable(CUDART_VERSION)) + { + nvDebug("CUDA driver not available for CUDA runtime %d\n", CUDART_VERSION); + return false; + } + + int count = deviceCount(); + if (count == 1) + { + // Make sure it's not an emulation device. + cudaDeviceProp deviceProp; + cudaGetDeviceProperties(&deviceProp, 0); + + // deviceProp.name != Device Emulation (CPU) + if (deviceProp.major == -1 || deviceProp.minor == -1) + { + return false; + } + } + + // @@ Make sure that warp size == 32 + + return count > 0; +#else + return false; +#endif +} + +/// Get number of CUDA enabled devices. 
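The simplified nv::cuda interface declared in CudaUtils.h above (isHardwarePresent, deviceCount, getFastestDevice, setDevice, exit) replaces the old initDevice/exitDevice/isValidDevice trio. A sketch of how a caller is presumably expected to drive it; the surrounding control flow is an assumption, only the helpers shown in this patch are used:

#include "CudaUtils.h"

// Hypothetical caller: pick a device, make it current, run the GPU path,
// then tear the CUDA context down.  With the 2.1 helpers, getFastestDevice()
// always returns a device index (0 by default) and setDevice() reports
// failure instead of handing back a device pointer to clean up.
static bool runGpuCompression()
{
    if (!nv::cuda::isHardwarePresent())
        return false;                         // fall back to the CPU compressors

    const int device = nv::cuda::getFastestDevice();
    if (!nv::cuda::setDevice(device))
        return false;

    // ... construct a CudaContext and the per-format CudaCompressor here ...

    nv::cuda::exit();                         // wraps cudaThreadExit()
    return true;
}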
+int nv::cuda::deviceCount() +{ +#if defined HAVE_CUDA + int gpuCount = 0; + + cudaError_t result = cudaGetDeviceCount(&gpuCount); + + if (result == cudaSuccess) + { + return gpuCount; + } +#endif + return 0; +} + +int nv::cuda::getFastestDevice() +{ + int max_gflops_device = 0; +#if defined HAVE_CUDA + int max_gflops = 0; + + const int device_count = deviceCount(); + int current_device = 0; + while (current_device < device_count) + { + cudaDeviceProp device_properties; + cudaGetDeviceProperties(&device_properties, current_device); + int gflops = device_properties.multiProcessorCount * device_properties.clockRate; + + if (device_properties.major != -1 && device_properties.minor != -1) + { + if( gflops > max_gflops ) + { + max_gflops = gflops; + max_gflops_device = current_device; + } + } + + current_device++; + } +#endif + return max_gflops_device; +} + + +/// Activate the given devices. +bool nv::cuda::setDevice(int i) +{ + nvCheck(i < deviceCount()); +#if defined HAVE_CUDA + cudaError_t result = cudaSetDevice(i); + + if (result != cudaSuccess) { + nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(result)); + } + + return result == cudaSuccess; +#else + return false; +#endif +} + +void nv::cuda::exit() +{ +#if defined HAVE_CUDA + cudaError_t result = cudaThreadExit(); + + if (result != cudaSuccess) { + nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(result)); + } +#endif +} Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/experimental/nvtt_experimental.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/experimental/nvtt_experimental.h +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/experimental/nvtt_experimental.h @@ -0,0 +1,103 @@ + +#ifndef NVTT_EXPERIMENTAL_H +#define NVTT_EXPERIMENTAL_H + +#include + +typedef struct NvttTexture NvttTexture; +typedef struct NvttOutputOptions NvttOutputOptions; + + +// Global functions +void nvttInitialize(...); +unsigned int nvttGetVersion(); +const char * nvttGetErrorString(unsigned int error); + + +// Texture functions +NvttTexture * nvttCreateTexture(); +void nvttDestroyTexture(NvttTexture * tex); + +void nvttSetTexture2D(NvttTexture * tex, NvttInputFormat format, uint w, uint h, uint idx, void * data); + +void nvttResize(NvttTexture * img, uint w, uint h); +unsigned int nvttDownsample(NvttTexture * img); + +void nvttOutputCompressed(NvttTexture * img, NvttOutputFormat format); +void nvttOutputPixelFormat(NvttTexture * img, NvttOutputFormat format); + + + + +// How to control the compression parameters? + +// Using many arguments: +// void nvttCompressImage(img, format, quality, r, g, b, a, ...); + +// Using existing compression option class: +// compressionOptions = nvttCreateCompressionOptions(); +// nvttSetCompressionOptionsFormat(compressionOptions, format); +// nvttSetCompressionOptionsQuality(compressionOptions, quality); +// nvttSetCompressionOptionsQuality(compressionOptions, quality); +// nvttSetCompressionOptionsColorWeights(compressionOptions, r, g, b, a); +// ... +// nvttCompressImage(img, compressionOptions); + +// Using thread local context state: +// void nvttSetCompressionFormat(format); +// void nvttSetCompressionQuality(quality); +// void nvttSetCompressionColorWeights(r, g, b, a); +// ... 
+// nvttCompressImage(img); + +// Using thread local context state, but with GL style function arguments: +// nvttCompressorParameteri(NVTT_FORMAT, format); +// nvttCompressorParameteri(NVTT_QUALITY, quality); +// nvttCompressorParameterf(NVTT_COLOR_WEIGHT_RED, r); +// nvttCompressorParameterf(NVTT_COLOR_WEIGHT_GREEN, g); +// nvttCompressorParameterf(NVTT_COLOR_WEIGHT_BLUE, b); +// nvttCompressorParameterf(NVTT_COLOR_WEIGHT_ALPHA, a); +// or nvttCompressorParameter4f(NVTT_COLOR_WEIGHTS, r, g, b, a); +// ... +// nvttCompressImage(img); + +// How do we get the compressed output? +// - Using callbacks. (via new entrypoints, or through outputOptions) +// - Return it explicitely from nvttCompressImage. +// - Store it along the image, retrieve later explicitely with 'nvttGetCompressedData(img, ...)' + +/* + +// Global functions +void nvttInitialize(...); +unsigned int nvttGetVersion(); +const char * nvttGetErrorString(unsigned int error); + +// Context object +void nvttCreateContext(); +void nvttDestroyContext(); + +void nvttSetParameter1i(unsigned int name, int value); + +void nvttSetParameter1f(unsigned int name, float value); +void nvttSetParameter2f(unsigned int name, float v0, float v1); +void nvttSetParameter3f(unsigned int name, float v0, float v1, float v2); +void nvttSetParameter4f(unsigned int name, float v0, float v1, float v2, float v3); + +// Image object +NvttImage * nvttCreateImage(); +void nvttDestroyImage(NvttImage * img); + +void nvttSetImageData(NvttImage * image, NvttInputFormat format, unsigned int w, unsigned int h, void * data); + +void nvttSetImageParameter1i(NvttImage * image, unsigned int name, int value); +void nvttSetImageParameter1f(NvttImage * image, unsigned int name, float value); + +void nvttResizeImage(NvttImage * image, unsigned int w, unsigned int h); +void nvttQuantizeImage(NvttImage * image, bool dither, unsigned int rbits, unsigned int gbits, unsigned int bbits, unsigned int abits); +void nvttCompressImage(NvttImage * image, void * buffer, int size); + +*/ + + +#endif // NVTT_EXPERIMENTAL_H Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/experimental/nvtt_experimental.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/experimental/nvtt_experimental.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/experimental/nvtt_experimental.cpp @@ -0,0 +1,57 @@ + +#include "nvtt_experimental.h" + +struct NvttTexture +{ + NvttTexture() : + m_constant(false), + m_image(NULL), + m_floatImage(NULL) + { + } + + ~NvttTexture() + { + if (m_constant && m_image) m_image->unwrap(); + delete m_image; + delete m_floatImage; + } + + bool m_constant; + Image * m_image; + FloatImage * m_floatImage; +}; + +NvttTexture * nvttCreateTexture() +{ + return new NvttTexture(); +} + +void nvttDestroyTexture(NvttTexture * tex) +{ + delete tex; +} + +void nvttSetImageData(NvttImage * img, NvttInputFormat format, uint w, uint h, void * data) +{ + nvCheck(img != NULL); + + if (format == NVTT_InputFormat_BGRA_8UB) + { + img->m_constant = false; + img->m_image->allocate(w, h); + memcpy(img->m_image->pixels(), data, w * h * 4); + } + else + { + nvCheck(false); + } +} + +void nvttCompressImage(NvttImage * img, NvttFormat format) +{ + nvCheck(img != NULL); + + // @@ Invoke appropriate compressor. 
+} + Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/experimental/test.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/experimental/test.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/experimental/test.cpp @@ -0,0 +1,61 @@ + +#include "nvtt_experimental.h" + +/* +Errors in the original API: +- Too many memory copies. +- Implementation too complicated. +- Error output should not be in output options. +- Data driven interface. Follows the dialog model. Provide all the data upfront. +*/ + + +// Output texture with mipmaps +void example0() +{ + CompressionOptions compressionOptions; + OutputOptions outputOptions; + + Texture img; + img.setTexture2D(format, w, h, 0, data); + + Compressor context; + context.outputHeader(outputOptions); + context.outputCompressed(img, compressionOptions, outputOptions); + + img.toLinear(2.2); + while (img.downsample(NVTT_FILTER_BOX)) + { + img.toGamma(2.2); + outputCompressed(img, compressionOptions, outputOptions); + } +} + + +// Output texture with colored mipmaps +void example1() +{ + CompressionOptions compressionOptions; + OutputOptions outputOptions; + + Texture img; + img.setTexture2D(format, w, h, 0, data); + + Compressor context; + context.outputHeader(outputOptions); + context.outputCompressed(img, compressionOptions, outputOptions); + + img.toLinear(2.2); + while (img.downsample(NVTT_FILTER_BOX)) + { + img.toGamma(2.2); + + Texture mipmap = img; + mipmap.blend(color[i].r, color[i].g, color[i].b, 0.5f); + + context.outputCompressed(mipmap, compressionOptions, outputOptions); + } +} + + + Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/nvtt.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/nvtt.h +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/nvtt.h @@ -1,308 +1,676 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. 
- -#ifndef NV_TT_H -#define NV_TT_H - -// Function linkage -#if NVTT_SHARED - -#if defined _WIN32 || defined WIN32 || defined __NT__ || defined __WIN32__ || defined __MINGW32__ -# ifdef NVTT_EXPORTS -# define NVTT_API __declspec(dllexport) -# else -# define NVTT_API __declspec(dllimport) -# endif -#endif - -#if defined __GNUC__ >= 4 -# ifdef NVTT_EXPORTS -# define NVTT_API __attribute__((visibility("default"))) -# endif -#endif - -#endif // NVTT_SHARED - -#if !defined NVTT_API -# define NVTT_API -#endif - -#define NVTT_VERSION 200 - -#define NVTT_DECLARE_PIMPL(Class) \ - private: \ - Class(const Class &); \ - void operator=(const Class &); \ - public: \ - struct Private; \ - Private & m - - -// Public interface. -namespace nvtt -{ - /// Supported compression formats. - enum Format - { - // No compression. - Format_RGB, - Format_RGBA = Format_RGB, - - // DX9 formats. - Format_DXT1, - Format_DXT1a, // DXT1 with binary alpha. - Format_DXT3, - Format_DXT5, - Format_DXT5n, // Compressed HILO: R=1, G=y, B=0, A=x - - // DX10 formats. - Format_BC1 = Format_DXT1, - Format_BC1a = Format_DXT1a, - Format_BC2 = Format_DXT3, - Format_BC3 = Format_DXT5, - Format_BC3n = Format_DXT5n, - Format_BC4, // ATI1 - Format_BC5, // 3DC, ATI2 - }; - - /// Quality modes. - enum Quality - { - Quality_Fastest, - Quality_Normal, - Quality_Production, - Quality_Highest, - }; - - /// Compression options. This class describes the desired compression format and other compression settings. - struct CompressionOptions - { - NVTT_DECLARE_PIMPL(CompressionOptions); - - NVTT_API CompressionOptions(); - NVTT_API ~CompressionOptions(); - - NVTT_API void reset(); - - NVTT_API void setFormat(Format format); - NVTT_API void setQuality(Quality quality); - NVTT_API void setColorWeights(float red, float green, float blue, float alpha = 1.0f); - - NVTT_API void setExternalCompressor(const char * name); - - // Set color mask to describe the RGB/RGBA format. - NVTT_API void setPixelFormat(unsigned int bitcount, unsigned int rmask, unsigned int gmask, unsigned int bmask, unsigned int amask); - - NVTT_API void setQuantization(bool colorDithering, bool alphaDithering, bool binaryAlpha, int alphaThreshold = 127); - }; - - - /// Wrap modes. - enum WrapMode - { - WrapMode_Clamp, - WrapMode_Repeat, - WrapMode_Mirror, - }; - - /// Texture types. - enum TextureType - { - TextureType_2D, - TextureType_Cube, - // TextureType_3D, - }; - - /// Input formats. - enum InputFormat - { - InputFormat_BGRA_8UB, - // InputFormat_RGBE_8UB, - // InputFormat_BGRA_32F, - }; - - /// Mipmap downsampling filters. - enum MipmapFilter - { - MipmapFilter_Box, ///< Box filter is quite good and very fast. - MipmapFilter_Triangle, ///< Triangle filter blurs the results too much, but that might be what you want. - MipmapFilter_Kaiser, ///< Kaiser-windowed Sinc filter is the best downsampling filter. - }; - - /// Color transformation. - enum ColorTransform - { - ColorTransform_None, - ColorTransform_Linear, - }; - - /// Extents rounding mode. - enum RoundMode - { - RoundMode_None, - RoundMode_ToNextPowerOfTwo, - RoundMode_ToNearestPowerOfTwo, - RoundMode_ToPreviousPowerOfTwo, - }; - - /// Alpha mode. - enum AlphaMode - { - AlphaMode_None, - AlphaMode_Transparency, - AlphaMode_Premultiplied, - }; - - /// Input options. Specify format and layout of the input texture. - struct InputOptions - { - NVTT_DECLARE_PIMPL(InputOptions); - - NVTT_API InputOptions(); - NVTT_API ~InputOptions(); - - // Set default options. - NVTT_API void reset(); - - // Setup input layout. 
- NVTT_API void setTextureLayout(TextureType type, int w, int h, int d = 1); - NVTT_API void resetTextureLayout(); - - // Set mipmap data. Copies the data. - NVTT_API bool setMipmapData(const void * data, int w, int h, int d = 1, int face = 0, int mipmap = 0); - - // Describe the format of the input. - NVTT_API void setFormat(InputFormat format); - - // Set the way the input alpha channel is interpreted. - NVTT_API void setAlphaMode(AlphaMode alphaMode); - - // Set gamma settings. - NVTT_API void setGamma(float inputGamma, float outputGamma); - - // Set texture wrappign mode. - NVTT_API void setWrapMode(WrapMode mode); - - // Set mipmapping options. - NVTT_API void setMipmapFilter(MipmapFilter filter); - NVTT_API void setMipmapGeneration(bool enabled, int maxLevel = -1); - NVTT_API void setKaiserParameters(float width, float alpha, float stretch); - - // Set normal map options. - NVTT_API void setNormalMap(bool b); - NVTT_API void setConvertToNormalMap(bool convert); - NVTT_API void setHeightEvaluation(float redScale, float greenScale, float blueScale, float alphaScale); - NVTT_API void setNormalFilter(float sm, float medium, float big, float large); - NVTT_API void setNormalizeMipmaps(bool b); - - // Set color transforms. @@ Not implemented! - NVTT_API void setColorTransform(ColorTransform t); - NVTT_API void setLinearTransform(int channel, float w0, float w1, float w2, float w3); - - // Set resizing options. - NVTT_API void setMaxExtents(int d); - NVTT_API void setRoundMode(RoundMode mode); - }; - - - /// Output handler. - struct OutputHandler - { - virtual ~OutputHandler() {} - - /// Indicate the start of a new compressed image that's part of the final texture. - virtual void beginImage(int size, int width, int height, int depth, int face, int miplevel) = 0; - - /// Output data. Compressed data is output as soon as it's generated to minimize memory allocations. - virtual bool writeData(const void * data, int size) = 0; - }; - - /// Error codes. - enum Error - { - Error_Unknown, - Error_InvalidInput, - Error_UnsupportedFeature, - Error_CudaError, - Error_FileOpen, - Error_FileWrite, - }; - - /// Error handler. - struct ErrorHandler - { - virtual ~ErrorHandler() {} - - // Signal error. - virtual void error(Error e) = 0; - }; - - - /// Output Options. This class holds pointers to the interfaces that are used to report the output of - /// the compressor to the user. - struct OutputOptions - { - NVTT_DECLARE_PIMPL(OutputOptions); - - NVTT_API OutputOptions(); - NVTT_API ~OutputOptions(); - - // Set default options. - NVTT_API void reset(); - - NVTT_API void setFileName(const char * fileName); - - NVTT_API void setOutputHandler(OutputHandler * outputHandler); - NVTT_API void setErrorHandler(ErrorHandler * errorHandler); - NVTT_API void setOutputHeader(bool outputHeader); - }; - - - /// Texture compressor. - struct Compressor - { - NVTT_DECLARE_PIMPL(Compressor); - - NVTT_API Compressor(); - NVTT_API ~Compressor(); - - NVTT_API void enableCudaAcceleration(bool enable); - NVTT_API bool isCudaAccelerationEnabled() const; - - // Main entrypoint of the compression library. - NVTT_API bool process(const InputOptions & inputOptions, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const; - - // Estimate the size of compressing the input with the given options. - NVTT_API int estimateSize(const InputOptions & inputOptions, const CompressionOptions & compressionOptions) const; - }; - - - // Return string for the given error code. 
- NVTT_API const char * errorString(Error e); - - // Return NVTT version. - NVTT_API unsigned int version(); - -} // nvtt namespace - -#endif // NV_TT_H +// Copyright (c) 2009-2011 Ignacio Castano +// Copyright (c) 2007-2009 NVIDIA Corporation -- Ignacio Castano +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +#pragma once +#ifndef NVTT_H +#define NVTT_H + +// Function linkage +#if NVTT_SHARED + +#if defined _WIN32 || defined WIN32 || defined __NT__ || defined __WIN32__ || defined __MINGW32__ +# ifdef NVTT_EXPORTS +# define NVTT_API __declspec(dllexport) +# else +# define NVTT_API __declspec(dllimport) +# endif +#endif + +#if defined __GNUC__ >= 4 +# ifdef NVTT_EXPORTS +# define NVTT_API __attribute__((visibility("default"))) +# endif +#endif + +#endif // NVTT_SHARED + +#if !defined NVTT_API +# define NVTT_API +#endif + +#define NVTT_VERSION 20100 + +#define NVTT_FORBID_COPY(Class) \ + private: \ + Class(const Class &); \ + void operator=(const Class &); \ + public: + +#define NVTT_DECLARE_PIMPL(Class) \ + public: \ + struct Private; \ + Private & m + + +// Public interface. +namespace nvtt +{ + // Forward declarations. + struct Surface; + struct CubeSurface; + + + // Supported block-compression formats. + // @@ I wish I had distinguished between "formats" and compressors. + // That is: + // - 'DXT1' is a format 'DXT1a' and 'DXT1n' are DXT1 compressors. + // - 'DXT3' is a format 'DXT3n' is a DXT3 compressor. + // Having multiple enums for the same ids only creates confusion. Clean this up. + enum Format + { + // No block-compression (linear). + Format_RGB, + Format_RGBA = Format_RGB, + + // DX9 formats. + Format_DXT1, + Format_DXT1a, // DXT1 with binary alpha. + Format_DXT3, + Format_DXT5, + Format_DXT5n, // Compressed HILO: R=1, G=y, B=0, A=x + + // DX10 formats. + Format_BC1 = Format_DXT1, + Format_BC1a = Format_DXT1a, + Format_BC2 = Format_DXT3, + Format_BC3 = Format_DXT5, + Format_BC3n = Format_DXT5n, + Format_BC4, // ATI1 + Format_BC5, // 3DC, ATI2 + + Format_DXT1n, // Not supported. + Format_CTX1, // Not supported. + + Format_BC6, + Format_BC7, + + Format_BC3_RGBM, // + + Format_Count + }; + + // Pixel types. These basically indicate how the output should be interpreted, but do not have any influence over the input. They are only relevant in RGBA mode. + enum PixelType + { + PixelType_UnsignedNorm = 0, + PixelType_SignedNorm = 1, // Not supported yet. + PixelType_UnsignedInt = 2, // Not supported yet. 
+ PixelType_SignedInt = 3, // Not supported yet. + PixelType_Float = 4, + PixelType_UnsignedFloat = 5, + PixelType_SharedExp = 6, // Shared exponent. + }; + + // Quality modes. + enum Quality + { + Quality_Fastest, + Quality_Normal, + Quality_Production, + Quality_Highest, + }; + + // DXT decoder. + enum Decoder + { + Decoder_D3D10, + Decoder_D3D9, + Decoder_NV5x, + //Decoder_RSX, // To take advantage of DXT5 bug. + }; + + + // Compression options. This class describes the desired compression format and other compression settings. + struct CompressionOptions + { + NVTT_FORBID_COPY(CompressionOptions); + NVTT_DECLARE_PIMPL(CompressionOptions); + + NVTT_API CompressionOptions(); + NVTT_API ~CompressionOptions(); + + NVTT_API void reset(); + + NVTT_API void setFormat(Format format); + NVTT_API void setQuality(Quality quality); + NVTT_API void setColorWeights(float red, float green, float blue, float alpha = 1.0f); + + NVTT_API void setExternalCompressor(const char * name); + + // Set color mask to describe the RGB/RGBA format. + NVTT_API void setPixelFormat(unsigned int bitcount, unsigned int rmask, unsigned int gmask, unsigned int bmask, unsigned int amask); + NVTT_API void setPixelFormat(unsigned char rsize, unsigned char gsize, unsigned char bsize, unsigned char asize); + + NVTT_API void setPixelType(PixelType pixelType); + + NVTT_API void setPitchAlignment(int pitchAlignment); + + // @@ I wish this wasn't part of the compression options. Quantization is applied before compression. We don't have compressors with error diffusion. + // @@ These options are only taken into account when using the InputOptions API. + NVTT_API void setQuantization(bool colorDithering, bool alphaDithering, bool binaryAlpha, int alphaThreshold = 127); + + NVTT_API void setTargetDecoder(Decoder decoder); + + // Translate to and from D3D formats. + NVTT_API unsigned int d3d9Format() const; + //NVTT_API bool setD3D9Format(unsigned int format); + //NVTT_API unsigned int dxgiFormat() const; + //NVTT_API bool setDxgiFormat(unsigned int format); + }; + + /* + // DXGI_FORMAT_R16G16_FLOAT + compressionOptions.setPixelType(PixelType_Float); + compressionOptions.setPixelFormat2(16, 16, 0, 0); + + // DXGI_FORMAT_R32G32B32A32_FLOAT + compressionOptions.setPixelType(PixelType_Float); + compressionOptions.setPixelFormat2(32, 32, 32, 32); + */ + + + // Wrap modes. + enum WrapMode + { + WrapMode_Clamp, + WrapMode_Repeat, + WrapMode_Mirror, + }; + + // Texture types. + enum TextureType + { + TextureType_2D, + TextureType_Cube, + TextureType_3D, + TextureType_Array, + }; + + // Input formats. + enum InputFormat + { + InputFormat_BGRA_8UB, // Normalized [0, 1] 8 bit fixed point. + InputFormat_RGBA_16F, // 16 bit floating point. + InputFormat_RGBA_32F, // 32 bit floating point. + InputFormat_R_32F, // Single channel 32 bit floating point. + }; + + // Mipmap downsampling filters. + enum MipmapFilter + { + MipmapFilter_Box, // Box filter is quite good and very fast. + MipmapFilter_Triangle, // Triangle filter blurs the results too much, but that might be what you want. + MipmapFilter_Kaiser, // Kaiser-windowed Sinc filter is the best downsampling filter. + }; + + // Texture resize filters. + enum ResizeFilter + { + ResizeFilter_Box, + ResizeFilter_Triangle, + ResizeFilter_Kaiser, + ResizeFilter_Mitchell, + }; + + // Extents rounding mode. 
+ enum RoundMode + { + RoundMode_None, + RoundMode_ToNextPowerOfTwo, + RoundMode_ToNearestPowerOfTwo, + RoundMode_ToPreviousPowerOfTwo, + RoundMode_ToNextMultipleOfFour, // (New in NVTT 2.1) + RoundMode_ToNearestMultipleOfFour, // (New in NVTT 2.1) + RoundMode_ToPreviousMultipleOfFour, // (New in NVTT 2.1) + }; + + // Alpha mode. + enum AlphaMode + { + AlphaMode_None, + AlphaMode_Transparency, + AlphaMode_Premultiplied, + }; + + // Input options. Specify format and layout of the input texture. (Deprecated in NVTT 2.1) + struct InputOptions + { + NVTT_FORBID_COPY(InputOptions); + NVTT_DECLARE_PIMPL(InputOptions); + + NVTT_API InputOptions(); + NVTT_API ~InputOptions(); + + // Set default options. + NVTT_API void reset(); + + // Setup input layout. + NVTT_API void setTextureLayout(TextureType type, int w, int h, int d = 1, int arraySize = 1); + NVTT_API void resetTextureLayout(); + + // Set mipmap data. Copies the data. + NVTT_API bool setMipmapData(const void * data, int w, int h, int d = 1, int face = 0, int mipmap = 0); + + // Describe the format of the input. + NVTT_API void setFormat(InputFormat format); + + // Set the way the input alpha channel is interpreted. @@ Not implemented! + NVTT_API void setAlphaMode(AlphaMode alphaMode); + + // Set gamma settings. + NVTT_API void setGamma(float inputGamma, float outputGamma); + + // Set texture wrapping mode. + NVTT_API void setWrapMode(WrapMode mode); + + // Set mipmapping options. + NVTT_API void setMipmapFilter(MipmapFilter filter); + NVTT_API void setMipmapGeneration(bool enabled, int maxLevel = -1); + NVTT_API void setKaiserParameters(float width, float alpha, float stretch); + + // Set normal map options. + NVTT_API void setNormalMap(bool b); + NVTT_API void setConvertToNormalMap(bool convert); + NVTT_API void setHeightEvaluation(float redScale, float greenScale, float blueScale, float alphaScale); + NVTT_API void setNormalFilter(float sm, float medium, float big, float large); + NVTT_API void setNormalizeMipmaps(bool b); + + // Set resizing options. + NVTT_API void setMaxExtents(int d); + NVTT_API void setRoundMode(RoundMode mode); + }; + + + // Output handler. + struct OutputHandler + { + virtual ~OutputHandler() {} + + // Indicate the start of a new compressed image that's part of the final texture. + virtual void beginImage(int size, int width, int height, int depth, int face, int miplevel) = 0; + + // Output data. Compressed data is output as soon as it's generated to minimize memory allocations. + virtual bool writeData(const void * data, int size) = 0; + + // Indicate the end of the compressed image. (New in NVTT 2.1) + virtual void endImage() = 0; + }; + + // Error codes. + enum Error + { + Error_Unknown, + Error_InvalidInput, + Error_UnsupportedFeature, + Error_CudaError, + Error_FileOpen, + Error_FileWrite, + Error_UnsupportedOutputFormat, + Error_Count + }; + + // Error handler. + struct ErrorHandler + { + virtual ~ErrorHandler() {} + + // Signal error. + virtual void error(Error e) = 0; + }; + + // Container. + enum Container + { + Container_DDS, + Container_DDS10, + // Container_KTX, // Khronos Texture: http://www.khronos.org/opengles/sdk/tools/KTX/ + // Container_VTF, // Valve Texture Format: http://developer.valvesoftware.com/wiki/Valve_Texture_Format + }; + + + // Output Options. This class holds pointers to the interfaces that are used to report the output of + // the compressor to the user. 
+ struct OutputOptions + { + NVTT_FORBID_COPY(OutputOptions); + NVTT_DECLARE_PIMPL(OutputOptions); + + NVTT_API OutputOptions(); + NVTT_API ~OutputOptions(); + + // Set default options. + NVTT_API void reset(); + + NVTT_API void setFileName(const char * fileName); + NVTT_API void setFileHandle(void * fp); + + NVTT_API void setOutputHandler(OutputHandler * outputHandler); + NVTT_API void setErrorHandler(ErrorHandler * errorHandler); + + NVTT_API void setOutputHeader(bool outputHeader); + NVTT_API void setContainer(Container container); + NVTT_API void setUserVersion(int version); + NVTT_API void setSrgbFlag(bool b); + }; + + // (New in NVTT 2.1) + typedef void Task(void * context, int id); + + // (New in NVTT 2.1) + struct TaskDispatcher + { + virtual ~TaskDispatcher() {} + + virtual void dispatch(Task * task, void * context, int count) = 0; + }; + + // Context. + struct Compressor + { + NVTT_FORBID_COPY(Compressor); + NVTT_DECLARE_PIMPL(Compressor); + + NVTT_API Compressor(); + NVTT_API ~Compressor(); + + // Context settings. + NVTT_API void enableCudaAcceleration(bool enable); + NVTT_API bool isCudaAccelerationEnabled() const; + NVTT_API void setTaskDispatcher(TaskDispatcher * disp); // (New in NVTT 2.1) + + // InputOptions API. + NVTT_API bool process(const InputOptions & inputOptions, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const; + NVTT_API int estimateSize(const InputOptions & inputOptions, const CompressionOptions & compressionOptions) const; + + // Surface API. (New in NVTT 2.1) + NVTT_API bool outputHeader(const Surface & img, int mipmapCount, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const; + NVTT_API bool compress(const Surface & img, int face, int mipmap, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const; + NVTT_API int estimateSize(const Surface & img, int mipmapCount, const CompressionOptions & compressionOptions) const; + + // CubeSurface API. (New in NVTT 2.1) + NVTT_API bool outputHeader(const CubeSurface & cube, int mipmapCount, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const; + NVTT_API bool compress(const CubeSurface & cube, int mipmap, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const; + NVTT_API int estimateSize(const CubeSurface & cube, int mipmapCount, const CompressionOptions & compressionOptions) const; + + // Raw API. (New in NVTT 2.1) + NVTT_API bool outputHeader(TextureType type, int w, int h, int d, int arraySize, int mipmapCount, bool isNormalMap, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const; + NVTT_API bool compress(int w, int h, int d, int face, int mipmap, const float * rgba, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const; + NVTT_API int estimateSize(int w, int h, int d, int mipmapCount, const CompressionOptions & compressionOptions) const; + }; + + // "Compressor" is deprecated. This should have been called "Context" + typedef Compressor Context; + + // (New in NVTT 2.1) + enum NormalTransform { + NormalTransform_Orthographic, + NormalTransform_Stereographic, + NormalTransform_Paraboloid, + NormalTransform_Quartic + //NormalTransform_DualParaboloid, + }; + + // (New in NVTT 2.1) + enum ToneMapper { + ToneMapper_Linear, + ToneMapper_Reindhart, + ToneMapper_Halo, + ToneMapper_Lightmap, + }; + + + // A surface is one level of a 2D or 3D texture. 
(New in NVTT 2.1) + // @@ It would be nice to add support for texture borders for correct resizing of tiled textures and constrained DXT compression. + struct Surface + { + NVTT_API Surface(); + NVTT_API Surface(const Surface & img); + NVTT_API ~Surface(); + + NVTT_API void operator=(const Surface & img); + + // Texture parameters. + NVTT_API void setWrapMode(WrapMode mode); + NVTT_API void setAlphaMode(AlphaMode alphaMode); + NVTT_API void setNormalMap(bool isNormalMap); + + // Queries. + NVTT_API bool isNull() const; + NVTT_API int width() const; + NVTT_API int height() const; + NVTT_API int depth() const; + NVTT_API TextureType type() const; + NVTT_API WrapMode wrapMode() const; + NVTT_API AlphaMode alphaMode() const; + NVTT_API bool isNormalMap() const; + NVTT_API int countMipmaps() const; + NVTT_API int countMipmaps(int min_size) const; + NVTT_API float alphaTestCoverage(float alphaRef = 0.5, int alpha_channel = 3) const; + NVTT_API float average(int channel, int alpha_channel = -1, float gamma = 2.2f) const; + NVTT_API const float * data() const; + NVTT_API const float * channel(int i) const; + NVTT_API void histogram(int channel, float rangeMin, float rangeMax, int binCount, int * binPtr) const; + NVTT_API void range(int channel, float * rangeMin, float * rangeMax, int alpha_channel = -1, float alpha_ref = 0.f) const; + + // Texture data. + NVTT_API bool load(const char * fileName, bool * hasAlpha = 0); + NVTT_API bool save(const char * fileName, bool hasAlpha = 0, bool hdr = 0) const; + NVTT_API bool setImage(int w, int h, int d); + NVTT_API bool setImage(InputFormat format, int w, int h, int d, const void * data); + NVTT_API bool setImage(InputFormat format, int w, int h, int d, const void * r, const void * g, const void * b, const void * a); + NVTT_API bool setImage2D(Format format, Decoder decoder, int w, int h, const void * data); + + // Resizing methods. + NVTT_API void resize(int w, int h, int d, ResizeFilter filter); + NVTT_API void resize(int w, int h, int d, ResizeFilter filter, float filterWidth, const float * params = 0); + NVTT_API void resize(int maxExtent, RoundMode mode, ResizeFilter filter); + NVTT_API void resize(int maxExtent, RoundMode mode, ResizeFilter filter, float filterWidth, const float * params = 0); + NVTT_API void resize_make_square(int maxExtent, RoundMode roundMode, ResizeFilter filter); + + NVTT_API bool buildNextMipmap(MipmapFilter filter, int min_size = 1); + NVTT_API bool buildNextMipmap(MipmapFilter filter, float filterWidth, const float * params = 0, int min_size = 1); + NVTT_API bool buildNextMipmapSolidColor(const float * const color_components); + NVTT_API void canvasSize(int w, int h, int d); + // associated to resizing: + NVTT_API bool canMakeNextMipmap(int min_size = 1); + + // Color transforms. 
+ NVTT_API void toLinear(float gamma); + NVTT_API void toGamma(float gamma); + NVTT_API void toLinear(int channel, float gamma); + NVTT_API void toGamma(int channel, float gamma); + NVTT_API void toSrgb(); + NVTT_API void toLinearFromSrgb(); + NVTT_API void toXenonSrgb(); + NVTT_API void transform(const float w0[4], const float w1[4], const float w2[4], const float w3[4], const float offset[4]); + NVTT_API void swizzle(int r, int g, int b, int a); + NVTT_API void scaleBias(int channel, float scale, float bias); + NVTT_API void clamp(int channel, float low = 0.0f, float high = 1.0f); + NVTT_API void blend(float r, float g, float b, float a, float t); + NVTT_API void premultiplyAlpha(); + NVTT_API void toGreyScale(float redScale, float greenScale, float blueScale, float alphaScale); + NVTT_API void setBorder(float r, float g, float b, float a); + NVTT_API void fill(float r, float g, float b, float a); + NVTT_API void scaleAlphaToCoverage(float coverage, float alphaRef = 0.5f, int alpha_channel = 3); + NVTT_API void toRGBM(float range = 1.0f, float threshold = 0.25f); + NVTT_API void fromRGBM(float range = 1.0f, float threshold = 0.25f); + NVTT_API void toLM(float range = 1.0f, float threshold = 0.0f); + NVTT_API void toRGBE(int mantissaBits, int exponentBits); + NVTT_API void fromRGBE(int mantissaBits, int exponentBits); + NVTT_API void toYCoCg(); + NVTT_API void blockScaleCoCg(int bits = 5, float threshold = 0.0f); + NVTT_API void fromYCoCg(); + NVTT_API void toLUVW(float range = 1.0f); + NVTT_API void fromLUVW(float range = 1.0f); + NVTT_API void abs(int channel); + NVTT_API void convolve(int channel, int kernelSize, float * kernelData); + NVTT_API void toLogScale(int channel, float base); + NVTT_API void fromLogScale(int channel, float base); + NVTT_API void setAtlasBorder(int w, int h, float r, float g, float b, float a); + + NVTT_API void toneMap(ToneMapper tm, float * parameters); + + //NVTT_API void blockLuminanceScale(float scale); + + // Color quantization. + NVTT_API void binarize(int channel, float threshold, bool dither); + NVTT_API void quantize(int channel, int bits, bool exactEndPoints, bool dither); + + // Normal map transforms. + NVTT_API void toNormalMap(float sm, float medium, float big, float large); + NVTT_API void normalizeNormalMap(); + NVTT_API void transformNormals(NormalTransform xform); + NVTT_API void reconstructNormals(NormalTransform xform); + NVTT_API void toCleanNormalMap(); + NVTT_API void packNormals(float scale = 0.5f, float bias = 0.5f); // [-1,1] -> [ 0,1] + NVTT_API void expandNormals(float scale = 2.0f, float bias = -1.0f); // [ 0,1] -> [-1,1] + NVTT_API Surface createToksvigMap(float power) const; + NVTT_API Surface createCleanMap() const; + + // Geometric transforms. + NVTT_API void flipX(); + NVTT_API void flipY(); + NVTT_API void flipZ(); + NVTT_API Surface createSubImage(int x0, int x1, int y0, int y1, int z0, int z1) const; + + // Copy image data. + NVTT_API bool copyChannel(const Surface & srcImage, int srcChannel); + NVTT_API bool copyChannel(const Surface & srcImage, int srcChannel, int dstChannel); + + NVTT_API bool addChannel(const Surface & img, int srcChannel, int dstChannel, float scale); + + NVTT_API bool copy(const Surface & src, int xsrc, int ysrc, int zsrc, int xsize, int ysize, int zsize, int xdst, int ydst, int zdst); + + + //private: + void detach(); + + struct Private; + Private * m; + }; + + + // Cube layout formats. 
(New in NVTT 2.1) + enum CubeLayout { + CubeLayout_VerticalCross, + CubeLayout_HorizontalCross, + CubeLayout_Column, + CubeLayout_Row, + CubeLayout_LatitudeLongitude + }; + + // (New in NVTT 2.1) + enum EdgeFixup { + EdgeFixup_None, + EdgeFixup_Stretch, + EdgeFixup_Warp, + EdgeFixup_Average, + }; + + // A CubeSurface is one level of a cube map texture. (New in NVTT 2.1) + struct CubeSurface + { + NVTT_API CubeSurface(); + NVTT_API CubeSurface(const CubeSurface & img); + NVTT_API ~CubeSurface(); + + NVTT_API void operator=(const CubeSurface & img); + + // Queries. + NVTT_API bool isNull() const; + NVTT_API int edgeLength() const; + NVTT_API int countMipmaps() const; + + // Texture data. + NVTT_API bool load(const char * fileName, int mipmap); + NVTT_API bool save(const char * fileName) const; + + NVTT_API Surface & face(int face); + NVTT_API const Surface & face(int face) const; + + // Layout conversion. @@ Not implemented. + NVTT_API void fold(const Surface & img, CubeLayout layout); + NVTT_API Surface unfold(CubeLayout layout) const; + + // @@ Angular extent filtering. + + // @@ Add resizing methods. + + // @@ Add edge fixup methods. + + NVTT_API float average(int channel) const; + NVTT_API void range(int channel, float * minimum_ptr, float * maximum_ptr) const; + NVTT_API void clamp(int channel, float low = 0.0f, float high = 1.0f); + + + // Filtering. + NVTT_API CubeSurface irradianceFilter(int size, EdgeFixup fixupMethod) const; + NVTT_API CubeSurface cosinePowerFilter(int size, float cosinePower, EdgeFixup fixupMethod) const; + + NVTT_API CubeSurface fastResample(int size, EdgeFixup fixupMethod) const; + + + /* + NVTT_API void resize(int w, int h, ResizeFilter filter); + NVTT_API void resize(int w, int h, ResizeFilter filter, float filterWidth, const float * params = 0); + NVTT_API void resize(int maxExtent, RoundMode mode, ResizeFilter filter); + NVTT_API void resize(int maxExtent, RoundMode mode, ResizeFilter filter, float filterWidth, const float * params = 0); + NVTT_API bool buildNextMipmap(MipmapFilter filter); + NVTT_API bool buildNextMipmap(MipmapFilter filter, float filterWidth, const float * params = 0); + */ + + // Color transforms. + NVTT_API void toLinear(float gamma); + NVTT_API void toGamma(float gamma); + + //private: + void detach(); + + struct Private; + Private * m; + }; + + + // Return string for the given error code. + NVTT_API const char * errorString(Error e); + + // Return NVTT version. + NVTT_API unsigned int version(); + + // Image comparison and error measurement functions. 
(New in NVTT 2.1) + NVTT_API float rmsError(const Surface & reference, const Surface & img); + NVTT_API float rmsAlphaError(const Surface & reference, const Surface & img); + NVTT_API float cieLabError(const Surface & reference, const Surface & img); + NVTT_API float angularError(const Surface & reference, const Surface & img); + NVTT_API Surface diff(const Surface & reference, const Surface & img, float scale); + + NVTT_API float rmsToneMappedError(const Surface & reference, const Surface & img, float exposure); + + + NVTT_API Surface histogram(const Surface & img, int width, int height); + NVTT_API Surface histogram(const Surface & img, float minRange, float maxRange, int width, int height); + +} // nvtt namespace + +#endif // NVTT_H Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/nvtt.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/nvtt.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/nvtt.cpp @@ -1,55 +1,59 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. - -#include "nvtt.h" - -using namespace nvtt; - -/// Return a string for the given error. -const char * nvtt::errorString(Error e) -{ - switch(e) - { - case Error_Unknown: - return "Unknown error"; - case Error_InvalidInput: - return "Invalid input"; - case Error_UnsupportedFeature: - return "Unsupported feature"; - case Error_CudaError: - return "CUDA error"; - case Error_FileOpen: - return "Error opening file"; - case Error_FileWrite: - return "Error writing through output handler"; - } - - return "Invalid error"; -} - -/// Return NVTT version. -unsigned int nvtt::version() -{ - return NVTT_VERSION; -} - +// Copyright (c) 2009-2011 Ignacio Castano +// Copyright (c) 2007-2009 NVIDIA Corporation -- Ignacio Castano +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +#include "nvtt.h" +#include "nvcore/nvcore.h" + +using namespace nvtt; + +// Return a string for the given error. +const char * nvtt::errorString(Error e) +{ + NV_COMPILER_CHECK(Error_Count == 7); + switch(e) + { + case Error_Unknown: + return "Unknown error"; + case Error_InvalidInput: + return "Invalid input"; + case Error_UnsupportedFeature: + return "Unsupported feature"; + case Error_CudaError: + return "CUDA error"; + case Error_FileOpen: + return "Error opening file"; + case Error_FileWrite: + return "Error writing through output handler"; + case Error_UnsupportedOutputFormat: + return "The container file does not support the selected output format"; + } + + return "Invalid error"; +} + +// Return NVTT version. +unsigned int nvtt::version() +{ + return NVTT_VERSION; +} Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/nvtt_wrapper.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/nvtt_wrapper.h +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/nvtt_wrapper.h @@ -1,241 +1,235 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. 
- -#ifndef NVTT_WRAPPER_H -#define NVTT_WRAPPER_H - -// Function linkage -#if NVTT_SHARED - -#if defined _WIN32 || defined WIN32 || defined __NT__ || defined __WIN32__ || defined __MINGW32__ -# ifdef NVTT_EXPORTS -# define NVTT_API __declspec(dllexport) -# else -# define NVTT_API __declspec(dllimport) -# endif -#endif - -#if defined __GNUC__ >= 4 -# ifdef NVTT_EXPORTS -# define NVTT_API __attribute__((visibility("default"))) -# endif -#endif - -#endif // NVTT_SHARED - -#if !defined NVTT_API -# define NVTT_API -#endif - -#define NVTT_VERSION 200 - -#ifdef __cplusplus -typedef struct nvtt::InputOptions NvttInputOptions; -typedef struct nvtt::CompressionOptions NvttCompressionOptions; -typedef struct nvtt::OutputOptions NvttOutputOptions; -typedef struct nvtt::Compressor NvttCompressor; -#else -typedef struct NvttInputOptions NvttInputOptions; -typedef struct NvttCompressionOptions NvttCompressionOptions; -typedef struct NvttOutputOptions NvttOutputOptions; -typedef struct NvttCompressor NvttCompressor; -#endif - -/// Supported compression formats. -typedef enum -{ - // No compression. - NVTT_Format_RGB, - NVTT_Format_RGBA = NVTT_Format_RGB, - - // DX9 formats. - NVTT_Format_DXT1, - NVTT_Format_DXT1a, - NVTT_Format_DXT3, - NVTT_Format_DXT5, - NVTT_Format_DXT5n, - - // DX10 formats. - NVTT_Format_BC1 = NVTT_Format_DXT1, - NVTT_Format_BC1a = NVTT_Format_DXT1a, - NVTT_Format_BC2 = NVTT_Format_DXT3, - NVTT_Format_BC3 = NVTT_Format_DXT5, - NVTT_Format_BC3n = NVTT_Format_DXT5n, - NVTT_Format_BC4, - NVTT_Format_BC5, -} NvttFormat; - -/// Quality modes. -typedef enum -{ - NVTT_Quality_Fastest, - NVTT_Quality_Normal, - NVTT_Quality_Production, - NVTT_Quality_Highest, -} NvttQuality; - -/// Wrap modes. -typedef enum -{ - NVTT_WrapMode_Clamp, - NVTT_WrapMode_Repeat, - NVTT_WrapMode_Mirror, -} NvttWrapMode; - -/// Texture types. -typedef enum -{ - NVTT_TextureType_2D, - NVTT_TextureType_Cube, -} NvttTextureType; - -/// Input formats. -typedef enum -{ - NVTT_InputFormat_BGRA_8UB, -} NvttInputFormat; - -/// Mipmap downsampling filters. -typedef enum -{ - NVTT_MipmapFilter_Box, - NVTT_MipmapFilter_Triangle, - NVTT_MipmapFilter_Kaiser, -} NvttMipmapFilter; - -/// Color transformation. -typedef enum -{ - NVTT_ColorTransform_None, - NVTT_ColorTransform_Linear, -} NvttColorTransform; - -/// Extents rounding mode. -typedef enum -{ - NVTT_RoundMode_None, - NVTT_RoundMode_ToNextPowerOfTwo, - NVTT_RoundMode_ToNearestPowerOfTwo, - NVTT_RoundMode_ToPreviousPowerOfTwo, -} NvttRoundMode; - -/// Alpha mode. -typedef enum -{ - NVTT_AlphaMode_None, - NVTT_AlphaMode_Transparency, - NVTT_AlphaMode_Premultiplied, -} NvttAlphaMode; - -typedef enum -{ - NVTT_Error_InvalidInput, - NVTT_Error_UserInterruption, - NVTT_Error_UnsupportedFeature, - NVTT_Error_CudaError, - NVTT_Error_Unknown, - NVTT_Error_FileOpen, - NVTT_Error_FileWrite, -} NvttError; - -typedef enum -{ - NVTT_False, - NVTT_True, -} NvttBoolean; - - -#ifdef __cplusplus -extern "C" { -#endif - -// Callbacks -//typedef void (* nvttErrorHandler)(NvttError e); -//typedef void (* nvttOutputHandler)(const void * data, int size); -//typedef void (* nvttImageHandler)(int size, int width, int height, int depth, int face, int miplevel); - - -// InputOptions class. 
-NVTT_API NvttInputOptions * nvttCreateInputOptions(); -NVTT_API void nvttDestroyInputOptions(NvttInputOptions * inputOptions); - -NVTT_API void nvttSetInputOptionsTextureLayout(NvttInputOptions * inputOptions, NvttTextureType type, int w, int h, int d); -NVTT_API void nvttResetInputOptionsTextureLayout(NvttInputOptions * inputOptions); -NVTT_API NvttBoolean nvttSetInputOptionsMipmapData(NvttInputOptions * inputOptions, const void * data, int w, int h, int d, int face, int mipmap); -NVTT_API void nvttSetInputOptionsFormat(NvttInputOptions * inputOptions, NvttInputFormat format); -NVTT_API void nvttSetInputOptionsAlphaMode(NvttInputOptions * inputOptions, NvttAlphaMode alphaMode); -NVTT_API void nvttSetInputOptionsGamma(NvttInputOptions * inputOptions, float inputGamma, float outputGamma); -NVTT_API void nvttSetInputOptionsWrapMode(NvttInputOptions * inputOptions, NvttWrapMode mode); -NVTT_API void nvttSetInputOptionsMipmapFilter(NvttInputOptions * inputOptions, NvttMipmapFilter filter); -NVTT_API void nvttSetInputOptionsMipmapGeneration(NvttInputOptions * inputOptions, NvttBoolean enabled, int maxLevel); -NVTT_API void nvttSetInputOptionsKaiserParameters(NvttInputOptions * inputOptions, float width, float alpha, float stretch); -NVTT_API void nvttSetInputOptionsNormalMap(NvttInputOptions * inputOptions, NvttBoolean b); -NVTT_API void nvttSetInputOptionsConvertToNormalMap(NvttInputOptions * inputOptions, NvttBoolean convert); -NVTT_API void nvttSetInputOptionsHeightEvaluation(NvttInputOptions * inputOptions, float redScale, float greenScale, float blueScale, float alphaScale); -NVTT_API void nvttSetInputOptionsNormalFilter(NvttInputOptions * inputOptions, float sm, float medium, float big, float large); -NVTT_API void nvttSetInputOptionsNormalizeMipmaps(NvttInputOptions * inputOptions, NvttBoolean b); -NVTT_API void nvttSetInputOptionsColorTransform(NvttInputOptions * inputOptions, NvttColorTransform t); -NVTT_API void nvttSetInputOptionsLinearTransform(NvttInputOptions * inputOptions, int channel, float w0, float w1, float w2, float w3); -NVTT_API void nvttSetInputOptionsMaxExtents(NvttInputOptions * inputOptions, int dim); -NVTT_API void nvttSetInputOptionsRoundMode(NvttInputOptions * inputOptions, NvttRoundMode mode); - - -// CompressionOptions class. -NVTT_API NvttCompressionOptions * nvttCreateCompressionOptions(); -NVTT_API void nvttDestroyCompressionOptions(NvttCompressionOptions * compressionOptions); - -NVTT_API void nvttSetCompressionOptionsFormat(NvttCompressionOptions * compressionOptions, NvttFormat format); -NVTT_API void nvttSetCompressionOptionsQuality(NvttCompressionOptions * compressionOptions, NvttQuality quality); -NVTT_API void nvttSetCompressionOptionsColorWeights(NvttCompressionOptions * compressionOptions, float red, float green, float blue, float alpha); -NVTT_API void nvttSetCompressionOptionsPixelFormat(NvttCompressionOptions * compressionOptions, unsigned int bitcount, unsigned int rmask, unsigned int gmask, unsigned int bmask, unsigned int amask); -NVTT_API void nvttSetCompressionOptionsQuantization(NvttCompressionOptions * compressionOptions, NvttBoolean colorDithering, NvttBoolean alphaDithering, NvttBoolean binaryAlpha, int alphaThreshold); - - -// OutputOptions class. 
-NVTT_API NvttOutputOptions * nvttCreateOutputOptions(); -NVTT_API void nvttDestroyOutputOptions(NvttOutputOptions * outputOptions); - -NVTT_API void nvttSetOutputOptionsFileName(NvttOutputOptions * outputOptions, const char * fileName); -NVTT_API void nvttSetOutputOptionsOutputHeader(NvttOutputOptions * outputOptions, NvttBoolean b); -//NVTT_API void nvttSetOutputOptionsErrorHandler(NvttOutputOptions * outputOptions, nvttErrorHandler errorHandler); -//NVTT_API void nvttSetOutputOptionsOutputHandler(NvttOutputOptions * outputOptions, nvttOutputHandler outputHandler, nvttImageHandler imageHandler); - - -// Compressor class. -NVTT_API NvttCompressor * nvttCreateCompressor(); -NVTT_API void nvttDestroyCompressor(NvttCompressor * compressor); - -NVTT_API NvttBoolean nvttCompress(const NvttCompressor * compressor, const NvttInputOptions * inputOptions, const NvttCompressionOptions * compressionOptions, const NvttOutputOptions * outputOptions); -NVTT_API int nvttEstimateSize(const NvttCompressor * compressor, const NvttInputOptions * inputOptions, const NvttCompressionOptions * compressionOptions); - - -// Global functions. -NVTT_API const char * nvttErrorString(NvttError e); -NVTT_API unsigned int nvttVersion(); - - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // NVTT_WRAPPER_H +// Copyright (c) 2009-2011 Ignacio Castano +// Copyright (c) 2007-2009 NVIDIA Corporation -- Ignacio Castano +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. 
+ +#ifndef NVTT_WRAPPER_H +#define NVTT_WRAPPER_H + +// Function linkage +#if NVTT_SHARED + +#if defined _WIN32 || defined WIN32 || defined __NT__ || defined __WIN32__ || defined __MINGW32__ +# ifdef NVTT_EXPORTS +# define NVTT_API __declspec(dllexport) +# else +# define NVTT_API __declspec(dllimport) +# endif +#endif + +#if defined __GNUC__ >= 4 +# ifdef NVTT_EXPORTS +# define NVTT_API __attribute__((visibility("default"))) +# endif +#endif + +#endif // NVTT_SHARED + +#if !defined NVTT_API +# define NVTT_API +#endif + +#define NVTT_VERSION 20100 + +#ifdef __cplusplus +typedef struct nvtt::InputOptions NvttInputOptions; +typedef struct nvtt::CompressionOptions NvttCompressionOptions; +typedef struct nvtt::OutputOptions NvttOutputOptions; +typedef struct nvtt::Compressor NvttCompressor; +#else +typedef struct NvttInputOptions NvttInputOptions; +typedef struct NvttCompressionOptions NvttCompressionOptions; +typedef struct NvttOutputOptions NvttOutputOptions; +typedef struct NvttCompressor NvttCompressor; +#endif + +/// Supported compression formats. +typedef enum +{ + // No compression. + NVTT_Format_RGB, + NVTT_Format_RGBA = NVTT_Format_RGB, + + // DX9 formats. + NVTT_Format_DXT1, + NVTT_Format_DXT1a, + NVTT_Format_DXT3, + NVTT_Format_DXT5, + NVTT_Format_DXT5n, + + // DX10 formats. + NVTT_Format_BC1 = NVTT_Format_DXT1, + NVTT_Format_BC1a = NVTT_Format_DXT1a, + NVTT_Format_BC2 = NVTT_Format_DXT3, + NVTT_Format_BC3 = NVTT_Format_DXT5, + NVTT_Format_BC3n = NVTT_Format_DXT5n, + NVTT_Format_BC4, + NVTT_Format_BC5, +} NvttFormat; + +/// Quality modes. +typedef enum +{ + NVTT_Quality_Fastest, + NVTT_Quality_Normal, + NVTT_Quality_Production, + NVTT_Quality_Highest, +} NvttQuality; + +/// Wrap modes. +typedef enum +{ + NVTT_WrapMode_Clamp, + NVTT_WrapMode_Repeat, + NVTT_WrapMode_Mirror, +} NvttWrapMode; + +/// Texture types. +typedef enum +{ + NVTT_TextureType_2D, + NVTT_TextureType_Cube, +} NvttTextureType; + +/// Input formats. +typedef enum +{ + NVTT_InputFormat_BGRA_8UB, +} NvttInputFormat; + +/// Mipmap downsampling filters. +typedef enum +{ + NVTT_MipmapFilter_Box, + NVTT_MipmapFilter_Triangle, + NVTT_MipmapFilter_Kaiser, +} NvttMipmapFilter; + +/// Extents rounding mode. +typedef enum +{ + NVTT_RoundMode_None, + NVTT_RoundMode_ToNextPowerOfTwo, + NVTT_RoundMode_ToNearestPowerOfTwo, + NVTT_RoundMode_ToPreviousPowerOfTwo, +} NvttRoundMode; + +/// Alpha mode. +typedef enum +{ + NVTT_AlphaMode_None, + NVTT_AlphaMode_Transparency, + NVTT_AlphaMode_Premultiplied, +} NvttAlphaMode; + +typedef enum +{ + NVTT_Error_InvalidInput, + NVTT_Error_UserInterruption, + NVTT_Error_UnsupportedFeature, + NVTT_Error_CudaError, + NVTT_Error_Unknown, + NVTT_Error_FileOpen, + NVTT_Error_FileWrite, + NVTT_Error_UnsupportedOutputFormat, +} NvttError; + +typedef enum +{ + NVTT_False, + NVTT_True, +} NvttBoolean; + + +#ifdef __cplusplus +extern "C" { +#endif + +// Callbacks +//typedef void (* nvttErrorHandler)(NvttError e); +typedef void (* nvttBeginImageHandler)(int size, int width, int height, int depth, int face, int miplevel); +typedef bool (* nvttOutputHandler)(const void * data, int size); +typedef void (* nvttEndImageHandler)(); + + +// InputOptions class. 
+NVTT_API NvttInputOptions * nvttCreateInputOptions(); +NVTT_API void nvttDestroyInputOptions(NvttInputOptions * inputOptions); + +NVTT_API void nvttSetInputOptionsTextureLayout(NvttInputOptions * inputOptions, NvttTextureType type, int w, int h, int d); +NVTT_API void nvttResetInputOptionsTextureLayout(NvttInputOptions * inputOptions); +NVTT_API NvttBoolean nvttSetInputOptionsMipmapData(NvttInputOptions * inputOptions, const void * data, int w, int h, int d, int face, int mipmap); +NVTT_API void nvttSetInputOptionsFormat(NvttInputOptions * inputOptions, NvttInputFormat format); +NVTT_API void nvttSetInputOptionsAlphaMode(NvttInputOptions * inputOptions, NvttAlphaMode alphaMode); +NVTT_API void nvttSetInputOptionsGamma(NvttInputOptions * inputOptions, float inputGamma, float outputGamma); +NVTT_API void nvttSetInputOptionsWrapMode(NvttInputOptions * inputOptions, NvttWrapMode mode); +NVTT_API void nvttSetInputOptionsMipmapFilter(NvttInputOptions * inputOptions, NvttMipmapFilter filter); +NVTT_API void nvttSetInputOptionsMipmapGeneration(NvttInputOptions * inputOptions, NvttBoolean enabled, int maxLevel); +NVTT_API void nvttSetInputOptionsKaiserParameters(NvttInputOptions * inputOptions, float width, float alpha, float stretch); +NVTT_API void nvttSetInputOptionsNormalMap(NvttInputOptions * inputOptions, NvttBoolean b); +NVTT_API void nvttSetInputOptionsConvertToNormalMap(NvttInputOptions * inputOptions, NvttBoolean convert); +NVTT_API void nvttSetInputOptionsHeightEvaluation(NvttInputOptions * inputOptions, float redScale, float greenScale, float blueScale, float alphaScale); +NVTT_API void nvttSetInputOptionsNormalFilter(NvttInputOptions * inputOptions, float sm, float medium, float big, float large); +NVTT_API void nvttSetInputOptionsNormalizeMipmaps(NvttInputOptions * inputOptions, NvttBoolean b); +NVTT_API void nvttSetInputOptionsMaxExtents(NvttInputOptions * inputOptions, int dim); +NVTT_API void nvttSetInputOptionsRoundMode(NvttInputOptions * inputOptions, NvttRoundMode mode); + + +// CompressionOptions class. +NVTT_API NvttCompressionOptions * nvttCreateCompressionOptions(); +NVTT_API void nvttDestroyCompressionOptions(NvttCompressionOptions * compressionOptions); + +NVTT_API void nvttSetCompressionOptionsFormat(NvttCompressionOptions * compressionOptions, NvttFormat format); +NVTT_API void nvttSetCompressionOptionsQuality(NvttCompressionOptions * compressionOptions, NvttQuality quality); +NVTT_API void nvttSetCompressionOptionsColorWeights(NvttCompressionOptions * compressionOptions, float red, float green, float blue, float alpha); +NVTT_API void nvttSetCompressionOptionsPixelFormat(NvttCompressionOptions * compressionOptions, unsigned int bitcount, unsigned int rmask, unsigned int gmask, unsigned int bmask, unsigned int amask); +NVTT_API void nvttSetCompressionOptionsQuantization(NvttCompressionOptions * compressionOptions, NvttBoolean colorDithering, NvttBoolean alphaDithering, NvttBoolean binaryAlpha, int alphaThreshold); + + +// OutputOptions class. 
+NVTT_API NvttOutputOptions * nvttCreateOutputOptions(); +NVTT_API void nvttDestroyOutputOptions(NvttOutputOptions * outputOptions); + +NVTT_API void nvttSetOutputOptionsFileName(NvttOutputOptions * outputOptions, const char * fileName); +NVTT_API void nvttSetOutputOptionsOutputHeader(NvttOutputOptions * outputOptions, NvttBoolean b); +//NVTT_API void nvttSetOutputOptionsErrorHandler(NvttOutputOptions * outputOptions, nvttErrorHandler errorHandler); +NVTT_API void nvttSetOutputOptionsOutputHandler(NvttOutputOptions * outputOptions, nvttBeginImageHandler beginImageHandler, nvttOutputHandler outputHandler, nvttEndImageHandler endImageHandler); + + +// Compressor class. +NVTT_API NvttCompressor * nvttCreateCompressor(); +NVTT_API void nvttDestroyCompressor(NvttCompressor * compressor); + +NVTT_API NvttBoolean nvttCompress(const NvttCompressor * compressor, const NvttInputOptions * inputOptions, const NvttCompressionOptions * compressionOptions, const NvttOutputOptions * outputOptions); +NVTT_API int nvttEstimateSize(const NvttCompressor * compressor, const NvttInputOptions * inputOptions, const NvttCompressionOptions * compressionOptions); + + +// Global functions. +NVTT_API const char * nvttErrorString(NvttError e); +NVTT_API unsigned int nvttVersion(); + + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // NVTT_WRAPPER_H Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/nvtt_wrapper.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/nvtt_wrapper.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/nvtt_wrapper.cpp @@ -1,208 +1,293 @@ - -#include "nvtt.h" -#include "nvtt_wrapper.h" - - -// InputOptions class. -NvttInputOptions * nvttCreateInputOptions() -{ - return new nvtt::InputOptions(); -} - -void nvttDestroyInputOptions(NvttInputOptions * inputOptions) -{ - delete inputOptions; -} - -void nvttSetInputOptionsTextureLayout(NvttInputOptions * inputOptions, NvttTextureType type, int w, int h, int d) -{ - inputOptions->setTextureLayout((nvtt::TextureType)type, w, h, d); -} - -void nvttResetInputOptionsTextureLayout(NvttInputOptions * inputOptions) -{ - inputOptions->resetTextureLayout(); -} - -NvttBoolean nvttSetInputOptionsMipmapData(NvttInputOptions * inputOptions, const void * data, int w, int h, int d, int face, int mipmap) -{ - return (NvttBoolean)inputOptions->setMipmapData(data, w, h, d, face, mipmap); -} - -void nvttSetInputOptionsFormat(NvttInputOptions * inputOptions, NvttInputFormat format) -{ - inputOptions->setFormat((nvtt::InputFormat)format); -} - -void nvttSetInputOptionsAlphaMode(NvttInputOptions * inputOptions, NvttAlphaMode alphaMode) -{ - inputOptions->setAlphaMode((nvtt::AlphaMode)alphaMode); -} - -void nvttSetInputOptionsGamma(NvttInputOptions * inputOptions, float inputGamma, float outputGamma) -{ - inputOptions->setGamma(inputGamma, outputGamma); -} - -void nvttSetInputOptionsWrapMode(NvttInputOptions * inputOptions, NvttWrapMode mode) -{ - inputOptions->setWrapMode((nvtt::WrapMode)mode); -} - -void nvttSetInputOptionsMipmapFilter(NvttInputOptions * inputOptions, NvttMipmapFilter filter) -{ - inputOptions->setMipmapFilter((nvtt::MipmapFilter)filter); -} - -void nvttSetInputOptionsMipmapGeneration(NvttInputOptions * inputOptions, NvttBoolean enabled, int maxLevel) -{ - inputOptions->setMipmapGeneration(enabled != NVTT_False, maxLevel); -} - -void nvttSetInputOptionsKaiserParameters(NvttInputOptions * inputOptions, float width, float alpha, float stretch) -{ - 
inputOptions->setKaiserParameters(width, alpha, stretch); -} - -void nvttSetInputOptionsNormalMap(NvttInputOptions * inputOptions, NvttBoolean b) -{ - inputOptions->setNormalMap(b != NVTT_False); -} - -void nvttSetInputOptionsConvertToNormalMap(NvttInputOptions * inputOptions, NvttBoolean convert) -{ - inputOptions->setConvertToNormalMap(convert != NVTT_False); -} - -void nvttSetInputOptionsHeightEvaluation(NvttInputOptions * inputOptions, float redScale, float greenScale, float blueScale, float alphaScale) -{ - inputOptions->setHeightEvaluation(redScale, greenScale, blueScale, alphaScale); -} - -void nvttSetInputOptionsNormalFilter(NvttInputOptions * inputOptions, float small, float medium, float big, float large) -{ - inputOptions->setNormalFilter(small, medium, big, large); -} - -void nvttSetInputOptionsNormalizeMipmaps(NvttInputOptions * inputOptions, NvttBoolean b) -{ - inputOptions->setNormalizeMipmaps(b != NVTT_False); -} - -void nvttSetInputOptionsColorTransform(NvttInputOptions * inputOptions, NvttColorTransform t) -{ - inputOptions->setColorTransform((nvtt::ColorTransform)t); -} - -void nvttSetInputOptionsLinearTransfrom(NvttInputOptions * inputOptions, int channel, float w0, float w1, float w2, float w3) -{ - inputOptions->setLinearTransform(channel, w0, w1, w2, w3); -} - -void nvttSetInputOptionsMaxExtents(NvttInputOptions * inputOptions, int dim) -{ - inputOptions->setMaxExtents(dim); -} - -void nvttSetInputOptionsRoundMode(NvttInputOptions * inputOptions, NvttRoundMode mode) -{ - inputOptions->setRoundMode((nvtt::RoundMode)mode); -} - - -// CompressionOptions class. -NvttCompressionOptions * nvttCreateCompressionOptions() -{ - return new nvtt::CompressionOptions(); -} - -void nvttDestroyCompressionOptions(NvttCompressionOptions * compressionOptions) -{ - delete compressionOptions; -} - -void nvttSetCompressionOptionsFormat(NvttCompressionOptions * compressionOptions, NvttFormat format) -{ - compressionOptions->setFormat((nvtt::Format)format); -} - -void nvttSetCompressionOptionsQuality(NvttCompressionOptions * compressionOptions, NvttQuality quality) -{ - compressionOptions->setQuality((nvtt::Quality)quality); -} - -void nvttSetCompressionOptionsColorWeights(NvttCompressionOptions * compressionOptions, float red, float green, float blue, float alpha) -{ - compressionOptions->setColorWeights(red, green, blue, alpha); -} - -/*void nvttEnableCompressionOptionsCudaCompression(NvttCompressionOptions * compressionOptions, NvttBoolean enable) -{ - compressionOptions->enableCudaCompression(enable != NVTT_False); -}*/ - -void nvttSetCompressionOptionsPixelFormat(NvttCompressionOptions * compressionOptions, unsigned int bitcount, unsigned int rmask, unsigned int gmask, unsigned int bmask, unsigned int amask) -{ - compressionOptions->setPixelFormat(bitcount, rmask, gmask, bmask, amask); -} - -void nvttSetCompressionOptionsQuantization(NvttCompressionOptions * compressionOptions, NvttBoolean colorDithering, NvttBoolean alphaDithering, NvttBoolean binaryAlpha, int alphaThreshold) -{ - compressionOptions->setQuantization(colorDithering != NVTT_False, alphaDithering != NVTT_False, binaryAlpha != NVTT_False, alphaThreshold); -} - - -// OutputOptions class. 
-NvttOutputOptions * nvttCreateOutputOptions() -{ - return new nvtt::OutputOptions(); -} - -void nvttDestroyOutputOptions(NvttOutputOptions * outputOptions) -{ - delete outputOptions; -} - -void nvttSetOutputOptionsFileName(NvttOutputOptions * outputOptions, const char * fileName) -{ - outputOptions->setFileName(fileName); -} - -void nvttSetOutputOptionsOutputHeader(NvttOutputOptions * outputOptions, NvttBoolean b) -{ - outputOptions->setOutputHeader(b != NVTT_False); -} -/* -void nvttSetOutputOptionsErrorHandler(NvttOutputOptions * outputOptions, nvttErrorHandler errorHandler) -{ - outputOptions->setErrorHandler(errorHandler); -} - -void nvttSetOutputOptionsOutputHandler(NvttOutputOptions * outputOptions, nvttOutputHandler outputHandler, nvttImageHandler imageHandler) -{ -} -*/ - - -// Compressor class. -NvttBoolean nvttCompress(const NvttCompressor * compressor, const NvttInputOptions * inputOptions, const NvttCompressionOptions * compressionOptions, const NvttOutputOptions * outputOptions) -{ - return (NvttBoolean)compressor->process(*inputOptions, *compressionOptions, *outputOptions); -} - -int nvttEstimateSize(const NvttCompressor * compressor, const NvttInputOptions * inputOptions, const NvttCompressionOptions * compressionOptions) -{ - return compressor->estimateSize(*inputOptions, *compressionOptions); -} - - -// Global functions. -const char * nvttErrorString(NvttError e) -{ - return nvtt::errorString((nvtt::Error)e); -} - -unsigned int nvttVersion() -{ - return nvtt::version(); -} +// Copyright (c) 2009-2011 Ignacio Castano +// Copyright (c) 2007-2009 NVIDIA Corporation -- Ignacio Castano +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. 
+ +#include "nvtt.h" +#include "nvtt_wrapper.h" + +#include "OutputOptions.h" + +// An OutputHandler that sets and calls function pointers, rather than +// requiring interfaces to derive from OutputHandler itself +struct HandlerProxy : public nvtt::OutputHandler +{ +public: + + HandlerProxy() {} + + nvttBeginImageHandler beginImageHandler; + nvttOutputHandler writeDataHandler; + nvttEndImageHandler endImageHandler; + + virtual void beginImage(int size, int width, int height, int depth, int face, int miplevel) + { + if (beginImageHandler != NULL) + { + beginImageHandler(size, width, height, depth, face, miplevel); + } + } + + + virtual bool writeData(const void * data, int size) + { + if (writeDataHandler != NULL) + { + return writeDataHandler(data, size); + } + return false; + } + + virtual void endImage() + { + if (endImageHandler != NULL) + { + endImageHandler(); + } + } +}; + + +// InputOptions class. +NvttInputOptions * nvttCreateInputOptions() +{ + return new nvtt::InputOptions(); +} + +void nvttDestroyInputOptions(NvttInputOptions * inputOptions) +{ + delete inputOptions; +} + +void nvttSetInputOptionsTextureLayout(NvttInputOptions * inputOptions, NvttTextureType type, int w, int h, int d) +{ + inputOptions->setTextureLayout((nvtt::TextureType)type, w, h, d); +} + +void nvttResetInputOptionsTextureLayout(NvttInputOptions * inputOptions) +{ + inputOptions->resetTextureLayout(); +} + +NvttBoolean nvttSetInputOptionsMipmapData(NvttInputOptions * inputOptions, const void * data, int w, int h, int d, int face, int mipmap) +{ + return (NvttBoolean)inputOptions->setMipmapData(data, w, h, d, face, mipmap); +} + +void nvttSetInputOptionsFormat(NvttInputOptions * inputOptions, NvttInputFormat format) +{ + inputOptions->setFormat((nvtt::InputFormat)format); +} + +void nvttSetInputOptionsAlphaMode(NvttInputOptions * inputOptions, NvttAlphaMode alphaMode) +{ + inputOptions->setAlphaMode((nvtt::AlphaMode)alphaMode); +} + +void nvttSetInputOptionsGamma(NvttInputOptions * inputOptions, float inputGamma, float outputGamma) +{ + inputOptions->setGamma(inputGamma, outputGamma); +} + +void nvttSetInputOptionsWrapMode(NvttInputOptions * inputOptions, NvttWrapMode mode) +{ + inputOptions->setWrapMode((nvtt::WrapMode)mode); +} + +void nvttSetInputOptionsMipmapFilter(NvttInputOptions * inputOptions, NvttMipmapFilter filter) +{ + inputOptions->setMipmapFilter((nvtt::MipmapFilter)filter); +} + +void nvttSetInputOptionsMipmapGeneration(NvttInputOptions * inputOptions, NvttBoolean enabled, int maxLevel) +{ + inputOptions->setMipmapGeneration(enabled != NVTT_False, maxLevel); +} + +void nvttSetInputOptionsKaiserParameters(NvttInputOptions * inputOptions, float width, float alpha, float stretch) +{ + inputOptions->setKaiserParameters(width, alpha, stretch); +} + +void nvttSetInputOptionsNormalMap(NvttInputOptions * inputOptions, NvttBoolean b) +{ + inputOptions->setNormalMap(b != NVTT_False); +} + +void nvttSetInputOptionsConvertToNormalMap(NvttInputOptions * inputOptions, NvttBoolean convert) +{ + inputOptions->setConvertToNormalMap(convert != NVTT_False); +} + +void nvttSetInputOptionsHeightEvaluation(NvttInputOptions * inputOptions, float redScale, float greenScale, float blueScale, float alphaScale) +{ + inputOptions->setHeightEvaluation(redScale, greenScale, blueScale, alphaScale); +} + +void nvttSetInputOptionsNormalFilter(NvttInputOptions * inputOptions, float small, float medium, float big, float large) +{ + inputOptions->setNormalFilter(small, medium, big, large); +} + +void 
nvttSetInputOptionsNormalizeMipmaps(NvttInputOptions * inputOptions, NvttBoolean b) +{ + inputOptions->setNormalizeMipmaps(b != NVTT_False); +} + +void nvttSetInputOptionsMaxExtents(NvttInputOptions * inputOptions, int dim) +{ + inputOptions->setMaxExtents(dim); +} + +void nvttSetInputOptionsRoundMode(NvttInputOptions * inputOptions, NvttRoundMode mode) +{ + inputOptions->setRoundMode((nvtt::RoundMode)mode); +} + + +// CompressionOptions class. +NvttCompressionOptions * nvttCreateCompressionOptions() +{ + return new nvtt::CompressionOptions(); +} + +void nvttDestroyCompressionOptions(NvttCompressionOptions * compressionOptions) +{ + delete compressionOptions; +} + +void nvttSetCompressionOptionsFormat(NvttCompressionOptions * compressionOptions, NvttFormat format) +{ + compressionOptions->setFormat((nvtt::Format)format); +} + +void nvttSetCompressionOptionsQuality(NvttCompressionOptions * compressionOptions, NvttQuality quality) +{ + compressionOptions->setQuality((nvtt::Quality)quality); +} + +void nvttSetCompressionOptionsColorWeights(NvttCompressionOptions * compressionOptions, float red, float green, float blue, float alpha) +{ + compressionOptions->setColorWeights(red, green, blue, alpha); +} + +/*void nvttEnableCompressionOptionsCudaCompression(NvttCompressionOptions * compressionOptions, NvttBoolean enable) +{ +compressionOptions->enableCudaCompression(enable != NVTT_False); +}*/ + +void nvttSetCompressionOptionsPixelFormat(NvttCompressionOptions * compressionOptions, unsigned int bitcount, unsigned int rmask, unsigned int gmask, unsigned int bmask, unsigned int amask) +{ + compressionOptions->setPixelFormat(bitcount, rmask, gmask, bmask, amask); +} + +void nvttSetCompressionOptionsQuantization(NvttCompressionOptions * compressionOptions, NvttBoolean colorDithering, NvttBoolean alphaDithering, NvttBoolean binaryAlpha, int alphaThreshold) +{ + compressionOptions->setQuantization(colorDithering != NVTT_False, alphaDithering != NVTT_False, binaryAlpha != NVTT_False, alphaThreshold); +} + + +// OutputOptions class. 
+NvttOutputOptions * nvttCreateOutputOptions() +{ + nvtt::OutputOptions * outputOptions = new nvtt::OutputOptions(); + HandlerProxy * handlerProxy = new HandlerProxy(); + + outputOptions->m.wrapperProxy = handlerProxy; + + return outputOptions; +} + +void nvttDestroyOutputOptions(NvttOutputOptions * outputOptions) +{ + HandlerProxy * handlerProxy = (HandlerProxy *)outputOptions->m.wrapperProxy; + delete handlerProxy; + delete outputOptions; +} + +void nvttSetOutputOptionsFileName(NvttOutputOptions * outputOptions, const char * fileName) +{ + outputOptions->setFileName(fileName); +} + +void nvttSetOutputOptionsOutputHeader(NvttOutputOptions * outputOptions, NvttBoolean b) +{ + outputOptions->setOutputHeader(b != NVTT_False); +} +/* +void nvttSetOutputOptionsErrorHandler(NvttOutputOptions * outputOptions, nvttErrorHandler errorHandler) +{ + outputOptions->setErrorHandler(errorHandler); +} +*/ + +void nvttSetOutputOptionsOutputHandler(NvttOutputOptions * outputOptions, nvttBeginImageHandler beginImageHandler, nvttOutputHandler writeDataHandler, nvttEndImageHandler endImageHandler) +{ + HandlerProxy * handler = (HandlerProxy *)outputOptions->m.wrapperProxy; + + handler->beginImageHandler = beginImageHandler; + handler->writeDataHandler = writeDataHandler; + handler->endImageHandler = endImageHandler; + + if(beginImageHandler == NULL && writeDataHandler == NULL && endImageHandler == NULL) + { + outputOptions->setOutputHandler(NULL); + } + else + { + outputOptions->setOutputHandler(handler); + } +} + + +// Compressor class. +NvttCompressor * nvttCreateCompressor() +{ + return new nvtt::Compressor(); +} + +void nvttDestroyCompressor(NvttCompressor * compressor) +{ + delete compressor; +} + +NvttBoolean nvttCompress(const NvttCompressor * compressor, const NvttInputOptions * inputOptions, const NvttCompressionOptions * compressionOptions, const NvttOutputOptions * outputOptions) +{ + return (NvttBoolean)compressor->process(*inputOptions, *compressionOptions, *outputOptions); +} + +int nvttEstimateSize(const NvttCompressor * compressor, const NvttInputOptions * inputOptions, const NvttCompressionOptions * compressionOptions) +{ + return compressor->estimateSize(*inputOptions, *compressionOptions); +} + + +// Global functions. 
+const char * nvttErrorString(NvttError e) +{ + return nvtt::errorString((nvtt::Error)e); +} + +unsigned int nvttVersion() +{ + return nvtt::version(); +} Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/CMakeLists.txt =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/CMakeLists.txt +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/CMakeLists.txt @@ -22,11 +22,14 @@ ADD_LIBRARY(squish STATIC ${SQUISH_SRCS}) -IF("${CMAKE_CXX_COMPILER}" MATCHES "clang(\\+\\+)?$" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") - SET(CMAKE_COMPILER_IS_CLANGXX 1) -ENDIF() - -IF(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANGXX) - SET_TARGET_PROPERTIES(squish PROPERTIES COMPILE_FLAGS -fPIC) -ENDIF(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANGXX) +IF(NOT WIN32) + + IF("${CMAKE_CXX_COMPILER}" MATCHES "clang(\\+\\+)?$" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") + SET(CMAKE_COMPILER_IS_CLANGXX 1) + ENDIF() + + IF(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANGXX) + SET_TARGET_PROPERTIES(squish PROPERTIES COMPILE_FLAGS -fPIC) + ENDIF(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANGXX) +ENDIF(NOT WIN32) Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/alpha.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/alpha.h +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/alpha.h @@ -26,7 +26,7 @@ #ifndef SQUISH_ALPHA_H #define SQUISH_ALPHA_H -#include +#include "squish.h" namespace squish { Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/clusterfit.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/clusterfit.h +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/clusterfit.h @@ -23,15 +23,15 @@ -------------------------------------------------------------------------- */ -#ifndef SQUISH_CLUSTERFIT_H -#define SQUISH_CLUSTERFIT_H +#ifndef NV_SQUISH_CLUSTERFIT_H +#define NV_SQUISH_CLUSTERFIT_H #include "squish.h" #include "maths.h" #include "simd.h" #include "colourfit.h" -namespace squish { +namespace nvsquish { class ClusterFit : public ColourFit { Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/clusterfit.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/clusterfit.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/clusterfit.cpp @@ -28,7 +28,7 @@ #include "colourblock.h" #include -namespace squish { +namespace nvsquish { ClusterFit::ClusterFit() { @@ -109,7 +109,7 @@ float ClusterFit::GetBestError() const { #if SQUISH_USE_SIMD - return m_besterror.GetVec3().X(); + return m_besterror.GetX(); #else return m_besterror; #endif @@ -280,15 +280,6 @@ m_beta[k] = m_weights[k]; } - /*unsigned int permutation = 0; - for(int p = 0; p < 16; p++) { - permutation |= indices[p] << (p * 2); - } - if (debug) printf("%X:\t", permutation); - - if (debug && permutation == 0x55FFFFAA) __debugbreak(); - */ - // solve a least squares problem to place the endpoints #if SQUISH_USE_SIMD Vec4 start, end; @@ -392,8 +383,7 @@ // clamp to the grid Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f ); -// Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f ); - Vec4 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f, 0.0f ); // IC: use approximate grid fitting. 
+ Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f ); Vec4 const onethird = VEC4_CONST( 1.0f/3.0f ); Vec4 const twothirds = VEC4_CONST( 2.0f/3.0f ); a = Truncate( MultiplyAdd( grid, a, half ) )*gridrcp; @@ -468,8 +458,7 @@ // clamp to the grid Vec3 const grid( 31.0f, 63.0f, 31.0f ); - //Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f ); - Vec3 const gridrcp(0.03227752766457f, 0.01583151765563f, 0.03227752766457f); // IC: use approximate grid fitting. + Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f ); Vec3 const half( 0.5f ); a = Floor( grid*a + half )*gridrcp; b = Floor( grid*b + half )*gridrcp; Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/colourblock.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/colourblock.h +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/colourblock.h @@ -23,13 +23,13 @@ -------------------------------------------------------------------------- */ -#ifndef SQUISH_COLOURBLOCK_H -#define SQUISH_COLOURBLOCK_H +#ifndef NV_SQUISH_COLOURBLOCK_H +#define NV_SQUISH_COLOURBLOCK_H #include "squish.h" #include "maths.h" -namespace squish { +namespace nvsquish { void WriteColourBlock3( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void* block ); void WriteColourBlock4( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void* block ); Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/colourblock.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/colourblock.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/colourblock.cpp @@ -25,7 +25,7 @@ #include "colourblock.h" -namespace squish { +namespace nvsquish { static int FloatToInt( float a, int limit ) { Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/colourfit.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/colourfit.h +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/colourfit.h @@ -23,13 +23,13 @@ -------------------------------------------------------------------------- */ -#ifndef SQUISH_COLOURFIT_H -#define SQUISH_COLOURFIT_H +#ifndef NV_SQUISH_COLOURFIT_H +#define NV_SQUISH_COLOURFIT_H #include "squish.h" #include "maths.h" -namespace squish { +namespace nvsquish { class ColourSet; Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/colourfit.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/colourfit.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/colourfit.cpp @@ -22,11 +22,11 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------- */ - + #include "colourfit.h" #include "colourset.h" -namespace squish { +namespace nvsquish { ColourFit::ColourFit() { Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/colourset.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/colourset.h +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/colourset.h @@ -23,21 +23,21 @@ -------------------------------------------------------------------------- */ -#ifndef SQUISH_COLOURSET_H -#define SQUISH_COLOURSET_H +#ifndef NV_SQUISH_COLOURSET_H +#define NV_SQUISH_COLOURSET_H #include "squish.h" #include "maths.h" #include "simd.h" -namespace squish { +namespace nvsquish { /*! 
@brief Represents a set of block colours */ class ColourSet { public: - ColourSet( u8 const* rgba, int flags, bool createMinimalSet = false ); + ColourSet( u8 const* rgba, int flags, bool createMinimalSet = true ); int GetCount() const { return m_count; } Vec3 const* GetPoints() const { return m_points; } Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/colourset.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/colourset.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/colourset.cpp @@ -25,7 +25,7 @@ #include "colourset.h" -namespace squish { +namespace nvsquish { // @@ Add flags: // - MatchTransparent Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/extra/squishgen2.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/extra/squishgen2.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/extra/squishgen2.cpp @@ -0,0 +1,113 @@ +/* ----------------------------------------------------------------------------- + + Copyright (c) 2006 Simon Brown si@sjbrown.co.uk + Copyright (c) 2008 Ignacio Castano castano@gmail.com + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + -------------------------------------------------------------------------- */ + +#include +#include +#include + +struct Precomp { + float alpha2_sum; + float beta2_sum; + float alphabeta_sum; + float factor; +}; + + +int main() +{ + int i = 0; + + printf("struct Precomp {\n"); + printf("\tfloat alpha2_sum;\n"); + printf("\tfloat beta2_sum;\n"); + printf("\tfloat alphabeta_sum;\n"); + printf("\tfloat factor;\n"); + printf("};\n\n"); + + printf("static const SQUISH_ALIGN_16 Precomp s_threeElement[153] = {\n"); + + // Three element clusters: + for( int c0 = 0; c0 <= 16; c0++) // At least two clusters. 
+ { + for( int c1 = 0; c1 <= 16-c0; c1++) + { + int c2 = 16 - c0 - c1; + + Precomp p; + p.alpha2_sum = c0 + c1 * 0.25f; + p.beta2_sum = c2 + c1 * 0.25f; + p.alphabeta_sum = c1 * 0.25f; + p.factor = 1.0f / (p.alpha2_sum * p.beta2_sum - p.alphabeta_sum * p.alphabeta_sum); + + if (isfinite(p.factor)) + { + printf("\t{ %ff, %ff, %ff, %ff }, // %d (%d %d %d)\n", p.alpha2_sum, p.beta2_sum, p.alphabeta_sum, p.factor, i, c0, c1, c2); + } + else + { + printf("\t{ %ff, %ff, %ff, FLT_MAX }, // %d (%d %d %d)\n", p.alpha2_sum, p.beta2_sum, p.alphabeta_sum, i, c0, c1, c2); + } + + i++; + } + } + printf("}; // %d three cluster elements\n\n", i); + + printf("static const SQUISH_ALIGN_16 Precomp s_fourElement[969] = {\n"); + + // Four element clusters: + i = 0; + for( int c0 = 0; c0 <= 16; c0++) + { + for( int c1 = 0; c1 <= 16-c0; c1++) + { + for( int c2 = 0; c2 <= 16-c0-c1; c2++) + { + int c3 = 16 - c0 - c1 - c2; + + Precomp p; + p.alpha2_sum = c0 + c1 * (4.0f/9.0f) + c2 * (1.0f/9.0f); + p.beta2_sum = c3 + c2 * (4.0f/9.0f) + c1 * (1.0f/9.0f); + p.alphabeta_sum = (c1 + c2) * (2.0f/9.0f); + p.factor = 1.0f / (p.alpha2_sum * p.beta2_sum - p.alphabeta_sum * p.alphabeta_sum); + + if (isfinite(p.factor)) + { + printf("\t{ %ff, %ff, %ff, %ff }, // %d (%d %d %d %d)\n", p.alpha2_sum, p.beta2_sum, p.alphabeta_sum, p.factor, i, c0, c1, c2, c3); + } + else + { + printf("\t{ %ff, %ff, %ff, FLT_MAX }, // %d (%d %d %d %d)\n", p.alpha2_sum, p.beta2_sum, p.alphabeta_sum, i, c0, c1, c2, c3); + } + + i++; + } + } + } + printf("}; // %d four cluster elements\n\n", i); + + return 0; +} Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/fastclusterfit.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/fastclusterfit.h +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/fastclusterfit.h @@ -24,15 +24,15 @@ -------------------------------------------------------------------------- */ -#ifndef SQUISH_FASTCLUSTERFIT_H -#define SQUISH_FASTCLUSTERFIT_H +#ifndef NV_SQUISH_FASTCLUSTERFIT_H +#define NV_SQUISH_FASTCLUSTERFIT_H #include "squish.h" #include "maths.h" #include "simd.h" #include "colourfit.h" -namespace squish { +namespace nvsquish { class FastClusterFit : public ColourFit { @@ -53,14 +53,14 @@ Vec3 m_principle; #if SQUISH_USE_SIMD - Vec4 m_unweighted[16]; + Vec4 m_unweighted[17]; Vec4 m_metric; Vec4 m_metricSqr; Vec4 m_xxsum; Vec4 m_xsum; Vec4 m_besterror; #else - Vec3 m_unweighted[16]; + Vec3 m_unweighted[17]; Vec3 m_metric; Vec3 m_metricSqr; Vec3 m_xxsum; Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/fastclusterfit.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/fastclusterfit.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/fastclusterfit.cpp @@ -31,7 +31,7 @@ #include "fastclusterlookup.inl" -namespace squish { +namespace nvsquish { FastClusterFit::FastClusterFit() { @@ -129,6 +129,8 @@ Vec4 const zero = VEC4_CONST(0.0f); Vec4 const half = VEC4_CONST(0.5f); Vec4 const two = VEC4_CONST(2.0); + Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f ); + Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f ); // declare variables Vec4 beststart = VEC4_CONST( 0.0f ); @@ -160,25 +162,22 @@ Vec4 a = NegativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor; Vec4 b = NegativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor; - // clamp the output to [0, 1] + // clamp to the grid a = Min( one, Max( 
zero, a ) ); b = Min( one, Max( zero, b ) ); - - // clamp to the grid - Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f ); - Vec4 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f, 0.0f ); a = Truncate( MultiplyAdd( grid, a, half ) ) * gridrcp; b = Truncate( MultiplyAdd( grid, b, half ) ) * gridrcp; - // compute the error - Vec4 e1 = MultiplyAdd( a, alphax_sum, b*betax_sum ); - Vec4 e2 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum ); - Vec4 e3 = MultiplyAdd( a*b*alphabeta_sum - e1, two, e2 ); - + // compute the error (we skip the constant xxsum) + Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum ); + Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum ); + Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 ); + Vec4 e4 = MultiplyAdd( two, e3, e1 ); + // apply the metric to the error term - Vec4 e4 = e3 * m_metricSqr; - Vec4 error = e4.SplatX() + e4.SplatY() + e4.SplatZ(); - + Vec4 e5 = e4 * m_metricSqr; + Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ(); + // keep the solution if it wins if( CompareAnyLessThan( error, besterror ) ) { @@ -274,7 +273,7 @@ Vec4 const factor = constants.SplatW(); i++; - Vec4 const alphax_sum = x0 + MultiplyAdd(x1, twothirds, x2 * onethird); + Vec4 const alphax_sum = MultiplyAdd(x2, onethird, MultiplyAdd(x1, twothirds, x0)); Vec4 const betax_sum = m_xsum - alphax_sum; Vec4 a = NegativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor; @@ -286,18 +285,19 @@ // clamp to the grid Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f ); - Vec4 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f, 0.0f ); + Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f ); a = Truncate( MultiplyAdd( grid, a, half ) ) * gridrcp; b = Truncate( MultiplyAdd( grid, b, half ) ) * gridrcp; - // compute the error - Vec4 e1 = MultiplyAdd( a, alphax_sum, b*betax_sum ); - Vec4 e2 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum ); - Vec4 e3 = MultiplyAdd( a*b*alphabeta_sum - e1, two, e2 ); - + // compute the error (we skip the constant xxsum) + Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum ); + Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum ); + Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 ); + Vec4 e4 = MultiplyAdd( two, e3, e1 ); + // apply the metric to the error term - Vec4 e4 = e3 * m_metricSqr; - Vec4 error = e4.SplatX() + e4.SplatY() + e4.SplatZ(); + Vec4 e5 = e4 * m_metricSqr; + Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ(); // keep the solution if it wins if( CompareAnyLessThan( error, besterror ) ) @@ -370,6 +370,12 @@ void FastClusterFit::Compress3( void* block ) { + Vec3 const one( 1.0f ); + Vec3 const zero( 0.0f ); + Vec3 const half( 0.5f ); + Vec3 const grid( 31.0f, 63.0f, 31.0f ); + Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f ); + // declare variables Vec3 beststart( 0.0f ); Vec3 bestend( 0.0f ); @@ -399,16 +405,9 @@ Vec3 a = (alphax_sum*beta2_sum - betax_sum*alphabeta_sum) * factor; Vec3 b = (betax_sum*alpha2_sum - alphax_sum*alphabeta_sum) * factor; - // clamp the output to [0, 1] - Vec3 const one( 1.0f ); - Vec3 const zero( 0.0f ); + // clamp to the grid a = Min( one, Max( zero, a ) ); b = Min( one, Max( zero, b ) ); - - // clamp to the grid - Vec3 const grid( 31.0f, 63.0f, 31.0f ); - Vec3 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f ); - Vec3 const half( 0.5f ); a = Floor( grid*a + half )*gridrcp; b = Floor( grid*b + half )*gridrcp; @@ -477,6 +476,12 @@ void FastClusterFit::Compress4( void* block ) { + Vec3 
const one( 1.0f ); + Vec3 const zero( 0.0f ); + Vec3 const half( 0.5f ); + Vec3 const grid( 31.0f, 63.0f, 31.0f ); + Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f ); + // declare variables Vec3 beststart( 0.0f ); Vec3 bestend( 0.0f ); @@ -511,16 +516,9 @@ Vec3 a = ( alphax_sum*beta2_sum - betax_sum*alphabeta_sum )*factor; Vec3 b = ( betax_sum*alpha2_sum - alphax_sum*alphabeta_sum )*factor; - // clamp the output to [0, 1] - Vec3 const one( 1.0f ); - Vec3 const zero( 0.0f ); + // clamp to the grid a = Min( one, Max( zero, a ) ); b = Min( one, Max( zero, b ) ); - - // clamp to the grid - Vec3 const grid( 31.0f, 63.0f, 31.0f ); - Vec3 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f ); - Vec3 const half( 0.5f ); a = Floor( grid*a + half )*gridrcp; b = Floor( grid*b + half )*gridrcp; Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/fastclusterlookup.inl =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/fastclusterlookup.inl +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/fastclusterlookup.inl @@ -1,1135 +1,1135 @@ -struct Precomp { - float alpha2_sum; - float beta2_sum; - float alphabeta_sum; - float factor; -}; - -static const SQUISH_ALIGN_16 Precomp s_threeElement[153] = { - { 0.000000f, 16.000000f, 0.000000f, FLT_MAX }, // 0 (0 0 16) - { 0.250000f, 15.250000f, 0.250000f, 0.266667f }, // 1 (0 1 15) - { 0.500000f, 14.500000f, 0.500000f, 0.142857f }, // 2 (0 2 14) - { 0.750000f, 13.750000f, 0.750000f, 0.102564f }, // 3 (0 3 13) - { 1.000000f, 13.000000f, 1.000000f, 0.083333f }, // 4 (0 4 12) - { 1.250000f, 12.250000f, 1.250000f, 0.072727f }, // 5 (0 5 11) - { 1.500000f, 11.500000f, 1.500000f, 0.066667f }, // 6 (0 6 10) - { 1.750000f, 10.750000f, 1.750000f, 0.063492f }, // 7 (0 7 9) - { 2.000000f, 10.000000f, 2.000000f, 0.062500f }, // 8 (0 8 8) - { 2.250000f, 9.250000f, 2.250000f, 0.063492f }, // 9 (0 9 7) - { 2.500000f, 8.500000f, 2.500000f, 0.066667f }, // 10 (0 10 6) - { 2.750000f, 7.750000f, 2.750000f, 0.072727f }, // 11 (0 11 5) - { 3.000000f, 7.000000f, 3.000000f, 0.083333f }, // 12 (0 12 4) - { 3.250000f, 6.250000f, 3.250000f, 0.102564f }, // 13 (0 13 3) - { 3.500000f, 5.500000f, 3.500000f, 0.142857f }, // 14 (0 14 2) - { 3.750000f, 4.750000f, 3.750000f, 0.266667f }, // 15 (0 15 1) - { 4.000000f, 4.000000f, 4.000000f, FLT_MAX }, // 16 (0 16 0) - { 1.000000f, 15.000000f, 0.000000f, 0.066667f }, // 17 (1 0 15) - { 1.250000f, 14.250000f, 0.250000f, 0.056338f }, // 18 (1 1 14) - { 1.500000f, 13.500000f, 0.500000f, 0.050000f }, // 19 (1 2 13) - { 1.750000f, 12.750000f, 0.750000f, 0.045977f }, // 20 (1 3 12) - { 2.000000f, 12.000000f, 1.000000f, 0.043478f }, // 21 (1 4 11) - { 2.250000f, 11.250000f, 1.250000f, 0.042105f }, // 22 (1 5 10) - { 2.500000f, 10.500000f, 1.500000f, 0.041667f }, // 23 (1 6 9) - { 2.750000f, 9.750000f, 1.750000f, 0.042105f }, // 24 (1 7 8) - { 3.000000f, 9.000000f, 2.000000f, 0.043478f }, // 25 (1 8 7) - { 3.250000f, 8.250000f, 2.250000f, 0.045977f }, // 26 (1 9 6) - { 3.500000f, 7.500000f, 2.500000f, 0.050000f }, // 27 (1 10 5) - { 3.750000f, 6.750000f, 2.750000f, 0.056338f }, // 28 (1 11 4) - { 4.000000f, 6.000000f, 3.000000f, 0.066667f }, // 29 (1 12 3) - { 4.250000f, 5.250000f, 3.250000f, 0.085106f }, // 30 (1 13 2) - { 4.500000f, 4.500000f, 3.500000f, 0.125000f }, // 31 (1 14 1) - { 4.750000f, 3.750000f, 3.750000f, 0.266667f }, // 32 (1 15 0) - { 2.000000f, 14.000000f, 0.000000f, 0.035714f }, // 33 (2 0 14) - { 2.250000f, 13.250000f, 0.250000f, 
0.033613f }, // 34 (2 1 13) - { 2.500000f, 12.500000f, 0.500000f, 0.032258f }, // 35 (2 2 12) - { 2.750000f, 11.750000f, 0.750000f, 0.031496f }, // 36 (2 3 11) - { 3.000000f, 11.000000f, 1.000000f, 0.031250f }, // 37 (2 4 10) - { 3.250000f, 10.250000f, 1.250000f, 0.031496f }, // 38 (2 5 9) - { 3.500000f, 9.500000f, 1.500000f, 0.032258f }, // 39 (2 6 8) - { 3.750000f, 8.750000f, 1.750000f, 0.033613f }, // 40 (2 7 7) - { 4.000000f, 8.000000f, 2.000000f, 0.035714f }, // 41 (2 8 6) - { 4.250000f, 7.250000f, 2.250000f, 0.038835f }, // 42 (2 9 5) - { 4.500000f, 6.500000f, 2.500000f, 0.043478f }, // 43 (2 10 4) - { 4.750000f, 5.750000f, 2.750000f, 0.050633f }, // 44 (2 11 3) - { 5.000000f, 5.000000f, 3.000000f, 0.062500f }, // 45 (2 12 2) - { 5.250000f, 4.250000f, 3.250000f, 0.085106f }, // 46 (2 13 1) - { 5.500000f, 3.500000f, 3.500000f, 0.142857f }, // 47 (2 14 0) - { 3.000000f, 13.000000f, 0.000000f, 0.025641f }, // 48 (3 0 13) - { 3.250000f, 12.250000f, 0.250000f, 0.025157f }, // 49 (3 1 12) - { 3.500000f, 11.500000f, 0.500000f, 0.025000f }, // 50 (3 2 11) - { 3.750000f, 10.750000f, 0.750000f, 0.025157f }, // 51 (3 3 10) - { 4.000000f, 10.000000f, 1.000000f, 0.025641f }, // 52 (3 4 9) - { 4.250000f, 9.250000f, 1.250000f, 0.026490f }, // 53 (3 5 8) - { 4.500000f, 8.500000f, 1.500000f, 0.027778f }, // 54 (3 6 7) - { 4.750000f, 7.750000f, 1.750000f, 0.029630f }, // 55 (3 7 6) - { 5.000000f, 7.000000f, 2.000000f, 0.032258f }, // 56 (3 8 5) - { 5.250000f, 6.250000f, 2.250000f, 0.036036f }, // 57 (3 9 4) - { 5.500000f, 5.500000f, 2.500000f, 0.041667f }, // 58 (3 10 3) - { 5.750000f, 4.750000f, 2.750000f, 0.050633f }, // 59 (3 11 2) - { 6.000000f, 4.000000f, 3.000000f, 0.066667f }, // 60 (3 12 1) - { 6.250000f, 3.250000f, 3.250000f, 0.102564f }, // 61 (3 13 0) - { 4.000000f, 12.000000f, 0.000000f, 0.020833f }, // 62 (4 0 12) - { 4.250000f, 11.250000f, 0.250000f, 0.020942f }, // 63 (4 1 11) - { 4.500000f, 10.500000f, 0.500000f, 0.021277f }, // 64 (4 2 10) - { 4.750000f, 9.750000f, 0.750000f, 0.021858f }, // 65 (4 3 9) - { 5.000000f, 9.000000f, 1.000000f, 0.022727f }, // 66 (4 4 8) - { 5.250000f, 8.250000f, 1.250000f, 0.023952f }, // 67 (4 5 7) - { 5.500000f, 7.500000f, 1.500000f, 0.025641f }, // 68 (4 6 6) - { 5.750000f, 6.750000f, 1.750000f, 0.027972f }, // 69 (4 7 5) - { 6.000000f, 6.000000f, 2.000000f, 0.031250f }, // 70 (4 8 4) - { 6.250000f, 5.250000f, 2.250000f, 0.036036f }, // 71 (4 9 3) - { 6.500000f, 4.500000f, 2.500000f, 0.043478f }, // 72 (4 10 2) - { 6.750000f, 3.750000f, 2.750000f, 0.056338f }, // 73 (4 11 1) - { 7.000000f, 3.000000f, 3.000000f, 0.083333f }, // 74 (4 12 0) - { 5.000000f, 11.000000f, 0.000000f, 0.018182f }, // 75 (5 0 11) - { 5.250000f, 10.250000f, 0.250000f, 0.018605f }, // 76 (5 1 10) - { 5.500000f, 9.500000f, 0.500000f, 0.019231f }, // 77 (5 2 9) - { 5.750000f, 8.750000f, 0.750000f, 0.020101f }, // 78 (5 3 8) - { 6.000000f, 8.000000f, 1.000000f, 0.021277f }, // 79 (5 4 7) - { 6.250000f, 7.250000f, 1.250000f, 0.022857f }, // 80 (5 5 6) - { 6.500000f, 6.500000f, 1.500000f, 0.025000f }, // 81 (5 6 5) - { 6.750000f, 5.750000f, 1.750000f, 0.027972f }, // 82 (5 7 4) - { 7.000000f, 5.000000f, 2.000000f, 0.032258f }, // 83 (5 8 3) - { 7.250000f, 4.250000f, 2.250000f, 0.038835f }, // 84 (5 9 2) - { 7.500000f, 3.500000f, 2.500000f, 0.050000f }, // 85 (5 10 1) - { 7.750000f, 2.750000f, 2.750000f, 0.072727f }, // 86 (5 11 0) - { 6.000000f, 10.000000f, 0.000000f, 0.016667f }, // 87 (6 0 10) - { 6.250000f, 9.250000f, 0.250000f, 0.017316f }, // 88 (6 1 9) - { 6.500000f, 8.500000f, 
0.500000f, 0.018182f }, // 89 (6 2 8) - { 6.750000f, 7.750000f, 0.750000f, 0.019324f }, // 90 (6 3 7) - { 7.000000f, 7.000000f, 1.000000f, 0.020833f }, // 91 (6 4 6) - { 7.250000f, 6.250000f, 1.250000f, 0.022857f }, // 92 (6 5 5) - { 7.500000f, 5.500000f, 1.500000f, 0.025641f }, // 93 (6 6 4) - { 7.750000f, 4.750000f, 1.750000f, 0.029630f }, // 94 (6 7 3) - { 8.000000f, 4.000000f, 2.000000f, 0.035714f }, // 95 (6 8 2) - { 8.250000f, 3.250000f, 2.250000f, 0.045977f }, // 96 (6 9 1) - { 8.500000f, 2.500000f, 2.500000f, 0.066667f }, // 97 (6 10 0) - { 7.000000f, 9.000000f, 0.000000f, 0.015873f }, // 98 (7 0 9) - { 7.250000f, 8.250000f, 0.250000f, 0.016736f }, // 99 (7 1 8) - { 7.500000f, 7.500000f, 0.500000f, 0.017857f }, // 100 (7 2 7) - { 7.750000f, 6.750000f, 0.750000f, 0.019324f }, // 101 (7 3 6) - { 8.000000f, 6.000000f, 1.000000f, 0.021277f }, // 102 (7 4 5) - { 8.250000f, 5.250000f, 1.250000f, 0.023952f }, // 103 (7 5 4) - { 8.500000f, 4.500000f, 1.500000f, 0.027778f }, // 104 (7 6 3) - { 8.750000f, 3.750000f, 1.750000f, 0.033613f }, // 105 (7 7 2) - { 9.000000f, 3.000000f, 2.000000f, 0.043478f }, // 106 (7 8 1) - { 9.250000f, 2.250000f, 2.250000f, 0.063492f }, // 107 (7 9 0) - { 8.000000f, 8.000000f, 0.000000f, 0.015625f }, // 108 (8 0 8) - { 8.250000f, 7.250000f, 0.250000f, 0.016736f }, // 109 (8 1 7) - { 8.500000f, 6.500000f, 0.500000f, 0.018182f }, // 110 (8 2 6) - { 8.750000f, 5.750000f, 0.750000f, 0.020101f }, // 111 (8 3 5) - { 9.000000f, 5.000000f, 1.000000f, 0.022727f }, // 112 (8 4 4) - { 9.250000f, 4.250000f, 1.250000f, 0.026490f }, // 113 (8 5 3) - { 9.500000f, 3.500000f, 1.500000f, 0.032258f }, // 114 (8 6 2) - { 9.750000f, 2.750000f, 1.750000f, 0.042105f }, // 115 (8 7 1) - { 10.000000f, 2.000000f, 2.000000f, 0.062500f }, // 116 (8 8 0) - { 9.000000f, 7.000000f, 0.000000f, 0.015873f }, // 117 (9 0 7) - { 9.250000f, 6.250000f, 0.250000f, 0.017316f }, // 118 (9 1 6) - { 9.500000f, 5.500000f, 0.500000f, 0.019231f }, // 119 (9 2 5) - { 9.750000f, 4.750000f, 0.750000f, 0.021858f }, // 120 (9 3 4) - { 10.000000f, 4.000000f, 1.000000f, 0.025641f }, // 121 (9 4 3) - { 10.250000f, 3.250000f, 1.250000f, 0.031496f }, // 122 (9 5 2) - { 10.500000f, 2.500000f, 1.500000f, 0.041667f }, // 123 (9 6 1) - { 10.750000f, 1.750000f, 1.750000f, 0.063492f }, // 124 (9 7 0) - { 10.000000f, 6.000000f, 0.000000f, 0.016667f }, // 125 (10 0 6) - { 10.250000f, 5.250000f, 0.250000f, 0.018605f }, // 126 (10 1 5) - { 10.500000f, 4.500000f, 0.500000f, 0.021277f }, // 127 (10 2 4) - { 10.750000f, 3.750000f, 0.750000f, 0.025157f }, // 128 (10 3 3) - { 11.000000f, 3.000000f, 1.000000f, 0.031250f }, // 129 (10 4 2) - { 11.250000f, 2.250000f, 1.250000f, 0.042105f }, // 130 (10 5 1) - { 11.500000f, 1.500000f, 1.500000f, 0.066667f }, // 131 (10 6 0) - { 11.000000f, 5.000000f, 0.000000f, 0.018182f }, // 132 (11 0 5) - { 11.250000f, 4.250000f, 0.250000f, 0.020942f }, // 133 (11 1 4) - { 11.500000f, 3.500000f, 0.500000f, 0.025000f }, // 134 (11 2 3) - { 11.750000f, 2.750000f, 0.750000f, 0.031496f }, // 135 (11 3 2) - { 12.000000f, 2.000000f, 1.000000f, 0.043478f }, // 136 (11 4 1) - { 12.250000f, 1.250000f, 1.250000f, 0.072727f }, // 137 (11 5 0) - { 12.000000f, 4.000000f, 0.000000f, 0.020833f }, // 138 (12 0 4) - { 12.250000f, 3.250000f, 0.250000f, 0.025157f }, // 139 (12 1 3) - { 12.500000f, 2.500000f, 0.500000f, 0.032258f }, // 140 (12 2 2) - { 12.750000f, 1.750000f, 0.750000f, 0.045977f }, // 141 (12 3 1) - { 13.000000f, 1.000000f, 1.000000f, 0.083333f }, // 142 (12 4 0) - { 13.000000f, 3.000000f, 0.000000f, 
0.025641f }, // 143 (13 0 3) - { 13.250000f, 2.250000f, 0.250000f, 0.033613f }, // 144 (13 1 2) - { 13.500000f, 1.500000f, 0.500000f, 0.050000f }, // 145 (13 2 1) - { 13.750000f, 0.750000f, 0.750000f, 0.102564f }, // 146 (13 3 0) - { 14.000000f, 2.000000f, 0.000000f, 0.035714f }, // 147 (14 0 2) - { 14.250000f, 1.250000f, 0.250000f, 0.056338f }, // 148 (14 1 1) - { 14.500000f, 0.500000f, 0.500000f, 0.142857f }, // 149 (14 2 0) - { 15.000000f, 1.000000f, 0.000000f, 0.066667f }, // 150 (15 0 1) - { 15.250000f, 0.250000f, 0.250000f, 0.266667f }, // 151 (15 1 0) - { 16.000000f, 0.000000f, 0.000000f, FLT_MAX }, // 152 (16 0 0) -}; // 153 three cluster elements - -static const SQUISH_ALIGN_16 Precomp s_fourElement[969] = { - { 0.000000f, 16.000000f, 0.000000f, FLT_MAX }, // 0 (0 0 0 16) - { 0.111111f, 15.444445f, 0.222222f, 0.600000f }, // 1 (0 0 1 15) - { 0.222222f, 14.888889f, 0.444444f, 0.321429f }, // 2 (0 0 2 14) - { 0.333333f, 14.333333f, 0.666667f, 0.230769f }, // 3 (0 0 3 13) - { 0.444444f, 13.777778f, 0.888889f, 0.187500f }, // 4 (0 0 4 12) - { 0.555556f, 13.222222f, 1.111111f, 0.163636f }, // 5 (0 0 5 11) - { 0.666667f, 12.666667f, 1.333333f, 0.150000f }, // 6 (0 0 6 10) - { 0.777778f, 12.111111f, 1.555556f, 0.142857f }, // 7 (0 0 7 9) - { 0.888889f, 11.555555f, 1.777778f, 0.140625f }, // 8 (0 0 8 8) - { 1.000000f, 11.000000f, 2.000000f, 0.142857f }, // 9 (0 0 9 7) - { 1.111111f, 10.444445f, 2.222222f, 0.150000f }, // 10 (0 0 10 6) - { 1.222222f, 9.888889f, 2.444444f, 0.163636f }, // 11 (0 0 11 5) - { 1.333333f, 9.333333f, 2.666667f, 0.187500f }, // 12 (0 0 12 4) - { 1.444444f, 8.777778f, 2.888889f, 0.230769f }, // 13 (0 0 13 3) - { 1.555556f, 8.222222f, 3.111111f, 0.321429f }, // 14 (0 0 14 2) - { 1.666667f, 7.666667f, 3.333333f, 0.600000f }, // 15 (0 0 15 1) - { 1.777778f, 7.111111f, 3.555556f, FLT_MAX }, // 16 (0 0 16 0) - { 0.444444f, 15.111111f, 0.222222f, 0.150000f }, // 17 (0 1 0 15) - { 0.555556f, 14.555555f, 0.444444f, 0.126761f }, // 18 (0 1 1 14) - { 0.666667f, 14.000000f, 0.666667f, 0.112500f }, // 19 (0 1 2 13) - { 0.777778f, 13.444445f, 0.888889f, 0.103448f }, // 20 (0 1 3 12) - { 0.888889f, 12.888889f, 1.111111f, 0.097826f }, // 21 (0 1 4 11) - { 1.000000f, 12.333333f, 1.333333f, 0.094737f }, // 22 (0 1 5 10) - { 1.111111f, 11.777778f, 1.555556f, 0.093750f }, // 23 (0 1 6 9) - { 1.222222f, 11.222222f, 1.777778f, 0.094737f }, // 24 (0 1 7 8) - { 1.333333f, 10.666667f, 2.000000f, 0.097826f }, // 25 (0 1 8 7) - { 1.444444f, 10.111111f, 2.222222f, 0.103448f }, // 26 (0 1 9 6) - { 1.555556f, 9.555555f, 2.444444f, 0.112500f }, // 27 (0 1 10 5) - { 1.666667f, 9.000000f, 2.666667f, 0.126761f }, // 28 (0 1 11 4) - { 1.777778f, 8.444445f, 2.888889f, 0.150000f }, // 29 (0 1 12 3) - { 1.888889f, 7.888889f, 3.111111f, 0.191489f }, // 30 (0 1 13 2) - { 2.000000f, 7.333333f, 3.333333f, 0.281250f }, // 31 (0 1 14 1) - { 2.111111f, 6.777778f, 3.555556f, 0.600000f }, // 32 (0 1 15 0) - { 0.888889f, 14.222222f, 0.444444f, 0.080357f }, // 33 (0 2 0 14) - { 1.000000f, 13.666667f, 0.666667f, 0.075630f }, // 34 (0 2 1 13) - { 1.111111f, 13.111111f, 0.888889f, 0.072581f }, // 35 (0 2 2 12) - { 1.222222f, 12.555555f, 1.111111f, 0.070866f }, // 36 (0 2 3 11) - { 1.333333f, 12.000000f, 1.333333f, 0.070313f }, // 37 (0 2 4 10) - { 1.444444f, 11.444445f, 1.555556f, 0.070866f }, // 38 (0 2 5 9) - { 1.555556f, 10.888889f, 1.777778f, 0.072581f }, // 39 (0 2 6 8) - { 1.666667f, 10.333333f, 2.000000f, 0.075630f }, // 40 (0 2 7 7) - { 1.777778f, 9.777778f, 2.222222f, 0.080357f }, // 41 (0 2 8 6) - { 
1.888889f, 9.222222f, 2.444444f, 0.087379f }, // 42 (0 2 9 5) - { 2.000000f, 8.666667f, 2.666667f, 0.097826f }, // 43 (0 2 10 4) - { 2.111111f, 8.111111f, 2.888889f, 0.113924f }, // 44 (0 2 11 3) - { 2.222222f, 7.555556f, 3.111111f, 0.140625f }, // 45 (0 2 12 2) - { 2.333333f, 7.000000f, 3.333333f, 0.191489f }, // 46 (0 2 13 1) - { 2.444444f, 6.444445f, 3.555556f, 0.321429f }, // 47 (0 2 14 0) - { 1.333333f, 13.333333f, 0.666667f, 0.057692f }, // 48 (0 3 0 13) - { 1.444444f, 12.777778f, 0.888889f, 0.056604f }, // 49 (0 3 1 12) - { 1.555556f, 12.222222f, 1.111111f, 0.056250f }, // 50 (0 3 2 11) - { 1.666667f, 11.666667f, 1.333333f, 0.056604f }, // 51 (0 3 3 10) - { 1.777778f, 11.111111f, 1.555556f, 0.057692f }, // 52 (0 3 4 9) - { 1.888889f, 10.555555f, 1.777778f, 0.059603f }, // 53 (0 3 5 8) - { 2.000000f, 10.000000f, 2.000000f, 0.062500f }, // 54 (0 3 6 7) - { 2.111111f, 9.444445f, 2.222222f, 0.066667f }, // 55 (0 3 7 6) - { 2.222222f, 8.888889f, 2.444444f, 0.072581f }, // 56 (0 3 8 5) - { 2.333333f, 8.333333f, 2.666667f, 0.081081f }, // 57 (0 3 9 4) - { 2.444444f, 7.777778f, 2.888889f, 0.093750f }, // 58 (0 3 10 3) - { 2.555556f, 7.222222f, 3.111111f, 0.113924f }, // 59 (0 3 11 2) - { 2.666667f, 6.666667f, 3.333333f, 0.150000f }, // 60 (0 3 12 1) - { 2.777778f, 6.111111f, 3.555556f, 0.230769f }, // 61 (0 3 13 0) - { 1.777778f, 12.444445f, 0.888889f, 0.046875f }, // 62 (0 4 0 12) - { 1.888889f, 11.888889f, 1.111111f, 0.047120f }, // 63 (0 4 1 11) - { 2.000000f, 11.333333f, 1.333333f, 0.047872f }, // 64 (0 4 2 10) - { 2.111111f, 10.777778f, 1.555556f, 0.049180f }, // 65 (0 4 3 9) - { 2.222222f, 10.222222f, 1.777778f, 0.051136f }, // 66 (0 4 4 8) - { 2.333333f, 9.666667f, 2.000000f, 0.053892f }, // 67 (0 4 5 7) - { 2.444444f, 9.111111f, 2.222222f, 0.057692f }, // 68 (0 4 6 6) - { 2.555556f, 8.555555f, 2.444444f, 0.062937f }, // 69 (0 4 7 5) - { 2.666667f, 8.000000f, 2.666667f, 0.070313f }, // 70 (0 4 8 4) - { 2.777778f, 7.444445f, 2.888889f, 0.081081f }, // 71 (0 4 9 3) - { 2.888889f, 6.888889f, 3.111111f, 0.097826f }, // 72 (0 4 10 2) - { 3.000000f, 6.333333f, 3.333333f, 0.126761f }, // 73 (0 4 11 1) - { 3.111111f, 5.777778f, 3.555556f, 0.187500f }, // 74 (0 4 12 0) - { 2.222222f, 11.555555f, 1.111111f, 0.040909f }, // 75 (0 5 0 11) - { 2.333333f, 11.000000f, 1.333333f, 0.041860f }, // 76 (0 5 1 10) - { 2.444444f, 10.444445f, 1.555556f, 0.043269f }, // 77 (0 5 2 9) - { 2.555556f, 9.888889f, 1.777778f, 0.045226f }, // 78 (0 5 3 8) - { 2.666667f, 9.333333f, 2.000000f, 0.047872f }, // 79 (0 5 4 7) - { 2.777778f, 8.777778f, 2.222222f, 0.051429f }, // 80 (0 5 5 6) - { 2.888889f, 8.222222f, 2.444444f, 0.056250f }, // 81 (0 5 6 5) - { 3.000000f, 7.666667f, 2.666667f, 0.062937f }, // 82 (0 5 7 4) - { 3.111111f, 7.111111f, 2.888889f, 0.072581f }, // 83 (0 5 8 3) - { 3.222222f, 6.555556f, 3.111111f, 0.087379f }, // 84 (0 5 9 2) - { 3.333333f, 6.000000f, 3.333333f, 0.112500f }, // 85 (0 5 10 1) - { 3.444444f, 5.444445f, 3.555556f, 0.163636f }, // 86 (0 5 11 0) - { 2.666667f, 10.666667f, 1.333333f, 0.037500f }, // 87 (0 6 0 10) - { 2.777778f, 10.111111f, 1.555556f, 0.038961f }, // 88 (0 6 1 9) - { 2.888889f, 9.555555f, 1.777778f, 0.040909f }, // 89 (0 6 2 8) - { 3.000000f, 9.000000f, 2.000000f, 0.043478f }, // 90 (0 6 3 7) - { 3.111111f, 8.444445f, 2.222222f, 0.046875f }, // 91 (0 6 4 6) - { 3.222222f, 7.888889f, 2.444444f, 0.051429f }, // 92 (0 6 5 5) - { 3.333333f, 7.333333f, 2.666667f, 0.057692f }, // 93 (0 6 6 4) - { 3.444444f, 6.777778f, 2.888889f, 0.066667f }, // 94 (0 6 7 3) - { 3.555556f, 
6.222222f, 3.111111f, 0.080357f }, // 95 (0 6 8 2) - { 3.666667f, 5.666667f, 3.333333f, 0.103448f }, // 96 (0 6 9 1) - { 3.777778f, 5.111111f, 3.555556f, 0.150000f }, // 97 (0 6 10 0) - { 3.111111f, 9.777778f, 1.555556f, 0.035714f }, // 98 (0 7 0 9) - { 3.222222f, 9.222222f, 1.777778f, 0.037657f }, // 99 (0 7 1 8) - { 3.333333f, 8.666667f, 2.000000f, 0.040179f }, // 100 (0 7 2 7) - { 3.444444f, 8.111111f, 2.222222f, 0.043478f }, // 101 (0 7 3 6) - { 3.555556f, 7.555555f, 2.444444f, 0.047872f }, // 102 (0 7 4 5) - { 3.666667f, 7.000000f, 2.666667f, 0.053892f }, // 103 (0 7 5 4) - { 3.777778f, 6.444445f, 2.888889f, 0.062500f }, // 104 (0 7 6 3) - { 3.888889f, 5.888889f, 3.111111f, 0.075630f }, // 105 (0 7 7 2) - { 4.000000f, 5.333333f, 3.333333f, 0.097826f }, // 106 (0 7 8 1) - { 4.111111f, 4.777778f, 3.555556f, 0.142857f }, // 107 (0 7 9 0) - { 3.555556f, 8.888889f, 1.777778f, 0.035156f }, // 108 (0 8 0 8) - { 3.666667f, 8.333333f, 2.000000f, 0.037657f }, // 109 (0 8 1 7) - { 3.777778f, 7.777778f, 2.222222f, 0.040909f }, // 110 (0 8 2 6) - { 3.888889f, 7.222222f, 2.444444f, 0.045226f }, // 111 (0 8 3 5) - { 4.000000f, 6.666667f, 2.666667f, 0.051136f }, // 112 (0 8 4 4) - { 4.111111f, 6.111111f, 2.888889f, 0.059603f }, // 113 (0 8 5 3) - { 4.222222f, 5.555555f, 3.111111f, 0.072581f }, // 114 (0 8 6 2) - { 4.333333f, 5.000000f, 3.333333f, 0.094737f }, // 115 (0 8 7 1) - { 4.444445f, 4.444445f, 3.555556f, 0.140625f }, // 116 (0 8 8 0) - { 4.000000f, 8.000000f, 2.000000f, 0.035714f }, // 117 (0 9 0 7) - { 4.111111f, 7.444445f, 2.222222f, 0.038961f }, // 118 (0 9 1 6) - { 4.222222f, 6.888889f, 2.444444f, 0.043269f }, // 119 (0 9 2 5) - { 4.333333f, 6.333333f, 2.666667f, 0.049180f }, // 120 (0 9 3 4) - { 4.444445f, 5.777778f, 2.888889f, 0.057692f }, // 121 (0 9 4 3) - { 4.555556f, 5.222222f, 3.111111f, 0.070866f }, // 122 (0 9 5 2) - { 4.666667f, 4.666667f, 3.333333f, 0.093750f }, // 123 (0 9 6 1) - { 4.777778f, 4.111111f, 3.555556f, 0.142857f }, // 124 (0 9 7 0) - { 4.444445f, 7.111111f, 2.222222f, 0.037500f }, // 125 (0 10 0 6) - { 4.555556f, 6.555555f, 2.444444f, 0.041860f }, // 126 (0 10 1 5) - { 4.666667f, 6.000000f, 2.666667f, 0.047872f }, // 127 (0 10 2 4) - { 4.777778f, 5.444445f, 2.888889f, 0.056604f }, // 128 (0 10 3 3) - { 4.888889f, 4.888889f, 3.111111f, 0.070313f }, // 129 (0 10 4 2) - { 5.000000f, 4.333333f, 3.333333f, 0.094737f }, // 130 (0 10 5 1) - { 5.111111f, 3.777778f, 3.555556f, 0.150000f }, // 131 (0 10 6 0) - { 4.888889f, 6.222222f, 2.444444f, 0.040909f }, // 132 (0 11 0 5) - { 5.000000f, 5.666667f, 2.666667f, 0.047120f }, // 133 (0 11 1 4) - { 5.111111f, 5.111111f, 2.888889f, 0.056250f }, // 134 (0 11 2 3) - { 5.222222f, 4.555555f, 3.111111f, 0.070866f }, // 135 (0 11 3 2) - { 5.333333f, 4.000000f, 3.333333f, 0.097826f }, // 136 (0 11 4 1) - { 5.444445f, 3.444444f, 3.555556f, 0.163636f }, // 137 (0 11 5 0) - { 5.333333f, 5.333333f, 2.666667f, 0.046875f }, // 138 (0 12 0 4) - { 5.444445f, 4.777778f, 2.888889f, 0.056604f }, // 139 (0 12 1 3) - { 5.555556f, 4.222222f, 3.111111f, 0.072581f }, // 140 (0 12 2 2) - { 5.666667f, 3.666667f, 3.333333f, 0.103448f }, // 141 (0 12 3 1) - { 5.777778f, 3.111111f, 3.555556f, 0.187500f }, // 142 (0 12 4 0) - { 5.777778f, 4.444445f, 2.888889f, 0.057692f }, // 143 (0 13 0 3) - { 5.888889f, 3.888889f, 3.111111f, 0.075630f }, // 144 (0 13 1 2) - { 6.000000f, 3.333333f, 3.333333f, 0.112500f }, // 145 (0 13 2 1) - { 6.111111f, 2.777778f, 3.555556f, 0.230769f }, // 146 (0 13 3 0) - { 6.222222f, 3.555556f, 3.111111f, 0.080357f }, // 147 (0 14 0 2) 
- { 6.333333f, 3.000000f, 3.333333f, 0.126761f }, // 148 (0 14 1 1) - { 6.444445f, 2.444444f, 3.555556f, 0.321429f }, // 149 (0 14 2 0) - { 6.666667f, 2.666667f, 3.333333f, 0.150000f }, // 150 (0 15 0 1) - { 6.777778f, 2.111111f, 3.555556f, 0.600000f }, // 151 (0 15 1 0) - { 7.111111f, 1.777778f, 3.555556f, FLT_MAX }, // 152 (0 16 0 0) - { 1.000000f, 15.000000f, 0.000000f, 0.066667f }, // 153 (1 0 0 15) - { 1.111111f, 14.444445f, 0.222222f, 0.062500f }, // 154 (1 0 1 14) - { 1.222222f, 13.888889f, 0.444444f, 0.059603f }, // 155 (1 0 2 13) - { 1.333333f, 13.333333f, 0.666667f, 0.057692f }, // 156 (1 0 3 12) - { 1.444444f, 12.777778f, 0.888889f, 0.056604f }, // 157 (1 0 4 11) - { 1.555556f, 12.222222f, 1.111111f, 0.056250f }, // 158 (1 0 5 10) - { 1.666667f, 11.666667f, 1.333333f, 0.056604f }, // 159 (1 0 6 9) - { 1.777778f, 11.111111f, 1.555556f, 0.057692f }, // 160 (1 0 7 8) - { 1.888889f, 10.555555f, 1.777778f, 0.059603f }, // 161 (1 0 8 7) - { 2.000000f, 10.000000f, 2.000000f, 0.062500f }, // 162 (1 0 9 6) - { 2.111111f, 9.444445f, 2.222222f, 0.066667f }, // 163 (1 0 10 5) - { 2.222222f, 8.888889f, 2.444444f, 0.072581f }, // 164 (1 0 11 4) - { 2.333333f, 8.333333f, 2.666667f, 0.081081f }, // 165 (1 0 12 3) - { 2.444444f, 7.777778f, 2.888889f, 0.093750f }, // 166 (1 0 13 2) - { 2.555556f, 7.222222f, 3.111111f, 0.113924f }, // 167 (1 0 14 1) - { 2.666667f, 6.666667f, 3.333333f, 0.150000f }, // 168 (1 0 15 0) - { 1.444444f, 14.111111f, 0.222222f, 0.049180f }, // 169 (1 1 0 14) - { 1.555556f, 13.555555f, 0.444444f, 0.047872f }, // 170 (1 1 1 13) - { 1.666667f, 13.000000f, 0.666667f, 0.047120f }, // 171 (1 1 2 12) - { 1.777778f, 12.444445f, 0.888889f, 0.046875f }, // 172 (1 1 3 11) - { 1.888889f, 11.888889f, 1.111111f, 0.047120f }, // 173 (1 1 4 10) - { 2.000000f, 11.333333f, 1.333333f, 0.047872f }, // 174 (1 1 5 9) - { 2.111111f, 10.777778f, 1.555556f, 0.049180f }, // 175 (1 1 6 8) - { 2.222222f, 10.222222f, 1.777778f, 0.051136f }, // 176 (1 1 7 7) - { 2.333333f, 9.666667f, 2.000000f, 0.053892f }, // 177 (1 1 8 6) - { 2.444444f, 9.111111f, 2.222222f, 0.057692f }, // 178 (1 1 9 5) - { 2.555556f, 8.555555f, 2.444444f, 0.062937f }, // 179 (1 1 10 4) - { 2.666667f, 8.000000f, 2.666667f, 0.070313f }, // 180 (1 1 11 3) - { 2.777778f, 7.444445f, 2.888889f, 0.081081f }, // 181 (1 1 12 2) - { 2.888889f, 6.888889f, 3.111111f, 0.097826f }, // 182 (1 1 13 1) - { 3.000000f, 6.333333f, 3.333333f, 0.126761f }, // 183 (1 1 14 0) - { 1.888889f, 13.222222f, 0.444444f, 0.040359f }, // 184 (1 2 0 13) - { 2.000000f, 12.666667f, 0.666667f, 0.040179f }, // 185 (1 2 1 12) - { 2.111111f, 12.111111f, 0.888889f, 0.040359f }, // 186 (1 2 2 11) - { 2.222222f, 11.555555f, 1.111111f, 0.040909f }, // 187 (1 2 3 10) - { 2.333333f, 11.000000f, 1.333333f, 0.041860f }, // 188 (1 2 4 9) - { 2.444444f, 10.444445f, 1.555556f, 0.043269f }, // 189 (1 2 5 8) - { 2.555556f, 9.888889f, 1.777778f, 0.045226f }, // 190 (1 2 6 7) - { 2.666667f, 9.333333f, 2.000000f, 0.047872f }, // 191 (1 2 7 6) - { 2.777778f, 8.777778f, 2.222222f, 0.051429f }, // 192 (1 2 8 5) - { 2.888889f, 8.222222f, 2.444444f, 0.056250f }, // 193 (1 2 9 4) - { 3.000000f, 7.666667f, 2.666667f, 0.062937f }, // 194 (1 2 10 3) - { 3.111111f, 7.111111f, 2.888889f, 0.072581f }, // 195 (1 2 11 2) - { 3.222222f, 6.555556f, 3.111111f, 0.087379f }, // 196 (1 2 12 1) - { 3.333333f, 6.000000f, 3.333333f, 0.112500f }, // 197 (1 2 13 0) - { 2.333333f, 12.333333f, 0.666667f, 0.035294f }, // 198 (1 3 0 12) - { 2.444444f, 11.777778f, 0.888889f, 0.035714f }, // 199 (1 3 1 11) - { 
2.555556f, 11.222222f, 1.111111f, 0.036437f }, // 200 (1 3 2 10) - { 2.666667f, 10.666667f, 1.333333f, 0.037500f }, // 201 (1 3 3 9) - { 2.777778f, 10.111111f, 1.555556f, 0.038961f }, // 202 (1 3 4 8) - { 2.888889f, 9.555555f, 1.777778f, 0.040909f }, // 203 (1 3 5 7) - { 3.000000f, 9.000000f, 2.000000f, 0.043478f }, // 204 (1 3 6 6) - { 3.111111f, 8.444445f, 2.222222f, 0.046875f }, // 205 (1 3 7 5) - { 3.222222f, 7.888889f, 2.444444f, 0.051429f }, // 206 (1 3 8 4) - { 3.333333f, 7.333333f, 2.666667f, 0.057692f }, // 207 (1 3 9 3) - { 3.444444f, 6.777778f, 2.888889f, 0.066667f }, // 208 (1 3 10 2) - { 3.555556f, 6.222222f, 3.111111f, 0.080357f }, // 209 (1 3 11 1) - { 3.666667f, 5.666667f, 3.333333f, 0.103448f }, // 210 (1 3 12 0) - { 2.777778f, 11.444445f, 0.888889f, 0.032258f }, // 211 (1 4 0 11) - { 2.888889f, 10.888889f, 1.111111f, 0.033088f }, // 212 (1 4 1 10) - { 3.000000f, 10.333333f, 1.333333f, 0.034221f }, // 213 (1 4 2 9) - { 3.111111f, 9.777778f, 1.555556f, 0.035714f }, // 214 (1 4 3 8) - { 3.222222f, 9.222222f, 1.777778f, 0.037657f }, // 215 (1 4 4 7) - { 3.333333f, 8.666667f, 2.000000f, 0.040179f }, // 216 (1 4 5 6) - { 3.444444f, 8.111111f, 2.222222f, 0.043478f }, // 217 (1 4 6 5) - { 3.555556f, 7.555555f, 2.444444f, 0.047872f }, // 218 (1 4 7 4) - { 3.666667f, 7.000000f, 2.666667f, 0.053892f }, // 219 (1 4 8 3) - { 3.777778f, 6.444445f, 2.888889f, 0.062500f }, // 220 (1 4 9 2) - { 3.888889f, 5.888889f, 3.111111f, 0.075630f }, // 221 (1 4 10 1) - { 4.000000f, 5.333333f, 3.333333f, 0.097826f }, // 222 (1 4 11 0) - { 3.222222f, 10.555555f, 1.111111f, 0.030508f }, // 223 (1 5 0 10) - { 3.333333f, 10.000000f, 1.333333f, 0.031690f }, // 224 (1 5 1 9) - { 3.444444f, 9.444445f, 1.555556f, 0.033210f }, // 225 (1 5 2 8) - { 3.555556f, 8.888889f, 1.777778f, 0.035156f }, // 226 (1 5 3 7) - { 3.666667f, 8.333333f, 2.000000f, 0.037657f }, // 227 (1 5 4 6) - { 3.777778f, 7.777778f, 2.222222f, 0.040909f }, // 228 (1 5 5 5) - { 3.888889f, 7.222222f, 2.444444f, 0.045226f }, // 229 (1 5 6 4) - { 4.000000f, 6.666667f, 2.666667f, 0.051136f }, // 230 (1 5 7 3) - { 4.111111f, 6.111111f, 2.888889f, 0.059603f }, // 231 (1 5 8 2) - { 4.222222f, 5.555556f, 3.111111f, 0.072581f }, // 232 (1 5 9 1) - { 4.333333f, 5.000000f, 3.333333f, 0.094737f }, // 233 (1 5 10 0) - { 3.666667f, 9.666667f, 1.333333f, 0.029703f }, // 234 (1 6 0 9) - { 3.777778f, 9.111111f, 1.555556f, 0.031250f }, // 235 (1 6 1 8) - { 3.888889f, 8.555555f, 1.777778f, 0.033210f }, // 236 (1 6 2 7) - { 4.000000f, 8.000000f, 2.000000f, 0.035714f }, // 237 (1 6 3 6) - { 4.111111f, 7.444445f, 2.222222f, 0.038961f }, // 238 (1 6 4 5) - { 4.222222f, 6.888889f, 2.444444f, 0.043269f }, // 239 (1 6 5 4) - { 4.333333f, 6.333333f, 2.666667f, 0.049180f }, // 240 (1 6 6 3) - { 4.444445f, 5.777778f, 2.888889f, 0.057692f }, // 241 (1 6 7 2) - { 4.555555f, 5.222222f, 3.111111f, 0.070866f }, // 242 (1 6 8 1) - { 4.666667f, 4.666667f, 3.333333f, 0.093750f }, // 243 (1 6 9 0) - { 4.111111f, 8.777778f, 1.555556f, 0.029703f }, // 244 (1 7 0 8) - { 4.222222f, 8.222222f, 1.777778f, 0.031690f }, // 245 (1 7 1 7) - { 4.333333f, 7.666667f, 2.000000f, 0.034221f }, // 246 (1 7 2 6) - { 4.444445f, 7.111111f, 2.222222f, 0.037500f }, // 247 (1 7 3 5) - { 4.555555f, 6.555555f, 2.444444f, 0.041860f }, // 248 (1 7 4 4) - { 4.666667f, 6.000000f, 2.666667f, 0.047872f }, // 249 (1 7 5 3) - { 4.777778f, 5.444445f, 2.888889f, 0.056604f }, // 250 (1 7 6 2) - { 4.888889f, 4.888889f, 3.111111f, 0.070313f }, // 251 (1 7 7 1) - { 5.000000f, 4.333333f, 3.333333f, 0.094737f }, // 252 
(1 7 8 0) - { 4.555555f, 7.888889f, 1.777778f, 0.030508f }, // 253 (1 8 0 7) - { 4.666667f, 7.333333f, 2.000000f, 0.033088f }, // 254 (1 8 1 6) - { 4.777778f, 6.777778f, 2.222222f, 0.036437f }, // 255 (1 8 2 5) - { 4.888889f, 6.222222f, 2.444444f, 0.040909f }, // 256 (1 8 3 4) - { 5.000000f, 5.666667f, 2.666667f, 0.047120f }, // 257 (1 8 4 3) - { 5.111111f, 5.111111f, 2.888889f, 0.056250f }, // 258 (1 8 5 2) - { 5.222222f, 4.555555f, 3.111111f, 0.070866f }, // 259 (1 8 6 1) - { 5.333333f, 4.000000f, 3.333333f, 0.097826f }, // 260 (1 8 7 0) - { 5.000000f, 7.000000f, 2.000000f, 0.032258f }, // 261 (1 9 0 6) - { 5.111111f, 6.444445f, 2.222222f, 0.035714f }, // 262 (1 9 1 5) - { 5.222222f, 5.888889f, 2.444444f, 0.040359f }, // 263 (1 9 2 4) - { 5.333333f, 5.333333f, 2.666667f, 0.046875f }, // 264 (1 9 3 3) - { 5.444445f, 4.777778f, 2.888889f, 0.056604f }, // 265 (1 9 4 2) - { 5.555556f, 4.222222f, 3.111111f, 0.072581f }, // 266 (1 9 5 1) - { 5.666667f, 3.666667f, 3.333333f, 0.103448f }, // 267 (1 9 6 0) - { 5.444445f, 6.111111f, 2.222222f, 0.035294f }, // 268 (1 10 0 5) - { 5.555556f, 5.555555f, 2.444444f, 0.040179f }, // 269 (1 10 1 4) - { 5.666667f, 5.000000f, 2.666667f, 0.047120f }, // 270 (1 10 2 3) - { 5.777778f, 4.444445f, 2.888889f, 0.057692f }, // 271 (1 10 3 2) - { 5.888889f, 3.888889f, 3.111111f, 0.075630f }, // 272 (1 10 4 1) - { 6.000000f, 3.333333f, 3.333333f, 0.112500f }, // 273 (1 10 5 0) - { 5.888889f, 5.222222f, 2.444444f, 0.040359f }, // 274 (1 11 0 4) - { 6.000000f, 4.666667f, 2.666667f, 0.047872f }, // 275 (1 11 1 3) - { 6.111111f, 4.111111f, 2.888889f, 0.059603f }, // 276 (1 11 2 2) - { 6.222222f, 3.555556f, 3.111111f, 0.080357f }, // 277 (1 11 3 1) - { 6.333333f, 3.000000f, 3.333333f, 0.126761f }, // 278 (1 11 4 0) - { 6.333333f, 4.333333f, 2.666667f, 0.049180f }, // 279 (1 12 0 3) - { 6.444445f, 3.777778f, 2.888889f, 0.062500f }, // 280 (1 12 1 2) - { 6.555556f, 3.222222f, 3.111111f, 0.087379f }, // 281 (1 12 2 1) - { 6.666667f, 2.666667f, 3.333333f, 0.150000f }, // 282 (1 12 3 0) - { 6.777778f, 3.444444f, 2.888889f, 0.066667f }, // 283 (1 13 0 2) - { 6.888889f, 2.888889f, 3.111111f, 0.097826f }, // 284 (1 13 1 1) - { 7.000000f, 2.333333f, 3.333333f, 0.191489f }, // 285 (1 13 2 0) - { 7.222222f, 2.555556f, 3.111111f, 0.113924f }, // 286 (1 14 0 1) - { 7.333333f, 2.000000f, 3.333333f, 0.281250f }, // 287 (1 14 1 0) - { 7.666667f, 1.666667f, 3.333333f, 0.600000f }, // 288 (1 15 0 0) - { 2.000000f, 14.000000f, 0.000000f, 0.035714f }, // 289 (2 0 0 14) - { 2.111111f, 13.444445f, 0.222222f, 0.035294f }, // 290 (2 0 1 13) - { 2.222222f, 12.888889f, 0.444444f, 0.035156f }, // 291 (2 0 2 12) - { 2.333333f, 12.333333f, 0.666667f, 0.035294f }, // 292 (2 0 3 11) - { 2.444444f, 11.777778f, 0.888889f, 0.035714f }, // 293 (2 0 4 10) - { 2.555556f, 11.222222f, 1.111111f, 0.036437f }, // 294 (2 0 5 9) - { 2.666667f, 10.666667f, 1.333333f, 0.037500f }, // 295 (2 0 6 8) - { 2.777778f, 10.111111f, 1.555556f, 0.038961f }, // 296 (2 0 7 7) - { 2.888889f, 9.555555f, 1.777778f, 0.040909f }, // 297 (2 0 8 6) - { 3.000000f, 9.000000f, 2.000000f, 0.043478f }, // 298 (2 0 9 5) - { 3.111111f, 8.444445f, 2.222222f, 0.046875f }, // 299 (2 0 10 4) - { 3.222222f, 7.888889f, 2.444444f, 0.051429f }, // 300 (2 0 11 3) - { 3.333333f, 7.333333f, 2.666667f, 0.057692f }, // 301 (2 0 12 2) - { 3.444444f, 6.777778f, 2.888889f, 0.066667f }, // 302 (2 0 13 1) - { 3.555556f, 6.222222f, 3.111111f, 0.080357f }, // 303 (2 0 14 0) - { 2.444444f, 13.111111f, 0.222222f, 0.031250f }, // 304 (2 1 0 13) - { 2.555556f, 
12.555555f, 0.444444f, 0.031359f }, // 305 (2 1 1 12) - { 2.666667f, 12.000000f, 0.666667f, 0.031690f }, // 306 (2 1 2 11) - { 2.777778f, 11.444445f, 0.888889f, 0.032258f }, // 307 (2 1 3 10) - { 2.888889f, 10.888889f, 1.111111f, 0.033088f }, // 308 (2 1 4 9) - { 3.000000f, 10.333333f, 1.333333f, 0.034221f }, // 309 (2 1 5 8) - { 3.111111f, 9.777778f, 1.555556f, 0.035714f }, // 310 (2 1 6 7) - { 3.222222f, 9.222222f, 1.777778f, 0.037657f }, // 311 (2 1 7 6) - { 3.333333f, 8.666667f, 2.000000f, 0.040179f }, // 312 (2 1 8 5) - { 3.444444f, 8.111111f, 2.222222f, 0.043478f }, // 313 (2 1 9 4) - { 3.555556f, 7.555556f, 2.444444f, 0.047872f }, // 314 (2 1 10 3) - { 3.666667f, 7.000000f, 2.666667f, 0.053892f }, // 315 (2 1 11 2) - { 3.777778f, 6.444445f, 2.888889f, 0.062500f }, // 316 (2 1 12 1) - { 3.888889f, 5.888889f, 3.111111f, 0.075630f }, // 317 (2 1 13 0) - { 2.888889f, 12.222222f, 0.444444f, 0.028481f }, // 318 (2 2 0 12) - { 3.000000f, 11.666667f, 0.666667f, 0.028939f }, // 319 (2 2 1 11) - { 3.111111f, 11.111111f, 0.888889f, 0.029605f }, // 320 (2 2 2 10) - { 3.222222f, 10.555555f, 1.111111f, 0.030508f }, // 321 (2 2 3 9) - { 3.333333f, 10.000000f, 1.333333f, 0.031690f }, // 322 (2 2 4 8) - { 3.444444f, 9.444445f, 1.555556f, 0.033210f }, // 323 (2 2 5 7) - { 3.555556f, 8.888889f, 1.777778f, 0.035156f }, // 324 (2 2 6 6) - { 3.666667f, 8.333333f, 2.000000f, 0.037657f }, // 325 (2 2 7 5) - { 3.777778f, 7.777778f, 2.222222f, 0.040909f }, // 326 (2 2 8 4) - { 3.888889f, 7.222222f, 2.444444f, 0.045226f }, // 327 (2 2 9 3) - { 4.000000f, 6.666667f, 2.666667f, 0.051136f }, // 328 (2 2 10 2) - { 4.111111f, 6.111111f, 2.888889f, 0.059603f }, // 329 (2 2 11 1) - { 4.222222f, 5.555556f, 3.111111f, 0.072581f }, // 330 (2 2 12 0) - { 3.333333f, 11.333333f, 0.666667f, 0.026786f }, // 331 (2 3 0 11) - { 3.444444f, 10.777778f, 0.888889f, 0.027523f }, // 332 (2 3 1 10) - { 3.555556f, 10.222222f, 1.111111f, 0.028481f }, // 333 (2 3 2 9) - { 3.666667f, 9.666667f, 1.333333f, 0.029703f }, // 334 (2 3 3 8) - { 3.777778f, 9.111111f, 1.555556f, 0.031250f }, // 335 (2 3 4 7) - { 3.888889f, 8.555555f, 1.777778f, 0.033210f }, // 336 (2 3 5 6) - { 4.000000f, 8.000000f, 2.000000f, 0.035714f }, // 337 (2 3 6 5) - { 4.111111f, 7.444445f, 2.222222f, 0.038961f }, // 338 (2 3 7 4) - { 4.222222f, 6.888889f, 2.444444f, 0.043269f }, // 339 (2 3 8 3) - { 4.333333f, 6.333333f, 2.666667f, 0.049180f }, // 340 (2 3 9 2) - { 4.444445f, 5.777778f, 2.888889f, 0.057692f }, // 341 (2 3 10 1) - { 4.555555f, 5.222222f, 3.111111f, 0.070866f }, // 342 (2 3 11 0) - { 3.777778f, 10.444445f, 0.888889f, 0.025862f }, // 343 (2 4 0 10) - { 3.888889f, 9.888889f, 1.111111f, 0.026866f }, // 344 (2 4 1 9) - { 4.000000f, 9.333333f, 1.333333f, 0.028125f }, // 345 (2 4 2 8) - { 4.111111f, 8.777778f, 1.555556f, 0.029703f }, // 346 (2 4 3 7) - { 4.222222f, 8.222222f, 1.777778f, 0.031690f }, // 347 (2 4 4 6) - { 4.333333f, 7.666667f, 2.000000f, 0.034221f }, // 348 (2 4 5 5) - { 4.444445f, 7.111111f, 2.222222f, 0.037500f }, // 349 (2 4 6 4) - { 4.555555f, 6.555555f, 2.444444f, 0.041860f }, // 350 (2 4 7 3) - { 4.666667f, 6.000000f, 2.666667f, 0.047872f }, // 351 (2 4 8 2) - { 4.777778f, 5.444445f, 2.888889f, 0.056604f }, // 352 (2 4 9 1) - { 4.888889f, 4.888889f, 3.111111f, 0.070313f }, // 353 (2 4 10 0) - { 4.222222f, 9.555555f, 1.111111f, 0.025568f }, // 354 (2 5 0 9) - { 4.333333f, 9.000000f, 1.333333f, 0.026866f }, // 355 (2 5 1 8) - { 4.444445f, 8.444445f, 1.555556f, 0.028481f }, // 356 (2 5 2 7) - { 4.555555f, 7.888889f, 1.777778f, 0.030508f }, // 
357 (2 5 3 6) - { 4.666667f, 7.333333f, 2.000000f, 0.033088f }, // 358 (2 5 4 5) - { 4.777778f, 6.777778f, 2.222222f, 0.036437f }, // 359 (2 5 5 4) - { 4.888889f, 6.222222f, 2.444444f, 0.040909f }, // 360 (2 5 6 3) - { 5.000000f, 5.666667f, 2.666667f, 0.047120f }, // 361 (2 5 7 2) - { 5.111111f, 5.111111f, 2.888889f, 0.056250f }, // 362 (2 5 8 1) - { 5.222222f, 4.555556f, 3.111111f, 0.070866f }, // 363 (2 5 9 0) - { 4.666667f, 8.666667f, 1.333333f, 0.025862f }, // 364 (2 6 0 8) - { 4.777778f, 8.111111f, 1.555556f, 0.027523f }, // 365 (2 6 1 7) - { 4.888889f, 7.555555f, 1.777778f, 0.029605f }, // 366 (2 6 2 6) - { 5.000000f, 7.000000f, 2.000000f, 0.032258f }, // 367 (2 6 3 5) - { 5.111111f, 6.444445f, 2.222222f, 0.035714f }, // 368 (2 6 4 4) - { 5.222222f, 5.888889f, 2.444444f, 0.040359f }, // 369 (2 6 5 3) - { 5.333333f, 5.333333f, 2.666667f, 0.046875f }, // 370 (2 6 6 2) - { 5.444445f, 4.777778f, 2.888889f, 0.056604f }, // 371 (2 6 7 1) - { 5.555555f, 4.222222f, 3.111111f, 0.072581f }, // 372 (2 6 8 0) - { 5.111111f, 7.777778f, 1.555556f, 0.026786f }, // 373 (2 7 0 7) - { 5.222222f, 7.222222f, 1.777778f, 0.028939f }, // 374 (2 7 1 6) - { 5.333333f, 6.666667f, 2.000000f, 0.031690f }, // 375 (2 7 2 5) - { 5.444445f, 6.111111f, 2.222222f, 0.035294f }, // 376 (2 7 3 4) - { 5.555555f, 5.555555f, 2.444444f, 0.040179f }, // 377 (2 7 4 3) - { 5.666667f, 5.000000f, 2.666667f, 0.047120f }, // 378 (2 7 5 2) - { 5.777778f, 4.444445f, 2.888889f, 0.057692f }, // 379 (2 7 6 1) - { 5.888889f, 3.888889f, 3.111111f, 0.075630f }, // 380 (2 7 7 0) - { 5.555555f, 6.888889f, 1.777778f, 0.028481f }, // 381 (2 8 0 6) - { 5.666667f, 6.333333f, 2.000000f, 0.031359f }, // 382 (2 8 1 5) - { 5.777778f, 5.777778f, 2.222222f, 0.035156f }, // 383 (2 8 2 4) - { 5.888889f, 5.222222f, 2.444444f, 0.040359f }, // 384 (2 8 3 3) - { 6.000000f, 4.666667f, 2.666667f, 0.047872f }, // 385 (2 8 4 2) - { 6.111111f, 4.111111f, 2.888889f, 0.059603f }, // 386 (2 8 5 1) - { 6.222222f, 3.555556f, 3.111111f, 0.080357f }, // 387 (2 8 6 0) - { 6.000000f, 6.000000f, 2.000000f, 0.031250f }, // 388 (2 9 0 5) - { 6.111111f, 5.444445f, 2.222222f, 0.035294f }, // 389 (2 9 1 4) - { 6.222222f, 4.888889f, 2.444444f, 0.040909f }, // 390 (2 9 2 3) - { 6.333333f, 4.333333f, 2.666667f, 0.049180f }, // 391 (2 9 3 2) - { 6.444445f, 3.777778f, 2.888889f, 0.062500f }, // 392 (2 9 4 1) - { 6.555556f, 3.222222f, 3.111111f, 0.087379f }, // 393 (2 9 5 0) - { 6.444445f, 5.111111f, 2.222222f, 0.035714f }, // 394 (2 10 0 4) - { 6.555556f, 4.555555f, 2.444444f, 0.041860f }, // 395 (2 10 1 3) - { 6.666667f, 4.000000f, 2.666667f, 0.051136f }, // 396 (2 10 2 2) - { 6.777778f, 3.444444f, 2.888889f, 0.066667f }, // 397 (2 10 3 1) - { 6.888889f, 2.888889f, 3.111111f, 0.097826f }, // 398 (2 10 4 0) - { 6.888889f, 4.222222f, 2.444444f, 0.043269f }, // 399 (2 11 0 3) - { 7.000000f, 3.666667f, 2.666667f, 0.053892f }, // 400 (2 11 1 2) - { 7.111111f, 3.111111f, 2.888889f, 0.072581f }, // 401 (2 11 2 1) - { 7.222222f, 2.555556f, 3.111111f, 0.113924f }, // 402 (2 11 3 0) - { 7.333333f, 3.333333f, 2.666667f, 0.057692f }, // 403 (2 12 0 2) - { 7.444445f, 2.777778f, 2.888889f, 0.081081f }, // 404 (2 12 1 1) - { 7.555556f, 2.222222f, 3.111111f, 0.140625f }, // 405 (2 12 2 0) - { 7.777778f, 2.444444f, 2.888889f, 0.093750f }, // 406 (2 13 0 1) - { 7.888889f, 1.888889f, 3.111111f, 0.191489f }, // 407 (2 13 1 0) - { 8.222222f, 1.555556f, 3.111111f, 0.321429f }, // 408 (2 14 0 0) - { 3.000000f, 13.000000f, 0.000000f, 0.025641f }, // 409 (3 0 0 13) - { 3.111111f, 12.444445f, 0.222222f, 
0.025862f }, // 410 (3 0 1 12) - { 3.222222f, 11.888889f, 0.444444f, 0.026239f }, // 411 (3 0 2 11) - { 3.333333f, 11.333333f, 0.666667f, 0.026786f }, // 412 (3 0 3 10) - { 3.444444f, 10.777778f, 0.888889f, 0.027523f }, // 413 (3 0 4 9) - { 3.555556f, 10.222222f, 1.111111f, 0.028481f }, // 414 (3 0 5 8) - { 3.666667f, 9.666667f, 1.333333f, 0.029703f }, // 415 (3 0 6 7) - { 3.777778f, 9.111111f, 1.555556f, 0.031250f }, // 416 (3 0 7 6) - { 3.888889f, 8.555555f, 1.777778f, 0.033210f }, // 417 (3 0 8 5) - { 4.000000f, 8.000000f, 2.000000f, 0.035714f }, // 418 (3 0 9 4) - { 4.111111f, 7.444445f, 2.222222f, 0.038961f }, // 419 (3 0 10 3) - { 4.222222f, 6.888889f, 2.444444f, 0.043269f }, // 420 (3 0 11 2) - { 4.333333f, 6.333333f, 2.666667f, 0.049180f }, // 421 (3 0 12 1) - { 4.444445f, 5.777778f, 2.888889f, 0.057692f }, // 422 (3 0 13 0) - { 3.444444f, 12.111111f, 0.222222f, 0.024000f }, // 423 (3 1 0 12) - { 3.555556f, 11.555555f, 0.444444f, 0.024457f }, // 424 (3 1 1 11) - { 3.666667f, 11.000000f, 0.666667f, 0.025070f }, // 425 (3 1 2 10) - { 3.777778f, 10.444445f, 0.888889f, 0.025862f }, // 426 (3 1 3 9) - { 3.888889f, 9.888889f, 1.111111f, 0.026866f }, // 427 (3 1 4 8) - { 4.000000f, 9.333333f, 1.333333f, 0.028125f }, // 428 (3 1 5 7) - { 4.111111f, 8.777778f, 1.555556f, 0.029703f }, // 429 (3 1 6 6) - { 4.222222f, 8.222222f, 1.777778f, 0.031690f }, // 430 (3 1 7 5) - { 4.333333f, 7.666667f, 2.000000f, 0.034221f }, // 431 (3 1 8 4) - { 4.444445f, 7.111111f, 2.222222f, 0.037500f }, // 432 (3 1 9 3) - { 4.555555f, 6.555556f, 2.444444f, 0.041860f }, // 433 (3 1 10 2) - { 4.666667f, 6.000000f, 2.666667f, 0.047872f }, // 434 (3 1 11 1) - { 4.777778f, 5.444445f, 2.888889f, 0.056604f }, // 435 (3 1 12 0) - { 3.888889f, 11.222222f, 0.444444f, 0.023018f }, // 436 (3 2 0 11) - { 4.000000f, 10.666667f, 0.666667f, 0.023684f }, // 437 (3 2 1 10) - { 4.111111f, 10.111111f, 0.888889f, 0.024523f }, // 438 (3 2 2 9) - { 4.222222f, 9.555555f, 1.111111f, 0.025568f }, // 439 (3 2 3 8) - { 4.333333f, 9.000000f, 1.333333f, 0.026866f }, // 440 (3 2 4 7) - { 4.444445f, 8.444445f, 1.555556f, 0.028481f }, // 441 (3 2 5 6) - { 4.555555f, 7.888889f, 1.777778f, 0.030508f }, // 442 (3 2 6 5) - { 4.666667f, 7.333333f, 2.000000f, 0.033088f }, // 443 (3 2 7 4) - { 4.777778f, 6.777778f, 2.222222f, 0.036437f }, // 444 (3 2 8 3) - { 4.888889f, 6.222222f, 2.444444f, 0.040909f }, // 445 (3 2 9 2) - { 5.000000f, 5.666667f, 2.666667f, 0.047120f }, // 446 (3 2 10 1) - { 5.111111f, 5.111111f, 2.888889f, 0.056250f }, // 447 (3 2 11 0) - { 4.333333f, 10.333333f, 0.666667f, 0.022556f }, // 448 (3 3 0 10) - { 4.444445f, 9.777778f, 0.888889f, 0.023438f }, // 449 (3 3 1 9) - { 4.555555f, 9.222222f, 1.111111f, 0.024523f }, // 450 (3 3 2 8) - { 4.666667f, 8.666667f, 1.333333f, 0.025862f }, // 451 (3 3 3 7) - { 4.777778f, 8.111111f, 1.555556f, 0.027523f }, // 452 (3 3 4 6) - { 4.888889f, 7.555555f, 1.777778f, 0.029605f }, // 453 (3 3 5 5) - { 5.000000f, 7.000000f, 2.000000f, 0.032258f }, // 454 (3 3 6 4) - { 5.111111f, 6.444445f, 2.222222f, 0.035714f }, // 455 (3 3 7 3) - { 5.222222f, 5.888889f, 2.444444f, 0.040359f }, // 456 (3 3 8 2) - { 5.333333f, 5.333333f, 2.666667f, 0.046875f }, // 457 (3 3 9 1) - { 5.444445f, 4.777778f, 2.888889f, 0.056604f }, // 458 (3 3 10 0) - { 4.777778f, 9.444445f, 0.888889f, 0.022556f }, // 459 (3 4 0 9) - { 4.888889f, 8.888889f, 1.111111f, 0.023684f }, // 460 (3 4 1 8) - { 5.000000f, 8.333333f, 1.333333f, 0.025070f }, // 461 (3 4 2 7) - { 5.111111f, 7.777778f, 1.555556f, 0.026786f }, // 462 (3 4 3 6) - { 
5.222222f, 7.222222f, 1.777778f, 0.028939f }, // 463 (3 4 4 5) - { 5.333333f, 6.666667f, 2.000000f, 0.031690f }, // 464 (3 4 5 4) - { 5.444445f, 6.111111f, 2.222222f, 0.035294f }, // 465 (3 4 6 3) - { 5.555555f, 5.555555f, 2.444444f, 0.040179f }, // 466 (3 4 7 2) - { 5.666667f, 5.000000f, 2.666667f, 0.047120f }, // 467 (3 4 8 1) - { 5.777778f, 4.444445f, 2.888889f, 0.057692f }, // 468 (3 4 9 0) - { 5.222222f, 8.555555f, 1.111111f, 0.023018f }, // 469 (3 5 0 8) - { 5.333333f, 8.000000f, 1.333333f, 0.024457f }, // 470 (3 5 1 7) - { 5.444445f, 7.444445f, 1.555556f, 0.026239f }, // 471 (3 5 2 6) - { 5.555555f, 6.888889f, 1.777778f, 0.028481f }, // 472 (3 5 3 5) - { 5.666667f, 6.333333f, 2.000000f, 0.031359f }, // 473 (3 5 4 4) - { 5.777778f, 5.777778f, 2.222222f, 0.035156f }, // 474 (3 5 5 3) - { 5.888889f, 5.222222f, 2.444444f, 0.040359f }, // 475 (3 5 6 2) - { 6.000000f, 4.666667f, 2.666667f, 0.047872f }, // 476 (3 5 7 1) - { 6.111111f, 4.111111f, 2.888889f, 0.059603f }, // 477 (3 5 8 0) - { 5.666667f, 7.666667f, 1.333333f, 0.024000f }, // 478 (3 6 0 7) - { 5.777778f, 7.111111f, 1.555556f, 0.025862f }, // 479 (3 6 1 6) - { 5.888889f, 6.555555f, 1.777778f, 0.028213f }, // 480 (3 6 2 5) - { 6.000000f, 6.000000f, 2.000000f, 0.031250f }, // 481 (3 6 3 4) - { 6.111111f, 5.444445f, 2.222222f, 0.035294f }, // 482 (3 6 4 3) - { 6.222222f, 4.888889f, 2.444444f, 0.040909f }, // 483 (3 6 5 2) - { 6.333333f, 4.333333f, 2.666667f, 0.049180f }, // 484 (3 6 6 1) - { 6.444445f, 3.777778f, 2.888889f, 0.062500f }, // 485 (3 6 7 0) - { 6.111111f, 6.777778f, 1.555556f, 0.025641f }, // 486 (3 7 0 6) - { 6.222222f, 6.222222f, 1.777778f, 0.028125f }, // 487 (3 7 1 5) - { 6.333333f, 5.666667f, 2.000000f, 0.031359f }, // 488 (3 7 2 4) - { 6.444445f, 5.111111f, 2.222222f, 0.035714f }, // 489 (3 7 3 3) - { 6.555555f, 4.555555f, 2.444444f, 0.041860f }, // 490 (3 7 4 2) - { 6.666667f, 4.000000f, 2.666667f, 0.051136f }, // 491 (3 7 5 1) - { 6.777778f, 3.444444f, 2.888889f, 0.066667f }, // 492 (3 7 6 0) - { 6.555555f, 5.888889f, 1.777778f, 0.028213f }, // 493 (3 8 0 5) - { 6.666667f, 5.333333f, 2.000000f, 0.031690f }, // 494 (3 8 1 4) - { 6.777778f, 4.777778f, 2.222222f, 0.036437f }, // 495 (3 8 2 3) - { 6.888889f, 4.222222f, 2.444444f, 0.043269f }, // 496 (3 8 3 2) - { 7.000000f, 3.666667f, 2.666667f, 0.053892f }, // 497 (3 8 4 1) - { 7.111111f, 3.111111f, 2.888889f, 0.072581f }, // 498 (3 8 5 0) - { 7.000000f, 5.000000f, 2.000000f, 0.032258f }, // 499 (3 9 0 4) - { 7.111111f, 4.444445f, 2.222222f, 0.037500f }, // 500 (3 9 1 3) - { 7.222222f, 3.888889f, 2.444444f, 0.045226f }, // 501 (3 9 2 2) - { 7.333333f, 3.333333f, 2.666667f, 0.057692f }, // 502 (3 9 3 1) - { 7.444445f, 2.777778f, 2.888889f, 0.081081f }, // 503 (3 9 4 0) - { 7.444445f, 4.111111f, 2.222222f, 0.038961f }, // 504 (3 10 0 3) - { 7.555556f, 3.555556f, 2.444444f, 0.047872f }, // 505 (3 10 1 2) - { 7.666667f, 3.000000f, 2.666667f, 0.062937f }, // 506 (3 10 2 1) - { 7.777778f, 2.444444f, 2.888889f, 0.093750f }, // 507 (3 10 3 0) - { 7.888889f, 3.222222f, 2.444444f, 0.051429f }, // 508 (3 11 0 2) - { 8.000000f, 2.666667f, 2.666667f, 0.070313f }, // 509 (3 11 1 1) - { 8.111111f, 2.111111f, 2.888889f, 0.113924f }, // 510 (3 11 2 0) - { 8.333333f, 2.333333f, 2.666667f, 0.081081f }, // 511 (3 12 0 1) - { 8.444445f, 1.777778f, 2.888889f, 0.150000f }, // 512 (3 12 1 0) - { 8.777778f, 1.444444f, 2.888889f, 0.230769f }, // 513 (3 13 0 0) - { 4.000000f, 12.000000f, 0.000000f, 0.020833f }, // 514 (4 0 0 12) - { 4.111111f, 11.444445f, 0.222222f, 0.021277f }, // 515 (4 0 
1 11) - { 4.222222f, 10.888889f, 0.444444f, 0.021845f }, // 516 (4 0 2 10) - { 4.333333f, 10.333333f, 0.666667f, 0.022556f }, // 517 (4 0 3 9) - { 4.444445f, 9.777778f, 0.888889f, 0.023438f }, // 518 (4 0 4 8) - { 4.555555f, 9.222222f, 1.111111f, 0.024523f }, // 519 (4 0 5 7) - { 4.666667f, 8.666667f, 1.333333f, 0.025862f }, // 520 (4 0 6 6) - { 4.777778f, 8.111111f, 1.555556f, 0.027523f }, // 521 (4 0 7 5) - { 4.888889f, 7.555555f, 1.777778f, 0.029605f }, // 522 (4 0 8 4) - { 5.000000f, 7.000000f, 2.000000f, 0.032258f }, // 523 (4 0 9 3) - { 5.111111f, 6.444445f, 2.222222f, 0.035714f }, // 524 (4 0 10 2) - { 5.222222f, 5.888889f, 2.444444f, 0.040359f }, // 525 (4 0 11 1) - { 5.333333f, 5.333333f, 2.666667f, 0.046875f }, // 526 (4 0 12 0) - { 4.444445f, 11.111111f, 0.222222f, 0.020270f }, // 527 (4 1 0 11) - { 4.555555f, 10.555555f, 0.444444f, 0.020882f }, // 528 (4 1 1 10) - { 4.666667f, 10.000000f, 0.666667f, 0.021635f }, // 529 (4 1 2 9) - { 4.777778f, 9.444445f, 0.888889f, 0.022556f }, // 530 (4 1 3 8) - { 4.888889f, 8.888889f, 1.111111f, 0.023684f }, // 531 (4 1 4 7) - { 5.000000f, 8.333333f, 1.333333f, 0.025070f }, // 532 (4 1 5 6) - { 5.111111f, 7.777778f, 1.555556f, 0.026786f }, // 533 (4 1 6 5) - { 5.222222f, 7.222222f, 1.777778f, 0.028939f }, // 534 (4 1 7 4) - { 5.333333f, 6.666667f, 2.000000f, 0.031690f }, // 535 (4 1 8 3) - { 5.444445f, 6.111111f, 2.222222f, 0.035294f }, // 536 (4 1 9 2) - { 5.555555f, 5.555556f, 2.444444f, 0.040179f }, // 537 (4 1 10 1) - { 5.666667f, 5.000000f, 2.666667f, 0.047120f }, // 538 (4 1 11 0) - { 4.888889f, 10.222222f, 0.444444f, 0.020089f }, // 539 (4 2 0 10) - { 5.000000f, 9.666667f, 0.666667f, 0.020882f }, // 540 (4 2 1 9) - { 5.111111f, 9.111111f, 0.888889f, 0.021845f }, // 541 (4 2 2 8) - { 5.222222f, 8.555555f, 1.111111f, 0.023018f }, // 542 (4 2 3 7) - { 5.333333f, 8.000000f, 1.333333f, 0.024457f }, // 543 (4 2 4 6) - { 5.444445f, 7.444445f, 1.555556f, 0.026239f }, // 544 (4 2 5 5) - { 5.555555f, 6.888889f, 1.777778f, 0.028481f }, // 545 (4 2 6 4) - { 5.666667f, 6.333333f, 2.000000f, 0.031359f }, // 546 (4 2 7 3) - { 5.777778f, 5.777778f, 2.222222f, 0.035156f }, // 547 (4 2 8 2) - { 5.888889f, 5.222222f, 2.444444f, 0.040359f }, // 548 (4 2 9 1) - { 6.000000f, 4.666667f, 2.666667f, 0.047872f }, // 549 (4 2 10 0) - { 5.333333f, 9.333333f, 0.666667f, 0.020270f }, // 550 (4 3 0 9) - { 5.444445f, 8.777778f, 0.888889f, 0.021277f }, // 551 (4 3 1 8) - { 5.555555f, 8.222222f, 1.111111f, 0.022500f }, // 552 (4 3 2 7) - { 5.666667f, 7.666667f, 1.333333f, 0.024000f }, // 553 (4 3 3 6) - { 5.777778f, 7.111111f, 1.555556f, 0.025862f }, // 554 (4 3 4 5) - { 5.888889f, 6.555555f, 1.777778f, 0.028213f }, // 555 (4 3 5 4) - { 6.000000f, 6.000000f, 2.000000f, 0.031250f }, // 556 (4 3 6 3) - { 6.111111f, 5.444445f, 2.222222f, 0.035294f }, // 557 (4 3 7 2) - { 6.222222f, 4.888889f, 2.444444f, 0.040909f }, // 558 (4 3 8 1) - { 6.333333f, 4.333333f, 2.666667f, 0.049180f }, // 559 (4 3 9 0) - { 5.777778f, 8.444445f, 0.888889f, 0.020833f }, // 560 (4 4 0 8) - { 5.888889f, 7.888889f, 1.111111f, 0.022113f }, // 561 (4 4 1 7) - { 6.000000f, 7.333333f, 1.333333f, 0.023684f }, // 562 (4 4 2 6) - { 6.111111f, 6.777778f, 1.555556f, 0.025641f }, // 563 (4 4 3 5) - { 6.222222f, 6.222222f, 1.777778f, 0.028125f }, // 564 (4 4 4 4) - { 6.333333f, 5.666667f, 2.000000f, 0.031359f }, // 565 (4 4 5 3) - { 6.444445f, 5.111111f, 2.222222f, 0.035714f }, // 566 (4 4 6 2) - { 6.555555f, 4.555555f, 2.444444f, 0.041860f }, // 567 (4 4 7 1) - { 6.666667f, 4.000000f, 2.666667f, 0.051136f 
}, // 568 (4 4 8 0) - { 6.222222f, 7.555555f, 1.111111f, 0.021845f }, // 569 (4 5 0 7) - { 6.333333f, 7.000000f, 1.333333f, 0.023499f }, // 570 (4 5 1 6) - { 6.444445f, 6.444445f, 1.555556f, 0.025568f }, // 571 (4 5 2 5) - { 6.555555f, 5.888889f, 1.777778f, 0.028213f }, // 572 (4 5 3 4) - { 6.666667f, 5.333333f, 2.000000f, 0.031690f }, // 573 (4 5 4 3) - { 6.777778f, 4.777778f, 2.222222f, 0.036437f }, // 574 (4 5 5 2) - { 6.888889f, 4.222222f, 2.444444f, 0.043269f }, // 575 (4 5 6 1) - { 7.000000f, 3.666667f, 2.666667f, 0.053892f }, // 576 (4 5 7 0) - { 6.666667f, 6.666667f, 1.333333f, 0.023438f }, // 577 (4 6 0 6) - { 6.777778f, 6.111111f, 1.555556f, 0.025641f }, // 578 (4 6 1 5) - { 6.888889f, 5.555555f, 1.777778f, 0.028481f }, // 579 (4 6 2 4) - { 7.000000f, 5.000000f, 2.000000f, 0.032258f }, // 580 (4 6 3 3) - { 7.111111f, 4.444445f, 2.222222f, 0.037500f }, // 581 (4 6 4 2) - { 7.222222f, 3.888889f, 2.444444f, 0.045226f }, // 582 (4 6 5 1) - { 7.333333f, 3.333333f, 2.666667f, 0.057692f }, // 583 (4 6 6 0) - { 7.111111f, 5.777778f, 1.555556f, 0.025862f }, // 584 (4 7 0 5) - { 7.222222f, 5.222222f, 1.777778f, 0.028939f }, // 585 (4 7 1 4) - { 7.333333f, 4.666667f, 2.000000f, 0.033088f }, // 586 (4 7 2 3) - { 7.444445f, 4.111111f, 2.222222f, 0.038961f }, // 587 (4 7 3 2) - { 7.555555f, 3.555556f, 2.444444f, 0.047872f }, // 588 (4 7 4 1) - { 7.666667f, 3.000000f, 2.666667f, 0.062937f }, // 589 (4 7 5 0) - { 7.555555f, 4.888889f, 1.777778f, 0.029605f }, // 590 (4 8 0 4) - { 7.666667f, 4.333333f, 2.000000f, 0.034221f }, // 591 (4 8 1 3) - { 7.777778f, 3.777778f, 2.222222f, 0.040909f }, // 592 (4 8 2 2) - { 7.888889f, 3.222222f, 2.444444f, 0.051429f }, // 593 (4 8 3 1) - { 8.000000f, 2.666667f, 2.666667f, 0.070313f }, // 594 (4 8 4 0) - { 8.000000f, 4.000000f, 2.000000f, 0.035714f }, // 595 (4 9 0 3) - { 8.111111f, 3.444444f, 2.222222f, 0.043478f }, // 596 (4 9 1 2) - { 8.222222f, 2.888889f, 2.444444f, 0.056250f }, // 597 (4 9 2 1) - { 8.333333f, 2.333333f, 2.666667f, 0.081081f }, // 598 (4 9 3 0) - { 8.444445f, 3.111111f, 2.222222f, 0.046875f }, // 599 (4 10 0 2) - { 8.555555f, 2.555556f, 2.444444f, 0.062937f }, // 600 (4 10 1 1) - { 8.666667f, 2.000000f, 2.666667f, 0.097826f }, // 601 (4 10 2 0) - { 8.888889f, 2.222222f, 2.444444f, 0.072581f }, // 602 (4 11 0 1) - { 9.000000f, 1.666667f, 2.666667f, 0.126761f }, // 603 (4 11 1 0) - { 9.333333f, 1.333333f, 2.666667f, 0.187500f }, // 604 (4 12 0 0) - { 5.000000f, 11.000000f, 0.000000f, 0.018182f }, // 605 (5 0 0 11) - { 5.111111f, 10.444445f, 0.222222f, 0.018750f }, // 606 (5 0 1 10) - { 5.222222f, 9.888889f, 0.444444f, 0.019438f }, // 607 (5 0 2 9) - { 5.333333f, 9.333333f, 0.666667f, 0.020270f }, // 608 (5 0 3 8) - { 5.444445f, 8.777778f, 0.888889f, 0.021277f }, // 609 (5 0 4 7) - { 5.555555f, 8.222222f, 1.111111f, 0.022500f }, // 610 (5 0 5 6) - { 5.666667f, 7.666667f, 1.333333f, 0.024000f }, // 611 (5 0 6 5) - { 5.777778f, 7.111111f, 1.555556f, 0.025862f }, // 612 (5 0 7 4) - { 5.888889f, 6.555555f, 1.777778f, 0.028213f }, // 613 (5 0 8 3) - { 6.000000f, 6.000000f, 2.000000f, 0.031250f }, // 614 (5 0 9 2) - { 6.111111f, 5.444445f, 2.222222f, 0.035294f }, // 615 (5 0 10 1) - { 6.222222f, 4.888889f, 2.444444f, 0.040909f }, // 616 (5 0 11 0) - { 5.444445f, 10.111111f, 0.222222f, 0.018182f }, // 617 (5 1 0 10) - { 5.555555f, 9.555555f, 0.444444f, 0.018908f }, // 618 (5 1 1 9) - { 5.666667f, 9.000000f, 0.666667f, 0.019780f }, // 619 (5 1 2 8) - { 5.777778f, 8.444445f, 0.888889f, 0.020833f }, // 620 (5 1 3 7) - { 5.888889f, 7.888889f, 1.111111f, 
0.022113f }, // 621 (5 1 4 6) - { 6.000000f, 7.333333f, 1.333333f, 0.023684f }, // 622 (5 1 5 5) - { 6.111111f, 6.777778f, 1.555556f, 0.025641f }, // 623 (5 1 6 4) - { 6.222222f, 6.222222f, 1.777778f, 0.028125f }, // 624 (5 1 7 3) - { 6.333333f, 5.666667f, 2.000000f, 0.031359f }, // 625 (5 1 8 2) - { 6.444445f, 5.111111f, 2.222222f, 0.035714f }, // 626 (5 1 9 1) - { 6.555555f, 4.555556f, 2.444444f, 0.041860f }, // 627 (5 1 10 0) - { 5.888889f, 9.222222f, 0.444444f, 0.018480f }, // 628 (5 2 0 9) - { 6.000000f, 8.666667f, 0.666667f, 0.019397f }, // 629 (5 2 1 8) - { 6.111111f, 8.111111f, 0.888889f, 0.020501f }, // 630 (5 2 2 7) - { 6.222222f, 7.555555f, 1.111111f, 0.021845f }, // 631 (5 2 3 6) - { 6.333333f, 7.000000f, 1.333333f, 0.023499f }, // 632 (5 2 4 5) - { 6.444445f, 6.444445f, 1.555556f, 0.025568f }, // 633 (5 2 5 4) - { 6.555555f, 5.888889f, 1.777778f, 0.028213f }, // 634 (5 2 6 3) - { 6.666667f, 5.333333f, 2.000000f, 0.031690f }, // 635 (5 2 7 2) - { 6.777778f, 4.777778f, 2.222222f, 0.036437f }, // 636 (5 2 8 1) - { 6.888889f, 4.222222f, 2.444444f, 0.043269f }, // 637 (5 2 9 0) - { 6.333333f, 8.333333f, 0.666667f, 0.019108f }, // 638 (5 3 0 8) - { 6.444445f, 7.777778f, 0.888889f, 0.020270f }, // 639 (5 3 1 7) - { 6.555555f, 7.222222f, 1.111111f, 0.021687f }, // 640 (5 3 2 6) - { 6.666667f, 6.666667f, 1.333333f, 0.023438f }, // 641 (5 3 3 5) - { 6.777778f, 6.111111f, 1.555556f, 0.025641f }, // 642 (5 3 4 4) - { 6.888889f, 5.555555f, 1.777778f, 0.028481f }, // 643 (5 3 5 3) - { 7.000000f, 5.000000f, 2.000000f, 0.032258f }, // 644 (5 3 6 2) - { 7.111111f, 4.444445f, 2.222222f, 0.037500f }, // 645 (5 3 7 1) - { 7.222222f, 3.888889f, 2.444444f, 0.045226f }, // 646 (5 3 8 0) - { 6.777778f, 7.444445f, 0.888889f, 0.020134f }, // 647 (5 4 0 7) - { 6.888889f, 6.888889f, 1.111111f, 0.021635f }, // 648 (5 4 1 6) - { 7.000000f, 6.333333f, 1.333333f, 0.023499f }, // 649 (5 4 2 5) - { 7.111111f, 5.777778f, 1.555556f, 0.025862f }, // 650 (5 4 3 4) - { 7.222222f, 5.222222f, 1.777778f, 0.028939f }, // 651 (5 4 4 3) - { 7.333333f, 4.666667f, 2.000000f, 0.033088f }, // 652 (5 4 5 2) - { 7.444445f, 4.111111f, 2.222222f, 0.038961f }, // 653 (5 4 6 1) - { 7.555555f, 3.555556f, 2.444444f, 0.047872f }, // 654 (5 4 7 0) - { 7.222222f, 6.555555f, 1.111111f, 0.021687f }, // 655 (5 5 0 6) - { 7.333333f, 6.000000f, 1.333333f, 0.023684f }, // 656 (5 5 1 5) - { 7.444445f, 5.444445f, 1.555556f, 0.026239f }, // 657 (5 5 2 4) - { 7.555555f, 4.888889f, 1.777778f, 0.029605f }, // 658 (5 5 3 3) - { 7.666667f, 4.333333f, 2.000000f, 0.034221f }, // 659 (5 5 4 2) - { 7.777778f, 3.777778f, 2.222222f, 0.040909f }, // 660 (5 5 5 1) - { 7.888889f, 3.222222f, 2.444444f, 0.051429f }, // 661 (5 5 6 0) - { 7.666667f, 5.666667f, 1.333333f, 0.024000f }, // 662 (5 6 0 5) - { 7.777778f, 5.111111f, 1.555556f, 0.026786f }, // 663 (5 6 1 4) - { 7.888889f, 4.555555f, 1.777778f, 0.030508f }, // 664 (5 6 2 3) - { 8.000000f, 4.000000f, 2.000000f, 0.035714f }, // 665 (5 6 3 2) - { 8.111111f, 3.444444f, 2.222222f, 0.043478f }, // 666 (5 6 4 1) - { 8.222222f, 2.888889f, 2.444444f, 0.056250f }, // 667 (5 6 5 0) - { 8.111111f, 4.777778f, 1.555556f, 0.027523f }, // 668 (5 7 0 4) - { 8.222222f, 4.222222f, 1.777778f, 0.031690f }, // 669 (5 7 1 3) - { 8.333333f, 3.666667f, 2.000000f, 0.037657f }, // 670 (5 7 2 2) - { 8.444445f, 3.111111f, 2.222222f, 0.046875f }, // 671 (5 7 3 1) - { 8.555555f, 2.555556f, 2.444444f, 0.062937f }, // 672 (5 7 4 0) - { 8.555555f, 3.888889f, 1.777778f, 0.033210f }, // 673 (5 8 0 3) - { 8.666667f, 3.333333f, 2.000000f, 
0.040179f }, // 674 (5 8 1 2) - { 8.777778f, 2.777778f, 2.222222f, 0.051429f }, // 675 (5 8 2 1) - { 8.888889f, 2.222222f, 2.444444f, 0.072581f }, // 676 (5 8 3 0) - { 9.000000f, 3.000000f, 2.000000f, 0.043478f }, // 677 (5 9 0 2) - { 9.111111f, 2.444444f, 2.222222f, 0.057692f }, // 678 (5 9 1 1) - { 9.222222f, 1.888889f, 2.444444f, 0.087379f }, // 679 (5 9 2 0) - { 9.444445f, 2.111111f, 2.222222f, 0.066667f }, // 680 (5 10 0 1) - { 9.555555f, 1.555556f, 2.444444f, 0.112500f }, // 681 (5 10 1 0) - { 9.888889f, 1.222222f, 2.444444f, 0.163636f }, // 682 (5 11 0 0) - { 6.000000f, 10.000000f, 0.000000f, 0.016667f }, // 683 (6 0 0 10) - { 6.111111f, 9.444445f, 0.222222f, 0.017341f }, // 684 (6 0 1 9) - { 6.222222f, 8.888889f, 0.444444f, 0.018145f }, // 685 (6 0 2 8) - { 6.333333f, 8.333333f, 0.666667f, 0.019108f }, // 686 (6 0 3 7) - { 6.444445f, 7.777778f, 0.888889f, 0.020270f }, // 687 (6 0 4 6) - { 6.555555f, 7.222222f, 1.111111f, 0.021687f }, // 688 (6 0 5 5) - { 6.666667f, 6.666667f, 1.333333f, 0.023438f }, // 689 (6 0 6 4) - { 6.777778f, 6.111111f, 1.555556f, 0.025641f }, // 690 (6 0 7 3) - { 6.888889f, 5.555555f, 1.777778f, 0.028481f }, // 691 (6 0 8 2) - { 7.000000f, 5.000000f, 2.000000f, 0.032258f }, // 692 (6 0 9 1) - { 7.111111f, 4.444445f, 2.222222f, 0.037500f }, // 693 (6 0 10 0) - { 6.444445f, 9.111111f, 0.222222f, 0.017045f }, // 694 (6 1 0 9) - { 6.555555f, 8.555555f, 0.444444f, 0.017893f }, // 695 (6 1 1 8) - { 6.666667f, 8.000000f, 0.666667f, 0.018908f }, // 696 (6 1 2 7) - { 6.777778f, 7.444445f, 0.888889f, 0.020134f }, // 697 (6 1 3 6) - { 6.888889f, 6.888889f, 1.111111f, 0.021635f }, // 698 (6 1 4 5) - { 7.000000f, 6.333333f, 1.333333f, 0.023499f }, // 699 (6 1 5 4) - { 7.111111f, 5.777778f, 1.555556f, 0.025862f }, // 700 (6 1 6 3) - { 7.222222f, 5.222222f, 1.777778f, 0.028939f }, // 701 (6 1 7 2) - { 7.333333f, 4.666667f, 2.000000f, 0.033088f }, // 702 (6 1 8 1) - { 7.444445f, 4.111111f, 2.222222f, 0.038961f }, // 703 (6 1 9 0) - { 6.888889f, 8.222222f, 0.444444f, 0.017717f }, // 704 (6 2 0 8) - { 7.000000f, 7.666667f, 0.666667f, 0.018789f }, // 705 (6 2 1 7) - { 7.111111f, 7.111111f, 0.888889f, 0.020089f }, // 706 (6 2 2 6) - { 7.222222f, 6.555555f, 1.111111f, 0.021687f }, // 707 (6 2 3 5) - { 7.333333f, 6.000000f, 1.333333f, 0.023684f }, // 708 (6 2 4 4) - { 7.444445f, 5.444445f, 1.555556f, 0.026239f }, // 709 (6 2 5 3) - { 7.555555f, 4.888889f, 1.777778f, 0.029605f }, // 710 (6 2 6 2) - { 7.666667f, 4.333333f, 2.000000f, 0.034221f }, // 711 (6 2 7 1) - { 7.777778f, 3.777778f, 2.222222f, 0.040909f }, // 712 (6 2 8 0) - { 7.333333f, 7.333333f, 0.666667f, 0.018750f }, // 713 (6 3 0 7) - { 7.444445f, 6.777778f, 0.888889f, 0.020134f }, // 714 (6 3 1 6) - { 7.555555f, 6.222222f, 1.111111f, 0.021845f }, // 715 (6 3 2 5) - { 7.666667f, 5.666667f, 1.333333f, 0.024000f }, // 716 (6 3 3 4) - { 7.777778f, 5.111111f, 1.555556f, 0.026786f }, // 717 (6 3 4 3) - { 7.888889f, 4.555555f, 1.777778f, 0.030508f }, // 718 (6 3 5 2) - { 8.000000f, 4.000000f, 2.000000f, 0.035714f }, // 719 (6 3 6 1) - { 8.111111f, 3.444444f, 2.222222f, 0.043478f }, // 720 (6 3 7 0) - { 7.777778f, 6.444445f, 0.888889f, 0.020270f }, // 721 (6 4 0 6) - { 7.888889f, 5.888889f, 1.111111f, 0.022113f }, // 722 (6 4 1 5) - { 8.000000f, 5.333333f, 1.333333f, 0.024457f }, // 723 (6 4 2 4) - { 8.111111f, 4.777778f, 1.555556f, 0.027523f }, // 724 (6 4 3 3) - { 8.222222f, 4.222222f, 1.777778f, 0.031690f }, // 725 (6 4 4 2) - { 8.333333f, 3.666667f, 2.000000f, 0.037657f }, // 726 (6 4 5 1) - { 8.444445f, 3.111111f, 
2.222222f, 0.046875f }, // 727 (6 4 6 0) - { 8.222222f, 5.555555f, 1.111111f, 0.022500f }, // 728 (6 5 0 5) - { 8.333333f, 5.000000f, 1.333333f, 0.025070f }, // 729 (6 5 1 4) - { 8.444445f, 4.444445f, 1.555556f, 0.028481f }, // 730 (6 5 2 3) - { 8.555555f, 3.888889f, 1.777778f, 0.033210f }, // 731 (6 5 3 2) - { 8.666667f, 3.333333f, 2.000000f, 0.040179f }, // 732 (6 5 4 1) - { 8.777778f, 2.777778f, 2.222222f, 0.051429f }, // 733 (6 5 5 0) - { 8.666667f, 4.666667f, 1.333333f, 0.025862f }, // 734 (6 6 0 4) - { 8.777778f, 4.111111f, 1.555556f, 0.029703f }, // 735 (6 6 1 3) - { 8.888889f, 3.555556f, 1.777778f, 0.035156f }, // 736 (6 6 2 2) - { 9.000000f, 3.000000f, 2.000000f, 0.043478f }, // 737 (6 6 3 1) - { 9.111111f, 2.444444f, 2.222222f, 0.057692f }, // 738 (6 6 4 0) - { 9.111111f, 3.777778f, 1.555556f, 0.031250f }, // 739 (6 7 0 3) - { 9.222222f, 3.222222f, 1.777778f, 0.037657f }, // 740 (6 7 1 2) - { 9.333333f, 2.666667f, 2.000000f, 0.047872f }, // 741 (6 7 2 1) - { 9.444445f, 2.111111f, 2.222222f, 0.066667f }, // 742 (6 7 3 0) - { 9.555555f, 2.888889f, 1.777778f, 0.040909f }, // 743 (6 8 0 2) - { 9.666667f, 2.333333f, 2.000000f, 0.053892f }, // 744 (6 8 1 1) - { 9.777778f, 1.777778f, 2.222222f, 0.080357f }, // 745 (6 8 2 0) - { 10.000000f, 2.000000f, 2.000000f, 0.062500f }, // 746 (6 9 0 1) - { 10.111111f, 1.444444f, 2.222222f, 0.103448f }, // 747 (6 9 1 0) - { 10.444445f, 1.111111f, 2.222222f, 0.150000f }, // 748 (6 10 0 0) - { 7.000000f, 9.000000f, 0.000000f, 0.015873f }, // 749 (7 0 0 9) - { 7.111111f, 8.444445f, 0.222222f, 0.016667f }, // 750 (7 0 1 8) - { 7.222222f, 7.888889f, 0.444444f, 0.017613f }, // 751 (7 0 2 7) - { 7.333333f, 7.333333f, 0.666667f, 0.018750f }, // 752 (7 0 3 6) - { 7.444445f, 6.777778f, 0.888889f, 0.020134f }, // 753 (7 0 4 5) - { 7.555555f, 6.222222f, 1.111111f, 0.021845f }, // 754 (7 0 5 4) - { 7.666667f, 5.666667f, 1.333333f, 0.024000f }, // 755 (7 0 6 3) - { 7.777778f, 5.111111f, 1.555556f, 0.026786f }, // 756 (7 0 7 2) - { 7.888889f, 4.555555f, 1.777778f, 0.030508f }, // 757 (7 0 8 1) - { 8.000000f, 4.000000f, 2.000000f, 0.035714f }, // 758 (7 0 9 0) - { 7.444445f, 8.111111f, 0.222222f, 0.016575f }, // 759 (7 1 0 8) - { 7.555555f, 7.555555f, 0.444444f, 0.017578f }, // 760 (7 1 1 7) - { 7.666667f, 7.000000f, 0.666667f, 0.018789f }, // 761 (7 1 2 6) - { 7.777778f, 6.444445f, 0.888889f, 0.020270f }, // 762 (7 1 3 5) - { 7.888889f, 5.888889f, 1.111111f, 0.022113f }, // 763 (7 1 4 4) - { 8.000000f, 5.333333f, 1.333333f, 0.024457f }, // 764 (7 1 5 3) - { 8.111111f, 4.777778f, 1.555556f, 0.027523f }, // 765 (7 1 6 2) - { 8.222222f, 4.222222f, 1.777778f, 0.031690f }, // 766 (7 1 7 1) - { 8.333333f, 3.666667f, 2.000000f, 0.037657f }, // 767 (7 1 8 0) - { 7.888889f, 7.222222f, 0.444444f, 0.017613f }, // 768 (7 2 0 7) - { 8.000000f, 6.666667f, 0.666667f, 0.018908f }, // 769 (7 2 1 6) - { 8.111111f, 6.111111f, 0.888889f, 0.020501f }, // 770 (7 2 2 5) - { 8.222222f, 5.555555f, 1.111111f, 0.022500f }, // 771 (7 2 3 4) - { 8.333333f, 5.000000f, 1.333333f, 0.025070f }, // 772 (7 2 4 3) - { 8.444445f, 4.444445f, 1.555556f, 0.028481f }, // 773 (7 2 5 2) - { 8.555555f, 3.888889f, 1.777778f, 0.033210f }, // 774 (7 2 6 1) - { 8.666667f, 3.333333f, 2.000000f, 0.040179f }, // 775 (7 2 7 0) - { 8.333333f, 6.333333f, 0.666667f, 0.019108f }, // 776 (7 3 0 6) - { 8.444445f, 5.777778f, 0.888889f, 0.020833f }, // 777 (7 3 1 5) - { 8.555555f, 5.222222f, 1.111111f, 0.023018f }, // 778 (7 3 2 4) - { 8.666667f, 4.666667f, 1.333333f, 0.025862f }, // 779 (7 3 3 3) - { 8.777778f, 4.111111f, 
1.555556f, 0.029703f }, // 780 (7 3 4 2) - { 8.888889f, 3.555556f, 1.777778f, 0.035156f }, // 781 (7 3 5 1) - { 9.000000f, 3.000000f, 2.000000f, 0.043478f }, // 782 (7 3 6 0) - { 8.777778f, 5.444445f, 0.888889f, 0.021277f }, // 783 (7 4 0 5) - { 8.888889f, 4.888889f, 1.111111f, 0.023684f }, // 784 (7 4 1 4) - { 9.000000f, 4.333333f, 1.333333f, 0.026866f }, // 785 (7 4 2 3) - { 9.111111f, 3.777778f, 1.555556f, 0.031250f }, // 786 (7 4 3 2) - { 9.222222f, 3.222222f, 1.777778f, 0.037657f }, // 787 (7 4 4 1) - { 9.333333f, 2.666667f, 2.000000f, 0.047872f }, // 788 (7 4 5 0) - { 9.222222f, 4.555555f, 1.111111f, 0.024523f }, // 789 (7 5 0 4) - { 9.333333f, 4.000000f, 1.333333f, 0.028125f }, // 790 (7 5 1 3) - { 9.444445f, 3.444444f, 1.555556f, 0.033210f }, // 791 (7 5 2 2) - { 9.555555f, 2.888889f, 1.777778f, 0.040909f }, // 792 (7 5 3 1) - { 9.666667f, 2.333333f, 2.000000f, 0.053892f }, // 793 (7 5 4 0) - { 9.666667f, 3.666667f, 1.333333f, 0.029703f }, // 794 (7 6 0 3) - { 9.777778f, 3.111111f, 1.555556f, 0.035714f }, // 795 (7 6 1 2) - { 9.888889f, 2.555556f, 1.777778f, 0.045226f }, // 796 (7 6 2 1) - { 10.000000f, 2.000000f, 2.000000f, 0.062500f }, // 797 (7 6 3 0) - { 10.111111f, 2.777778f, 1.555556f, 0.038961f }, // 798 (7 7 0 2) - { 10.222222f, 2.222222f, 1.777778f, 0.051136f }, // 799 (7 7 1 1) - { 10.333333f, 1.666667f, 2.000000f, 0.075630f }, // 800 (7 7 2 0) - { 10.555555f, 1.888889f, 1.777778f, 0.059603f }, // 801 (7 8 0 1) - { 10.666667f, 1.333333f, 2.000000f, 0.097826f }, // 802 (7 8 1 0) - { 11.000000f, 1.000000f, 2.000000f, 0.142857f }, // 803 (7 9 0 0) - { 8.000000f, 8.000000f, 0.000000f, 0.015625f }, // 804 (8 0 0 8) - { 8.111111f, 7.444445f, 0.222222f, 0.016575f }, // 805 (8 0 1 7) - { 8.222222f, 6.888889f, 0.444444f, 0.017717f }, // 806 (8 0 2 6) - { 8.333333f, 6.333333f, 0.666667f, 0.019108f }, // 807 (8 0 3 5) - { 8.444445f, 5.777778f, 0.888889f, 0.020833f }, // 808 (8 0 4 4) - { 8.555555f, 5.222222f, 1.111111f, 0.023018f }, // 809 (8 0 5 3) - { 8.666667f, 4.666667f, 1.333333f, 0.025862f }, // 810 (8 0 6 2) - { 8.777778f, 4.111111f, 1.555556f, 0.029703f }, // 811 (8 0 7 1) - { 8.888889f, 3.555556f, 1.777778f, 0.035156f }, // 812 (8 0 8 0) - { 8.444445f, 7.111111f, 0.222222f, 0.016667f }, // 813 (8 1 0 7) - { 8.555555f, 6.555555f, 0.444444f, 0.017893f }, // 814 (8 1 1 6) - { 8.666667f, 6.000000f, 0.666667f, 0.019397f }, // 815 (8 1 2 5) - { 8.777778f, 5.444445f, 0.888889f, 0.021277f }, // 816 (8 1 3 4) - { 8.888889f, 4.888889f, 1.111111f, 0.023684f }, // 817 (8 1 4 3) - { 9.000000f, 4.333333f, 1.333333f, 0.026866f }, // 818 (8 1 5 2) - { 9.111111f, 3.777778f, 1.555556f, 0.031250f }, // 819 (8 1 6 1) - { 9.222222f, 3.222222f, 1.777778f, 0.037657f }, // 820 (8 1 7 0) - { 8.888889f, 6.222222f, 0.444444f, 0.018145f }, // 821 (8 2 0 6) - { 9.000000f, 5.666667f, 0.666667f, 0.019780f }, // 822 (8 2 1 5) - { 9.111111f, 5.111111f, 0.888889f, 0.021845f }, // 823 (8 2 2 4) - { 9.222222f, 4.555555f, 1.111111f, 0.024523f }, // 824 (8 2 3 3) - { 9.333333f, 4.000000f, 1.333333f, 0.028125f }, // 825 (8 2 4 2) - { 9.444445f, 3.444444f, 1.555556f, 0.033210f }, // 826 (8 2 5 1) - { 9.555555f, 2.888889f, 1.777778f, 0.040909f }, // 827 (8 2 6 0) - { 9.333333f, 5.333333f, 0.666667f, 0.020270f }, // 828 (8 3 0 5) - { 9.444445f, 4.777778f, 0.888889f, 0.022556f }, // 829 (8 3 1 4) - { 9.555555f, 4.222222f, 1.111111f, 0.025568f }, // 830 (8 3 2 3) - { 9.666667f, 3.666667f, 1.333333f, 0.029703f }, // 831 (8 3 3 2) - { 9.777778f, 3.111111f, 1.555556f, 0.035714f }, // 832 (8 3 4 1) - { 9.888889f, 
2.555556f, 1.777778f, 0.045226f }, // 833 (8 3 5 0) - { 9.777778f, 4.444445f, 0.888889f, 0.023438f }, // 834 (8 4 0 4) - { 9.888889f, 3.888889f, 1.111111f, 0.026866f }, // 835 (8 4 1 3) - { 10.000000f, 3.333333f, 1.333333f, 0.031690f }, // 836 (8 4 2 2) - { 10.111111f, 2.777778f, 1.555556f, 0.038961f }, // 837 (8 4 3 1) - { 10.222222f, 2.222222f, 1.777778f, 0.051136f }, // 838 (8 4 4 0) - { 10.222222f, 3.555556f, 1.111111f, 0.028481f }, // 839 (8 5 0 3) - { 10.333333f, 3.000000f, 1.333333f, 0.034221f }, // 840 (8 5 1 2) - { 10.444445f, 2.444444f, 1.555556f, 0.043269f }, // 841 (8 5 2 1) - { 10.555555f, 1.888889f, 1.777778f, 0.059603f }, // 842 (8 5 3 0) - { 10.666667f, 2.666667f, 1.333333f, 0.037500f }, // 843 (8 6 0 2) - { 10.777778f, 2.111111f, 1.555556f, 0.049180f }, // 844 (8 6 1 1) - { 10.888889f, 1.555556f, 1.777778f, 0.072581f }, // 845 (8 6 2 0) - { 11.111111f, 1.777778f, 1.555556f, 0.057692f }, // 846 (8 7 0 1) - { 11.222222f, 1.222222f, 1.777778f, 0.094737f }, // 847 (8 7 1 0) - { 11.555555f, 0.888889f, 1.777778f, 0.140625f }, // 848 (8 8 0 0) - { 9.000000f, 7.000000f, 0.000000f, 0.015873f }, // 849 (9 0 0 7) - { 9.111111f, 6.444445f, 0.222222f, 0.017045f }, // 850 (9 0 1 6) - { 9.222222f, 5.888889f, 0.444444f, 0.018480f }, // 851 (9 0 2 5) - { 9.333333f, 5.333333f, 0.666667f, 0.020270f }, // 852 (9 0 3 4) - { 9.444445f, 4.777778f, 0.888889f, 0.022556f }, // 853 (9 0 4 3) - { 9.555555f, 4.222222f, 1.111111f, 0.025568f }, // 854 (9 0 5 2) - { 9.666667f, 3.666667f, 1.333333f, 0.029703f }, // 855 (9 0 6 1) - { 9.777778f, 3.111111f, 1.555556f, 0.035714f }, // 856 (9 0 7 0) - { 9.444445f, 6.111111f, 0.222222f, 0.017341f }, // 857 (9 1 0 6) - { 9.555555f, 5.555555f, 0.444444f, 0.018908f }, // 858 (9 1 1 5) - { 9.666667f, 5.000000f, 0.666667f, 0.020882f }, // 859 (9 1 2 4) - { 9.777778f, 4.444445f, 0.888889f, 0.023438f }, // 860 (9 1 3 3) - { 9.888889f, 3.888889f, 1.111111f, 0.026866f }, // 861 (9 1 4 2) - { 10.000000f, 3.333333f, 1.333333f, 0.031690f }, // 862 (9 1 5 1) - { 10.111111f, 2.777778f, 1.555556f, 0.038961f }, // 863 (9 1 6 0) - { 9.888889f, 5.222222f, 0.444444f, 0.019438f }, // 864 (9 2 0 5) - { 10.000000f, 4.666667f, 0.666667f, 0.021635f }, // 865 (9 2 1 4) - { 10.111111f, 4.111111f, 0.888889f, 0.024523f }, // 866 (9 2 2 3) - { 10.222222f, 3.555556f, 1.111111f, 0.028481f }, // 867 (9 2 3 2) - { 10.333333f, 3.000000f, 1.333333f, 0.034221f }, // 868 (9 2 4 1) - { 10.444445f, 2.444444f, 1.555556f, 0.043269f }, // 869 (9 2 5 0) - { 10.333333f, 4.333333f, 0.666667f, 0.022556f }, // 870 (9 3 0 4) - { 10.444445f, 3.777778f, 0.888889f, 0.025862f }, // 871 (9 3 1 3) - { 10.555555f, 3.222222f, 1.111111f, 0.030508f }, // 872 (9 3 2 2) - { 10.666667f, 2.666667f, 1.333333f, 0.037500f }, // 873 (9 3 3 1) - { 10.777778f, 2.111111f, 1.555556f, 0.049180f }, // 874 (9 3 4 0) - { 10.777778f, 3.444444f, 0.888889f, 0.027523f }, // 875 (9 4 0 3) - { 10.888889f, 2.888889f, 1.111111f, 0.033088f }, // 876 (9 4 1 2) - { 11.000000f, 2.333333f, 1.333333f, 0.041860f }, // 877 (9 4 2 1) - { 11.111111f, 1.777778f, 1.555556f, 0.057692f }, // 878 (9 4 3 0) - { 11.222222f, 2.555556f, 1.111111f, 0.036437f }, // 879 (9 5 0 2) - { 11.333333f, 2.000000f, 1.333333f, 0.047872f }, // 880 (9 5 1 1) - { 11.444445f, 1.444444f, 1.555556f, 0.070866f }, // 881 (9 5 2 0) - { 11.666667f, 1.666667f, 1.333333f, 0.056604f }, // 882 (9 6 0 1) - { 11.777778f, 1.111111f, 1.555556f, 0.093750f }, // 883 (9 6 1 0) - { 12.111111f, 0.777778f, 1.555556f, 0.142857f }, // 884 (9 7 0 0) - { 10.000000f, 6.000000f, 0.000000f, 0.016667f }, 
// 885 (10 0 0 6) - { 10.111111f, 5.444445f, 0.222222f, 0.018182f }, // 886 (10 0 1 5) - { 10.222222f, 4.888889f, 0.444444f, 0.020089f }, // 887 (10 0 2 4) - { 10.333333f, 4.333333f, 0.666667f, 0.022556f }, // 888 (10 0 3 3) - { 10.444445f, 3.777778f, 0.888889f, 0.025862f }, // 889 (10 0 4 2) - { 10.555555f, 3.222222f, 1.111111f, 0.030508f }, // 890 (10 0 5 1) - { 10.666667f, 2.666667f, 1.333333f, 0.037500f }, // 891 (10 0 6 0) - { 10.444445f, 5.111111f, 0.222222f, 0.018750f }, // 892 (10 1 0 5) - { 10.555555f, 4.555555f, 0.444444f, 0.020882f }, // 893 (10 1 1 4) - { 10.666667f, 4.000000f, 0.666667f, 0.023684f }, // 894 (10 1 2 3) - { 10.777778f, 3.444444f, 0.888889f, 0.027523f }, // 895 (10 1 3 2) - { 10.888889f, 2.888889f, 1.111111f, 0.033088f }, // 896 (10 1 4 1) - { 11.000000f, 2.333333f, 1.333333f, 0.041860f }, // 897 (10 1 5 0) - { 10.888889f, 4.222222f, 0.444444f, 0.021845f }, // 898 (10 2 0 4) - { 11.000000f, 3.666667f, 0.666667f, 0.025070f }, // 899 (10 2 1 3) - { 11.111111f, 3.111111f, 0.888889f, 0.029605f }, // 900 (10 2 2 2) - { 11.222222f, 2.555556f, 1.111111f, 0.036437f }, // 901 (10 2 3 1) - { 11.333333f, 2.000000f, 1.333333f, 0.047872f }, // 902 (10 2 4 0) - { 11.333333f, 3.333333f, 0.666667f, 0.026786f }, // 903 (10 3 0 3) - { 11.444445f, 2.777778f, 0.888889f, 0.032258f }, // 904 (10 3 1 2) - { 11.555555f, 2.222222f, 1.111111f, 0.040909f }, // 905 (10 3 2 1) - { 11.666667f, 1.666667f, 1.333333f, 0.056604f }, // 906 (10 3 3 0) - { 11.777778f, 2.444444f, 0.888889f, 0.035714f }, // 907 (10 4 0 2) - { 11.888889f, 1.888889f, 1.111111f, 0.047120f }, // 908 (10 4 1 1) - { 12.000000f, 1.333333f, 1.333333f, 0.070313f }, // 909 (10 4 2 0) - { 12.222222f, 1.555556f, 1.111111f, 0.056250f }, // 910 (10 5 0 1) - { 12.333333f, 1.000000f, 1.333333f, 0.094737f }, // 911 (10 5 1 0) - { 12.666667f, 0.666667f, 1.333333f, 0.150000f }, // 912 (10 6 0 0) - { 11.000000f, 5.000000f, 0.000000f, 0.018182f }, // 913 (11 0 0 5) - { 11.111111f, 4.444445f, 0.222222f, 0.020270f }, // 914 (11 0 1 4) - { 11.222222f, 3.888889f, 0.444444f, 0.023018f }, // 915 (11 0 2 3) - { 11.333333f, 3.333333f, 0.666667f, 0.026786f }, // 916 (11 0 3 2) - { 11.444445f, 2.777778f, 0.888889f, 0.032258f }, // 917 (11 0 4 1) - { 11.555555f, 2.222222f, 1.111111f, 0.040909f }, // 918 (11 0 5 0) - { 11.444445f, 4.111111f, 0.222222f, 0.021277f }, // 919 (11 1 0 4) - { 11.555555f, 3.555556f, 0.444444f, 0.024457f }, // 920 (11 1 1 3) - { 11.666667f, 3.000000f, 0.666667f, 0.028939f }, // 921 (11 1 2 2) - { 11.777778f, 2.444444f, 0.888889f, 0.035714f }, // 922 (11 1 3 1) - { 11.888889f, 1.888889f, 1.111111f, 0.047120f }, // 923 (11 1 4 0) - { 11.888889f, 3.222222f, 0.444444f, 0.026239f }, // 924 (11 2 0 3) - { 12.000000f, 2.666667f, 0.666667f, 0.031690f }, // 925 (11 2 1 2) - { 12.111111f, 2.111111f, 0.888889f, 0.040359f }, // 926 (11 2 2 1) - { 12.222222f, 1.555556f, 1.111111f, 0.056250f }, // 927 (11 2 3 0) - { 12.333333f, 2.333333f, 0.666667f, 0.035294f }, // 928 (11 3 0 2) - { 12.444445f, 1.777778f, 0.888889f, 0.046875f }, // 929 (11 3 1 1) - { 12.555555f, 1.222222f, 1.111111f, 0.070866f }, // 930 (11 3 2 0) - { 12.777778f, 1.444444f, 0.888889f, 0.056604f }, // 931 (11 4 0 1) - { 12.888889f, 0.888889f, 1.111111f, 0.097826f }, // 932 (11 4 1 0) - { 13.222222f, 0.555556f, 1.111111f, 0.163636f }, // 933 (11 5 0 0) - { 12.000000f, 4.000000f, 0.000000f, 0.020833f }, // 934 (12 0 0 4) - { 12.111111f, 3.444444f, 0.222222f, 0.024000f }, // 935 (12 0 1 3) - { 12.222222f, 2.888889f, 0.444444f, 0.028481f }, // 936 (12 0 2 2) - { 12.333333f, 
2.333333f, 0.666667f, 0.035294f }, // 937 (12 0 3 1)
- { 12.444445f, 1.777778f, 0.888889f, 0.046875f }, // 938 (12 0 4 0)
- { 12.444445f, 3.111111f, 0.222222f, 0.025862f }, // 939 (12 1 0 3)
- { 12.555555f, 2.555556f, 0.444444f, 0.031359f }, // 940 (12 1 1 2)
- { 12.666667f, 2.000000f, 0.666667f, 0.040179f }, // 941 (12 1 2 1)
- { 12.777778f, 1.444444f, 0.888889f, 0.056604f }, // 942 (12 1 3 0)
- { 12.888889f, 2.222222f, 0.444444f, 0.035156f }, // 943 (12 2 0 2)
- { 13.000000f, 1.666667f, 0.666667f, 0.047120f }, // 944 (12 2 1 1)
- { 13.111111f, 1.111111f, 0.888889f, 0.072581f }, // 945 (12 2 2 0)
- { 13.333333f, 1.333333f, 0.666667f, 0.057692f }, // 946 (12 3 0 1)
- { 13.444445f, 0.777778f, 0.888889f, 0.103448f }, // 947 (12 3 1 0)
- { 13.777778f, 0.444444f, 0.888889f, 0.187500f }, // 948 (12 4 0 0)
- { 13.000000f, 3.000000f, 0.000000f, 0.025641f }, // 949 (13 0 0 3)
- { 13.111111f, 2.444444f, 0.222222f, 0.031250f }, // 950 (13 0 1 2)
- { 13.222222f, 1.888889f, 0.444444f, 0.040359f }, // 951 (13 0 2 1)
- { 13.333333f, 1.333333f, 0.666667f, 0.057692f }, // 952 (13 0 3 0)
- { 13.444445f, 2.111111f, 0.222222f, 0.035294f }, // 953 (13 1 0 2)
- { 13.555555f, 1.555556f, 0.444444f, 0.047872f }, // 954 (13 1 1 1)
- { 13.666667f, 1.000000f, 0.666667f, 0.075630f }, // 955 (13 1 2 0)
- { 13.888889f, 1.222222f, 0.444444f, 0.059603f }, // 956 (13 2 0 1)
- { 14.000000f, 0.666667f, 0.666667f, 0.112500f }, // 957 (13 2 1 0)
- { 14.333333f, 0.333333f, 0.666667f, 0.230769f }, // 958 (13 3 0 0)
- { 14.000000f, 2.000000f, 0.000000f, 0.035714f }, // 959 (14 0 0 2)
- { 14.111111f, 1.444444f, 0.222222f, 0.049180f }, // 960 (14 0 1 1)
- { 14.222222f, 0.888889f, 0.444444f, 0.080357f }, // 961 (14 0 2 0)
- { 14.444445f, 1.111111f, 0.222222f, 0.062500f }, // 962 (14 1 0 1)
- { 14.555555f, 0.555556f, 0.444444f, 0.126761f }, // 963 (14 1 1 0)
- { 14.888889f, 0.222222f, 0.444444f, 0.321429f }, // 964 (14 2 0 0)
- { 15.000000f, 1.000000f, 0.000000f, 0.066667f }, // 965 (15 0 0 1)
- { 15.111111f, 0.444444f, 0.222222f, 0.150000f }, // 966 (15 0 1 0)
- { 15.444445f, 0.111111f, 0.222222f, 0.600000f }, // 967 (15 1 0 0)
- { 16.000000f, 0.000000f, 0.000000f, FLT_MAX }, // 968 (16 0 0 0)
-}; // 969 four cluster elements
-
+struct Precomp {
+ float alpha2_sum;
+ float beta2_sum;
+ float alphabeta_sum;
+ float factor;
+};
+
+static const SQUISH_ALIGN_16 Precomp s_threeElement[153] = {
+ { 0.000000f, 16.000000f, 0.000000f, FLT_MAX }, // 0 (0 0 16)
+ { 0.250000f, 15.250000f, 0.250000f, 0.266667f }, // 1 (0 1 15)
+ { 0.500000f, 14.500000f, 0.500000f, 0.142857f }, // 2 (0 2 14)
+ { 0.750000f, 13.750000f, 0.750000f, 0.102564f }, // 3 (0 3 13)
+ { 1.000000f, 13.000000f, 1.000000f, 0.083333f }, // 4 (0 4 12)
+ { 1.250000f, 12.250000f, 1.250000f, 0.072727f }, // 5 (0 5 11)
+ { 1.500000f, 11.500000f, 1.500000f, 0.066667f }, // 6 (0 6 10)
+ { 1.750000f, 10.750000f, 1.750000f, 0.063492f }, // 7 (0 7 9)
+ { 2.000000f, 10.000000f, 2.000000f, 0.062500f }, // 8 (0 8 8)
+ { 2.250000f, 9.250000f, 2.250000f, 0.063492f }, // 9 (0 9 7)
+ { 2.500000f, 8.500000f, 2.500000f, 0.066667f }, // 10 (0 10 6)
+ { 2.750000f, 7.750000f, 2.750000f, 0.072727f }, // 11 (0 11 5)
+ { 3.000000f, 7.000000f, 3.000000f, 0.083333f }, // 12 (0 12 4)
+ { 3.250000f, 6.250000f, 3.250000f, 0.102564f }, // 13 (0 13 3)
+ { 3.500000f, 5.500000f, 3.500000f, 0.142857f }, // 14 (0 14 2)
+ { 3.750000f, 4.750000f, 3.750000f, 0.266667f }, // 15 (0 15 1)
+ { 4.000000f, 4.000000f, 4.000000f, FLT_MAX }, // 16 (0 16 0)
+ { 1.000000f, 15.000000f, 0.000000f, 0.066667f }, // 17 (1 0 15)
+
{ 1.250000f, 14.250000f, 0.250000f, 0.056338f }, // 18 (1 1 14) + { 1.500000f, 13.500000f, 0.500000f, 0.050000f }, // 19 (1 2 13) + { 1.750000f, 12.750000f, 0.750000f, 0.045977f }, // 20 (1 3 12) + { 2.000000f, 12.000000f, 1.000000f, 0.043478f }, // 21 (1 4 11) + { 2.250000f, 11.250000f, 1.250000f, 0.042105f }, // 22 (1 5 10) + { 2.500000f, 10.500000f, 1.500000f, 0.041667f }, // 23 (1 6 9) + { 2.750000f, 9.750000f, 1.750000f, 0.042105f }, // 24 (1 7 8) + { 3.000000f, 9.000000f, 2.000000f, 0.043478f }, // 25 (1 8 7) + { 3.250000f, 8.250000f, 2.250000f, 0.045977f }, // 26 (1 9 6) + { 3.500000f, 7.500000f, 2.500000f, 0.050000f }, // 27 (1 10 5) + { 3.750000f, 6.750000f, 2.750000f, 0.056338f }, // 28 (1 11 4) + { 4.000000f, 6.000000f, 3.000000f, 0.066667f }, // 29 (1 12 3) + { 4.250000f, 5.250000f, 3.250000f, 0.085106f }, // 30 (1 13 2) + { 4.500000f, 4.500000f, 3.500000f, 0.125000f }, // 31 (1 14 1) + { 4.750000f, 3.750000f, 3.750000f, 0.266667f }, // 32 (1 15 0) + { 2.000000f, 14.000000f, 0.000000f, 0.035714f }, // 33 (2 0 14) + { 2.250000f, 13.250000f, 0.250000f, 0.033613f }, // 34 (2 1 13) + { 2.500000f, 12.500000f, 0.500000f, 0.032258f }, // 35 (2 2 12) + { 2.750000f, 11.750000f, 0.750000f, 0.031496f }, // 36 (2 3 11) + { 3.000000f, 11.000000f, 1.000000f, 0.031250f }, // 37 (2 4 10) + { 3.250000f, 10.250000f, 1.250000f, 0.031496f }, // 38 (2 5 9) + { 3.500000f, 9.500000f, 1.500000f, 0.032258f }, // 39 (2 6 8) + { 3.750000f, 8.750000f, 1.750000f, 0.033613f }, // 40 (2 7 7) + { 4.000000f, 8.000000f, 2.000000f, 0.035714f }, // 41 (2 8 6) + { 4.250000f, 7.250000f, 2.250000f, 0.038835f }, // 42 (2 9 5) + { 4.500000f, 6.500000f, 2.500000f, 0.043478f }, // 43 (2 10 4) + { 4.750000f, 5.750000f, 2.750000f, 0.050633f }, // 44 (2 11 3) + { 5.000000f, 5.000000f, 3.000000f, 0.062500f }, // 45 (2 12 2) + { 5.250000f, 4.250000f, 3.250000f, 0.085106f }, // 46 (2 13 1) + { 5.500000f, 3.500000f, 3.500000f, 0.142857f }, // 47 (2 14 0) + { 3.000000f, 13.000000f, 0.000000f, 0.025641f }, // 48 (3 0 13) + { 3.250000f, 12.250000f, 0.250000f, 0.025157f }, // 49 (3 1 12) + { 3.500000f, 11.500000f, 0.500000f, 0.025000f }, // 50 (3 2 11) + { 3.750000f, 10.750000f, 0.750000f, 0.025157f }, // 51 (3 3 10) + { 4.000000f, 10.000000f, 1.000000f, 0.025641f }, // 52 (3 4 9) + { 4.250000f, 9.250000f, 1.250000f, 0.026490f }, // 53 (3 5 8) + { 4.500000f, 8.500000f, 1.500000f, 0.027778f }, // 54 (3 6 7) + { 4.750000f, 7.750000f, 1.750000f, 0.029630f }, // 55 (3 7 6) + { 5.000000f, 7.000000f, 2.000000f, 0.032258f }, // 56 (3 8 5) + { 5.250000f, 6.250000f, 2.250000f, 0.036036f }, // 57 (3 9 4) + { 5.500000f, 5.500000f, 2.500000f, 0.041667f }, // 58 (3 10 3) + { 5.750000f, 4.750000f, 2.750000f, 0.050633f }, // 59 (3 11 2) + { 6.000000f, 4.000000f, 3.000000f, 0.066667f }, // 60 (3 12 1) + { 6.250000f, 3.250000f, 3.250000f, 0.102564f }, // 61 (3 13 0) + { 4.000000f, 12.000000f, 0.000000f, 0.020833f }, // 62 (4 0 12) + { 4.250000f, 11.250000f, 0.250000f, 0.020942f }, // 63 (4 1 11) + { 4.500000f, 10.500000f, 0.500000f, 0.021277f }, // 64 (4 2 10) + { 4.750000f, 9.750000f, 0.750000f, 0.021858f }, // 65 (4 3 9) + { 5.000000f, 9.000000f, 1.000000f, 0.022727f }, // 66 (4 4 8) + { 5.250000f, 8.250000f, 1.250000f, 0.023952f }, // 67 (4 5 7) + { 5.500000f, 7.500000f, 1.500000f, 0.025641f }, // 68 (4 6 6) + { 5.750000f, 6.750000f, 1.750000f, 0.027972f }, // 69 (4 7 5) + { 6.000000f, 6.000000f, 2.000000f, 0.031250f }, // 70 (4 8 4) + { 6.250000f, 5.250000f, 2.250000f, 0.036036f }, // 71 (4 9 3) + { 6.500000f, 4.500000f, 2.500000f, 0.043478f 
}, // 72 (4 10 2) + { 6.750000f, 3.750000f, 2.750000f, 0.056338f }, // 73 (4 11 1) + { 7.000000f, 3.000000f, 3.000000f, 0.083333f }, // 74 (4 12 0) + { 5.000000f, 11.000000f, 0.000000f, 0.018182f }, // 75 (5 0 11) + { 5.250000f, 10.250000f, 0.250000f, 0.018605f }, // 76 (5 1 10) + { 5.500000f, 9.500000f, 0.500000f, 0.019231f }, // 77 (5 2 9) + { 5.750000f, 8.750000f, 0.750000f, 0.020101f }, // 78 (5 3 8) + { 6.000000f, 8.000000f, 1.000000f, 0.021277f }, // 79 (5 4 7) + { 6.250000f, 7.250000f, 1.250000f, 0.022857f }, // 80 (5 5 6) + { 6.500000f, 6.500000f, 1.500000f, 0.025000f }, // 81 (5 6 5) + { 6.750000f, 5.750000f, 1.750000f, 0.027972f }, // 82 (5 7 4) + { 7.000000f, 5.000000f, 2.000000f, 0.032258f }, // 83 (5 8 3) + { 7.250000f, 4.250000f, 2.250000f, 0.038835f }, // 84 (5 9 2) + { 7.500000f, 3.500000f, 2.500000f, 0.050000f }, // 85 (5 10 1) + { 7.750000f, 2.750000f, 2.750000f, 0.072727f }, // 86 (5 11 0) + { 6.000000f, 10.000000f, 0.000000f, 0.016667f }, // 87 (6 0 10) + { 6.250000f, 9.250000f, 0.250000f, 0.017316f }, // 88 (6 1 9) + { 6.500000f, 8.500000f, 0.500000f, 0.018182f }, // 89 (6 2 8) + { 6.750000f, 7.750000f, 0.750000f, 0.019324f }, // 90 (6 3 7) + { 7.000000f, 7.000000f, 1.000000f, 0.020833f }, // 91 (6 4 6) + { 7.250000f, 6.250000f, 1.250000f, 0.022857f }, // 92 (6 5 5) + { 7.500000f, 5.500000f, 1.500000f, 0.025641f }, // 93 (6 6 4) + { 7.750000f, 4.750000f, 1.750000f, 0.029630f }, // 94 (6 7 3) + { 8.000000f, 4.000000f, 2.000000f, 0.035714f }, // 95 (6 8 2) + { 8.250000f, 3.250000f, 2.250000f, 0.045977f }, // 96 (6 9 1) + { 8.500000f, 2.500000f, 2.500000f, 0.066667f }, // 97 (6 10 0) + { 7.000000f, 9.000000f, 0.000000f, 0.015873f }, // 98 (7 0 9) + { 7.250000f, 8.250000f, 0.250000f, 0.016736f }, // 99 (7 1 8) + { 7.500000f, 7.500000f, 0.500000f, 0.017857f }, // 100 (7 2 7) + { 7.750000f, 6.750000f, 0.750000f, 0.019324f }, // 101 (7 3 6) + { 8.000000f, 6.000000f, 1.000000f, 0.021277f }, // 102 (7 4 5) + { 8.250000f, 5.250000f, 1.250000f, 0.023952f }, // 103 (7 5 4) + { 8.500000f, 4.500000f, 1.500000f, 0.027778f }, // 104 (7 6 3) + { 8.750000f, 3.750000f, 1.750000f, 0.033613f }, // 105 (7 7 2) + { 9.000000f, 3.000000f, 2.000000f, 0.043478f }, // 106 (7 8 1) + { 9.250000f, 2.250000f, 2.250000f, 0.063492f }, // 107 (7 9 0) + { 8.000000f, 8.000000f, 0.000000f, 0.015625f }, // 108 (8 0 8) + { 8.250000f, 7.250000f, 0.250000f, 0.016736f }, // 109 (8 1 7) + { 8.500000f, 6.500000f, 0.500000f, 0.018182f }, // 110 (8 2 6) + { 8.750000f, 5.750000f, 0.750000f, 0.020101f }, // 111 (8 3 5) + { 9.000000f, 5.000000f, 1.000000f, 0.022727f }, // 112 (8 4 4) + { 9.250000f, 4.250000f, 1.250000f, 0.026490f }, // 113 (8 5 3) + { 9.500000f, 3.500000f, 1.500000f, 0.032258f }, // 114 (8 6 2) + { 9.750000f, 2.750000f, 1.750000f, 0.042105f }, // 115 (8 7 1) + { 10.000000f, 2.000000f, 2.000000f, 0.062500f }, // 116 (8 8 0) + { 9.000000f, 7.000000f, 0.000000f, 0.015873f }, // 117 (9 0 7) + { 9.250000f, 6.250000f, 0.250000f, 0.017316f }, // 118 (9 1 6) + { 9.500000f, 5.500000f, 0.500000f, 0.019231f }, // 119 (9 2 5) + { 9.750000f, 4.750000f, 0.750000f, 0.021858f }, // 120 (9 3 4) + { 10.000000f, 4.000000f, 1.000000f, 0.025641f }, // 121 (9 4 3) + { 10.250000f, 3.250000f, 1.250000f, 0.031496f }, // 122 (9 5 2) + { 10.500000f, 2.500000f, 1.500000f, 0.041667f }, // 123 (9 6 1) + { 10.750000f, 1.750000f, 1.750000f, 0.063492f }, // 124 (9 7 0) + { 10.000000f, 6.000000f, 0.000000f, 0.016667f }, // 125 (10 0 6) + { 10.250000f, 5.250000f, 0.250000f, 0.018605f }, // 126 (10 1 5) + { 10.500000f, 4.500000f, 
0.500000f, 0.021277f }, // 127 (10 2 4) + { 10.750000f, 3.750000f, 0.750000f, 0.025157f }, // 128 (10 3 3) + { 11.000000f, 3.000000f, 1.000000f, 0.031250f }, // 129 (10 4 2) + { 11.250000f, 2.250000f, 1.250000f, 0.042105f }, // 130 (10 5 1) + { 11.500000f, 1.500000f, 1.500000f, 0.066667f }, // 131 (10 6 0) + { 11.000000f, 5.000000f, 0.000000f, 0.018182f }, // 132 (11 0 5) + { 11.250000f, 4.250000f, 0.250000f, 0.020942f }, // 133 (11 1 4) + { 11.500000f, 3.500000f, 0.500000f, 0.025000f }, // 134 (11 2 3) + { 11.750000f, 2.750000f, 0.750000f, 0.031496f }, // 135 (11 3 2) + { 12.000000f, 2.000000f, 1.000000f, 0.043478f }, // 136 (11 4 1) + { 12.250000f, 1.250000f, 1.250000f, 0.072727f }, // 137 (11 5 0) + { 12.000000f, 4.000000f, 0.000000f, 0.020833f }, // 138 (12 0 4) + { 12.250000f, 3.250000f, 0.250000f, 0.025157f }, // 139 (12 1 3) + { 12.500000f, 2.500000f, 0.500000f, 0.032258f }, // 140 (12 2 2) + { 12.750000f, 1.750000f, 0.750000f, 0.045977f }, // 141 (12 3 1) + { 13.000000f, 1.000000f, 1.000000f, 0.083333f }, // 142 (12 4 0) + { 13.000000f, 3.000000f, 0.000000f, 0.025641f }, // 143 (13 0 3) + { 13.250000f, 2.250000f, 0.250000f, 0.033613f }, // 144 (13 1 2) + { 13.500000f, 1.500000f, 0.500000f, 0.050000f }, // 145 (13 2 1) + { 13.750000f, 0.750000f, 0.750000f, 0.102564f }, // 146 (13 3 0) + { 14.000000f, 2.000000f, 0.000000f, 0.035714f }, // 147 (14 0 2) + { 14.250000f, 1.250000f, 0.250000f, 0.056338f }, // 148 (14 1 1) + { 14.500000f, 0.500000f, 0.500000f, 0.142857f }, // 149 (14 2 0) + { 15.000000f, 1.000000f, 0.000000f, 0.066667f }, // 150 (15 0 1) + { 15.250000f, 0.250000f, 0.250000f, 0.266667f }, // 151 (15 1 0) + { 16.000000f, 0.000000f, 0.000000f, FLT_MAX }, // 152 (16 0 0) +}; // 153 three cluster elements + +static const SQUISH_ALIGN_16 Precomp s_fourElement[969] = { + { 0.000000f, 16.000000f, 0.000000f, FLT_MAX }, // 0 (0 0 0 16) + { 0.111111f, 15.444445f, 0.222222f, 0.600000f }, // 1 (0 0 1 15) + { 0.222222f, 14.888889f, 0.444444f, 0.321429f }, // 2 (0 0 2 14) + { 0.333333f, 14.333333f, 0.666667f, 0.230769f }, // 3 (0 0 3 13) + { 0.444444f, 13.777778f, 0.888889f, 0.187500f }, // 4 (0 0 4 12) + { 0.555556f, 13.222222f, 1.111111f, 0.163636f }, // 5 (0 0 5 11) + { 0.666667f, 12.666667f, 1.333333f, 0.150000f }, // 6 (0 0 6 10) + { 0.777778f, 12.111111f, 1.555556f, 0.142857f }, // 7 (0 0 7 9) + { 0.888889f, 11.555555f, 1.777778f, 0.140625f }, // 8 (0 0 8 8) + { 1.000000f, 11.000000f, 2.000000f, 0.142857f }, // 9 (0 0 9 7) + { 1.111111f, 10.444445f, 2.222222f, 0.150000f }, // 10 (0 0 10 6) + { 1.222222f, 9.888889f, 2.444444f, 0.163636f }, // 11 (0 0 11 5) + { 1.333333f, 9.333333f, 2.666667f, 0.187500f }, // 12 (0 0 12 4) + { 1.444444f, 8.777778f, 2.888889f, 0.230769f }, // 13 (0 0 13 3) + { 1.555556f, 8.222222f, 3.111111f, 0.321429f }, // 14 (0 0 14 2) + { 1.666667f, 7.666667f, 3.333333f, 0.600000f }, // 15 (0 0 15 1) + { 1.777778f, 7.111111f, 3.555556f, FLT_MAX }, // 16 (0 0 16 0) + { 0.444444f, 15.111111f, 0.222222f, 0.150000f }, // 17 (0 1 0 15) + { 0.555556f, 14.555555f, 0.444444f, 0.126761f }, // 18 (0 1 1 14) + { 0.666667f, 14.000000f, 0.666667f, 0.112500f }, // 19 (0 1 2 13) + { 0.777778f, 13.444445f, 0.888889f, 0.103448f }, // 20 (0 1 3 12) + { 0.888889f, 12.888889f, 1.111111f, 0.097826f }, // 21 (0 1 4 11) + { 1.000000f, 12.333333f, 1.333333f, 0.094737f }, // 22 (0 1 5 10) + { 1.111111f, 11.777778f, 1.555556f, 0.093750f }, // 23 (0 1 6 9) + { 1.222222f, 11.222222f, 1.777778f, 0.094737f }, // 24 (0 1 7 8) + { 1.333333f, 10.666667f, 2.000000f, 0.097826f }, // 25 (0 1 8 7) + 
{ 1.444444f, 10.111111f, 2.222222f, 0.103448f }, // 26 (0 1 9 6) + { 1.555556f, 9.555555f, 2.444444f, 0.112500f }, // 27 (0 1 10 5) + { 1.666667f, 9.000000f, 2.666667f, 0.126761f }, // 28 (0 1 11 4) + { 1.777778f, 8.444445f, 2.888889f, 0.150000f }, // 29 (0 1 12 3) + { 1.888889f, 7.888889f, 3.111111f, 0.191489f }, // 30 (0 1 13 2) + { 2.000000f, 7.333333f, 3.333333f, 0.281250f }, // 31 (0 1 14 1) + { 2.111111f, 6.777778f, 3.555556f, 0.600000f }, // 32 (0 1 15 0) + { 0.888889f, 14.222222f, 0.444444f, 0.080357f }, // 33 (0 2 0 14) + { 1.000000f, 13.666667f, 0.666667f, 0.075630f }, // 34 (0 2 1 13) + { 1.111111f, 13.111111f, 0.888889f, 0.072581f }, // 35 (0 2 2 12) + { 1.222222f, 12.555555f, 1.111111f, 0.070866f }, // 36 (0 2 3 11) + { 1.333333f, 12.000000f, 1.333333f, 0.070313f }, // 37 (0 2 4 10) + { 1.444444f, 11.444445f, 1.555556f, 0.070866f }, // 38 (0 2 5 9) + { 1.555556f, 10.888889f, 1.777778f, 0.072581f }, // 39 (0 2 6 8) + { 1.666667f, 10.333333f, 2.000000f, 0.075630f }, // 40 (0 2 7 7) + { 1.777778f, 9.777778f, 2.222222f, 0.080357f }, // 41 (0 2 8 6) + { 1.888889f, 9.222222f, 2.444444f, 0.087379f }, // 42 (0 2 9 5) + { 2.000000f, 8.666667f, 2.666667f, 0.097826f }, // 43 (0 2 10 4) + { 2.111111f, 8.111111f, 2.888889f, 0.113924f }, // 44 (0 2 11 3) + { 2.222222f, 7.555556f, 3.111111f, 0.140625f }, // 45 (0 2 12 2) + { 2.333333f, 7.000000f, 3.333333f, 0.191489f }, // 46 (0 2 13 1) + { 2.444444f, 6.444445f, 3.555556f, 0.321429f }, // 47 (0 2 14 0) + { 1.333333f, 13.333333f, 0.666667f, 0.057692f }, // 48 (0 3 0 13) + { 1.444444f, 12.777778f, 0.888889f, 0.056604f }, // 49 (0 3 1 12) + { 1.555556f, 12.222222f, 1.111111f, 0.056250f }, // 50 (0 3 2 11) + { 1.666667f, 11.666667f, 1.333333f, 0.056604f }, // 51 (0 3 3 10) + { 1.777778f, 11.111111f, 1.555556f, 0.057692f }, // 52 (0 3 4 9) + { 1.888889f, 10.555555f, 1.777778f, 0.059603f }, // 53 (0 3 5 8) + { 2.000000f, 10.000000f, 2.000000f, 0.062500f }, // 54 (0 3 6 7) + { 2.111111f, 9.444445f, 2.222222f, 0.066667f }, // 55 (0 3 7 6) + { 2.222222f, 8.888889f, 2.444444f, 0.072581f }, // 56 (0 3 8 5) + { 2.333333f, 8.333333f, 2.666667f, 0.081081f }, // 57 (0 3 9 4) + { 2.444444f, 7.777778f, 2.888889f, 0.093750f }, // 58 (0 3 10 3) + { 2.555556f, 7.222222f, 3.111111f, 0.113924f }, // 59 (0 3 11 2) + { 2.666667f, 6.666667f, 3.333333f, 0.150000f }, // 60 (0 3 12 1) + { 2.777778f, 6.111111f, 3.555556f, 0.230769f }, // 61 (0 3 13 0) + { 1.777778f, 12.444445f, 0.888889f, 0.046875f }, // 62 (0 4 0 12) + { 1.888889f, 11.888889f, 1.111111f, 0.047120f }, // 63 (0 4 1 11) + { 2.000000f, 11.333333f, 1.333333f, 0.047872f }, // 64 (0 4 2 10) + { 2.111111f, 10.777778f, 1.555556f, 0.049180f }, // 65 (0 4 3 9) + { 2.222222f, 10.222222f, 1.777778f, 0.051136f }, // 66 (0 4 4 8) + { 2.333333f, 9.666667f, 2.000000f, 0.053892f }, // 67 (0 4 5 7) + { 2.444444f, 9.111111f, 2.222222f, 0.057692f }, // 68 (0 4 6 6) + { 2.555556f, 8.555555f, 2.444444f, 0.062937f }, // 69 (0 4 7 5) + { 2.666667f, 8.000000f, 2.666667f, 0.070313f }, // 70 (0 4 8 4) + { 2.777778f, 7.444445f, 2.888889f, 0.081081f }, // 71 (0 4 9 3) + { 2.888889f, 6.888889f, 3.111111f, 0.097826f }, // 72 (0 4 10 2) + { 3.000000f, 6.333333f, 3.333333f, 0.126761f }, // 73 (0 4 11 1) + { 3.111111f, 5.777778f, 3.555556f, 0.187500f }, // 74 (0 4 12 0) + { 2.222222f, 11.555555f, 1.111111f, 0.040909f }, // 75 (0 5 0 11) + { 2.333333f, 11.000000f, 1.333333f, 0.041860f }, // 76 (0 5 1 10) + { 2.444444f, 10.444445f, 1.555556f, 0.043269f }, // 77 (0 5 2 9) + { 2.555556f, 9.888889f, 1.777778f, 0.045226f }, // 78 (0 5 3 8) + 
{ 2.666667f, 9.333333f, 2.000000f, 0.047872f }, // 79 (0 5 4 7) + { 2.777778f, 8.777778f, 2.222222f, 0.051429f }, // 80 (0 5 5 6) + { 2.888889f, 8.222222f, 2.444444f, 0.056250f }, // 81 (0 5 6 5) + { 3.000000f, 7.666667f, 2.666667f, 0.062937f }, // 82 (0 5 7 4) + { 3.111111f, 7.111111f, 2.888889f, 0.072581f }, // 83 (0 5 8 3) + { 3.222222f, 6.555556f, 3.111111f, 0.087379f }, // 84 (0 5 9 2) + { 3.333333f, 6.000000f, 3.333333f, 0.112500f }, // 85 (0 5 10 1) + { 3.444444f, 5.444445f, 3.555556f, 0.163636f }, // 86 (0 5 11 0) + { 2.666667f, 10.666667f, 1.333333f, 0.037500f }, // 87 (0 6 0 10) + { 2.777778f, 10.111111f, 1.555556f, 0.038961f }, // 88 (0 6 1 9) + { 2.888889f, 9.555555f, 1.777778f, 0.040909f }, // 89 (0 6 2 8) + { 3.000000f, 9.000000f, 2.000000f, 0.043478f }, // 90 (0 6 3 7) + { 3.111111f, 8.444445f, 2.222222f, 0.046875f }, // 91 (0 6 4 6) + { 3.222222f, 7.888889f, 2.444444f, 0.051429f }, // 92 (0 6 5 5) + { 3.333333f, 7.333333f, 2.666667f, 0.057692f }, // 93 (0 6 6 4) + { 3.444444f, 6.777778f, 2.888889f, 0.066667f }, // 94 (0 6 7 3) + { 3.555556f, 6.222222f, 3.111111f, 0.080357f }, // 95 (0 6 8 2) + { 3.666667f, 5.666667f, 3.333333f, 0.103448f }, // 96 (0 6 9 1) + { 3.777778f, 5.111111f, 3.555556f, 0.150000f }, // 97 (0 6 10 0) + { 3.111111f, 9.777778f, 1.555556f, 0.035714f }, // 98 (0 7 0 9) + { 3.222222f, 9.222222f, 1.777778f, 0.037657f }, // 99 (0 7 1 8) + { 3.333333f, 8.666667f, 2.000000f, 0.040179f }, // 100 (0 7 2 7) + { 3.444444f, 8.111111f, 2.222222f, 0.043478f }, // 101 (0 7 3 6) + { 3.555556f, 7.555555f, 2.444444f, 0.047872f }, // 102 (0 7 4 5) + { 3.666667f, 7.000000f, 2.666667f, 0.053892f }, // 103 (0 7 5 4) + { 3.777778f, 6.444445f, 2.888889f, 0.062500f }, // 104 (0 7 6 3) + { 3.888889f, 5.888889f, 3.111111f, 0.075630f }, // 105 (0 7 7 2) + { 4.000000f, 5.333333f, 3.333333f, 0.097826f }, // 106 (0 7 8 1) + { 4.111111f, 4.777778f, 3.555556f, 0.142857f }, // 107 (0 7 9 0) + { 3.555556f, 8.888889f, 1.777778f, 0.035156f }, // 108 (0 8 0 8) + { 3.666667f, 8.333333f, 2.000000f, 0.037657f }, // 109 (0 8 1 7) + { 3.777778f, 7.777778f, 2.222222f, 0.040909f }, // 110 (0 8 2 6) + { 3.888889f, 7.222222f, 2.444444f, 0.045226f }, // 111 (0 8 3 5) + { 4.000000f, 6.666667f, 2.666667f, 0.051136f }, // 112 (0 8 4 4) + { 4.111111f, 6.111111f, 2.888889f, 0.059603f }, // 113 (0 8 5 3) + { 4.222222f, 5.555555f, 3.111111f, 0.072581f }, // 114 (0 8 6 2) + { 4.333333f, 5.000000f, 3.333333f, 0.094737f }, // 115 (0 8 7 1) + { 4.444445f, 4.444445f, 3.555556f, 0.140625f }, // 116 (0 8 8 0) + { 4.000000f, 8.000000f, 2.000000f, 0.035714f }, // 117 (0 9 0 7) + { 4.111111f, 7.444445f, 2.222222f, 0.038961f }, // 118 (0 9 1 6) + { 4.222222f, 6.888889f, 2.444444f, 0.043269f }, // 119 (0 9 2 5) + { 4.333333f, 6.333333f, 2.666667f, 0.049180f }, // 120 (0 9 3 4) + { 4.444445f, 5.777778f, 2.888889f, 0.057692f }, // 121 (0 9 4 3) + { 4.555556f, 5.222222f, 3.111111f, 0.070866f }, // 122 (0 9 5 2) + { 4.666667f, 4.666667f, 3.333333f, 0.093750f }, // 123 (0 9 6 1) + { 4.777778f, 4.111111f, 3.555556f, 0.142857f }, // 124 (0 9 7 0) + { 4.444445f, 7.111111f, 2.222222f, 0.037500f }, // 125 (0 10 0 6) + { 4.555556f, 6.555555f, 2.444444f, 0.041860f }, // 126 (0 10 1 5) + { 4.666667f, 6.000000f, 2.666667f, 0.047872f }, // 127 (0 10 2 4) + { 4.777778f, 5.444445f, 2.888889f, 0.056604f }, // 128 (0 10 3 3) + { 4.888889f, 4.888889f, 3.111111f, 0.070313f }, // 129 (0 10 4 2) + { 5.000000f, 4.333333f, 3.333333f, 0.094737f }, // 130 (0 10 5 1) + { 5.111111f, 3.777778f, 3.555556f, 0.150000f }, // 131 (0 10 6 0) + { 
4.888889f, 6.222222f, 2.444444f, 0.040909f }, // 132 (0 11 0 5) + { 5.000000f, 5.666667f, 2.666667f, 0.047120f }, // 133 (0 11 1 4) + { 5.111111f, 5.111111f, 2.888889f, 0.056250f }, // 134 (0 11 2 3) + { 5.222222f, 4.555555f, 3.111111f, 0.070866f }, // 135 (0 11 3 2) + { 5.333333f, 4.000000f, 3.333333f, 0.097826f }, // 136 (0 11 4 1) + { 5.444445f, 3.444444f, 3.555556f, 0.163636f }, // 137 (0 11 5 0) + { 5.333333f, 5.333333f, 2.666667f, 0.046875f }, // 138 (0 12 0 4) + { 5.444445f, 4.777778f, 2.888889f, 0.056604f }, // 139 (0 12 1 3) + { 5.555556f, 4.222222f, 3.111111f, 0.072581f }, // 140 (0 12 2 2) + { 5.666667f, 3.666667f, 3.333333f, 0.103448f }, // 141 (0 12 3 1) + { 5.777778f, 3.111111f, 3.555556f, 0.187500f }, // 142 (0 12 4 0) + { 5.777778f, 4.444445f, 2.888889f, 0.057692f }, // 143 (0 13 0 3) + { 5.888889f, 3.888889f, 3.111111f, 0.075630f }, // 144 (0 13 1 2) + { 6.000000f, 3.333333f, 3.333333f, 0.112500f }, // 145 (0 13 2 1) + { 6.111111f, 2.777778f, 3.555556f, 0.230769f }, // 146 (0 13 3 0) + { 6.222222f, 3.555556f, 3.111111f, 0.080357f }, // 147 (0 14 0 2) + { 6.333333f, 3.000000f, 3.333333f, 0.126761f }, // 148 (0 14 1 1) + { 6.444445f, 2.444444f, 3.555556f, 0.321429f }, // 149 (0 14 2 0) + { 6.666667f, 2.666667f, 3.333333f, 0.150000f }, // 150 (0 15 0 1) + { 6.777778f, 2.111111f, 3.555556f, 0.600000f }, // 151 (0 15 1 0) + { 7.111111f, 1.777778f, 3.555556f, FLT_MAX }, // 152 (0 16 0 0) + { 1.000000f, 15.000000f, 0.000000f, 0.066667f }, // 153 (1 0 0 15) + { 1.111111f, 14.444445f, 0.222222f, 0.062500f }, // 154 (1 0 1 14) + { 1.222222f, 13.888889f, 0.444444f, 0.059603f }, // 155 (1 0 2 13) + { 1.333333f, 13.333333f, 0.666667f, 0.057692f }, // 156 (1 0 3 12) + { 1.444444f, 12.777778f, 0.888889f, 0.056604f }, // 157 (1 0 4 11) + { 1.555556f, 12.222222f, 1.111111f, 0.056250f }, // 158 (1 0 5 10) + { 1.666667f, 11.666667f, 1.333333f, 0.056604f }, // 159 (1 0 6 9) + { 1.777778f, 11.111111f, 1.555556f, 0.057692f }, // 160 (1 0 7 8) + { 1.888889f, 10.555555f, 1.777778f, 0.059603f }, // 161 (1 0 8 7) + { 2.000000f, 10.000000f, 2.000000f, 0.062500f }, // 162 (1 0 9 6) + { 2.111111f, 9.444445f, 2.222222f, 0.066667f }, // 163 (1 0 10 5) + { 2.222222f, 8.888889f, 2.444444f, 0.072581f }, // 164 (1 0 11 4) + { 2.333333f, 8.333333f, 2.666667f, 0.081081f }, // 165 (1 0 12 3) + { 2.444444f, 7.777778f, 2.888889f, 0.093750f }, // 166 (1 0 13 2) + { 2.555556f, 7.222222f, 3.111111f, 0.113924f }, // 167 (1 0 14 1) + { 2.666667f, 6.666667f, 3.333333f, 0.150000f }, // 168 (1 0 15 0) + { 1.444444f, 14.111111f, 0.222222f, 0.049180f }, // 169 (1 1 0 14) + { 1.555556f, 13.555555f, 0.444444f, 0.047872f }, // 170 (1 1 1 13) + { 1.666667f, 13.000000f, 0.666667f, 0.047120f }, // 171 (1 1 2 12) + { 1.777778f, 12.444445f, 0.888889f, 0.046875f }, // 172 (1 1 3 11) + { 1.888889f, 11.888889f, 1.111111f, 0.047120f }, // 173 (1 1 4 10) + { 2.000000f, 11.333333f, 1.333333f, 0.047872f }, // 174 (1 1 5 9) + { 2.111111f, 10.777778f, 1.555556f, 0.049180f }, // 175 (1 1 6 8) + { 2.222222f, 10.222222f, 1.777778f, 0.051136f }, // 176 (1 1 7 7) + { 2.333333f, 9.666667f, 2.000000f, 0.053892f }, // 177 (1 1 8 6) + { 2.444444f, 9.111111f, 2.222222f, 0.057692f }, // 178 (1 1 9 5) + { 2.555556f, 8.555555f, 2.444444f, 0.062937f }, // 179 (1 1 10 4) + { 2.666667f, 8.000000f, 2.666667f, 0.070313f }, // 180 (1 1 11 3) + { 2.777778f, 7.444445f, 2.888889f, 0.081081f }, // 181 (1 1 12 2) + { 2.888889f, 6.888889f, 3.111111f, 0.097826f }, // 182 (1 1 13 1) + { 3.000000f, 6.333333f, 3.333333f, 0.126761f }, // 183 (1 1 14 0) + { 1.888889f, 
13.222222f, 0.444444f, 0.040359f }, // 184 (1 2 0 13) + { 2.000000f, 12.666667f, 0.666667f, 0.040179f }, // 185 (1 2 1 12) + { 2.111111f, 12.111111f, 0.888889f, 0.040359f }, // 186 (1 2 2 11) + { 2.222222f, 11.555555f, 1.111111f, 0.040909f }, // 187 (1 2 3 10) + { 2.333333f, 11.000000f, 1.333333f, 0.041860f }, // 188 (1 2 4 9) + { 2.444444f, 10.444445f, 1.555556f, 0.043269f }, // 189 (1 2 5 8) + { 2.555556f, 9.888889f, 1.777778f, 0.045226f }, // 190 (1 2 6 7) + { 2.666667f, 9.333333f, 2.000000f, 0.047872f }, // 191 (1 2 7 6) + { 2.777778f, 8.777778f, 2.222222f, 0.051429f }, // 192 (1 2 8 5) + { 2.888889f, 8.222222f, 2.444444f, 0.056250f }, // 193 (1 2 9 4) + { 3.000000f, 7.666667f, 2.666667f, 0.062937f }, // 194 (1 2 10 3) + { 3.111111f, 7.111111f, 2.888889f, 0.072581f }, // 195 (1 2 11 2) + { 3.222222f, 6.555556f, 3.111111f, 0.087379f }, // 196 (1 2 12 1) + { 3.333333f, 6.000000f, 3.333333f, 0.112500f }, // 197 (1 2 13 0) + { 2.333333f, 12.333333f, 0.666667f, 0.035294f }, // 198 (1 3 0 12) + { 2.444444f, 11.777778f, 0.888889f, 0.035714f }, // 199 (1 3 1 11) + { 2.555556f, 11.222222f, 1.111111f, 0.036437f }, // 200 (1 3 2 10) + { 2.666667f, 10.666667f, 1.333333f, 0.037500f }, // 201 (1 3 3 9) + { 2.777778f, 10.111111f, 1.555556f, 0.038961f }, // 202 (1 3 4 8) + { 2.888889f, 9.555555f, 1.777778f, 0.040909f }, // 203 (1 3 5 7) + { 3.000000f, 9.000000f, 2.000000f, 0.043478f }, // 204 (1 3 6 6) + { 3.111111f, 8.444445f, 2.222222f, 0.046875f }, // 205 (1 3 7 5) + { 3.222222f, 7.888889f, 2.444444f, 0.051429f }, // 206 (1 3 8 4) + { 3.333333f, 7.333333f, 2.666667f, 0.057692f }, // 207 (1 3 9 3) + { 3.444444f, 6.777778f, 2.888889f, 0.066667f }, // 208 (1 3 10 2) + { 3.555556f, 6.222222f, 3.111111f, 0.080357f }, // 209 (1 3 11 1) + { 3.666667f, 5.666667f, 3.333333f, 0.103448f }, // 210 (1 3 12 0) + { 2.777778f, 11.444445f, 0.888889f, 0.032258f }, // 211 (1 4 0 11) + { 2.888889f, 10.888889f, 1.111111f, 0.033088f }, // 212 (1 4 1 10) + { 3.000000f, 10.333333f, 1.333333f, 0.034221f }, // 213 (1 4 2 9) + { 3.111111f, 9.777778f, 1.555556f, 0.035714f }, // 214 (1 4 3 8) + { 3.222222f, 9.222222f, 1.777778f, 0.037657f }, // 215 (1 4 4 7) + { 3.333333f, 8.666667f, 2.000000f, 0.040179f }, // 216 (1 4 5 6) + { 3.444444f, 8.111111f, 2.222222f, 0.043478f }, // 217 (1 4 6 5) + { 3.555556f, 7.555555f, 2.444444f, 0.047872f }, // 218 (1 4 7 4) + { 3.666667f, 7.000000f, 2.666667f, 0.053892f }, // 219 (1 4 8 3) + { 3.777778f, 6.444445f, 2.888889f, 0.062500f }, // 220 (1 4 9 2) + { 3.888889f, 5.888889f, 3.111111f, 0.075630f }, // 221 (1 4 10 1) + { 4.000000f, 5.333333f, 3.333333f, 0.097826f }, // 222 (1 4 11 0) + { 3.222222f, 10.555555f, 1.111111f, 0.030508f }, // 223 (1 5 0 10) + { 3.333333f, 10.000000f, 1.333333f, 0.031690f }, // 224 (1 5 1 9) + { 3.444444f, 9.444445f, 1.555556f, 0.033210f }, // 225 (1 5 2 8) + { 3.555556f, 8.888889f, 1.777778f, 0.035156f }, // 226 (1 5 3 7) + { 3.666667f, 8.333333f, 2.000000f, 0.037657f }, // 227 (1 5 4 6) + { 3.777778f, 7.777778f, 2.222222f, 0.040909f }, // 228 (1 5 5 5) + { 3.888889f, 7.222222f, 2.444444f, 0.045226f }, // 229 (1 5 6 4) + { 4.000000f, 6.666667f, 2.666667f, 0.051136f }, // 230 (1 5 7 3) + { 4.111111f, 6.111111f, 2.888889f, 0.059603f }, // 231 (1 5 8 2) + { 4.222222f, 5.555556f, 3.111111f, 0.072581f }, // 232 (1 5 9 1) + { 4.333333f, 5.000000f, 3.333333f, 0.094737f }, // 233 (1 5 10 0) + { 3.666667f, 9.666667f, 1.333333f, 0.029703f }, // 234 (1 6 0 9) + { 3.777778f, 9.111111f, 1.555556f, 0.031250f }, // 235 (1 6 1 8) + { 3.888889f, 8.555555f, 1.777778f, 0.033210f }, 
// 236 (1 6 2 7) + { 4.000000f, 8.000000f, 2.000000f, 0.035714f }, // 237 (1 6 3 6) + { 4.111111f, 7.444445f, 2.222222f, 0.038961f }, // 238 (1 6 4 5) + { 4.222222f, 6.888889f, 2.444444f, 0.043269f }, // 239 (1 6 5 4) + { 4.333333f, 6.333333f, 2.666667f, 0.049180f }, // 240 (1 6 6 3) + { 4.444445f, 5.777778f, 2.888889f, 0.057692f }, // 241 (1 6 7 2) + { 4.555555f, 5.222222f, 3.111111f, 0.070866f }, // 242 (1 6 8 1) + { 4.666667f, 4.666667f, 3.333333f, 0.093750f }, // 243 (1 6 9 0) + { 4.111111f, 8.777778f, 1.555556f, 0.029703f }, // 244 (1 7 0 8) + { 4.222222f, 8.222222f, 1.777778f, 0.031690f }, // 245 (1 7 1 7) + { 4.333333f, 7.666667f, 2.000000f, 0.034221f }, // 246 (1 7 2 6) + { 4.444445f, 7.111111f, 2.222222f, 0.037500f }, // 247 (1 7 3 5) + { 4.555555f, 6.555555f, 2.444444f, 0.041860f }, // 248 (1 7 4 4) + { 4.666667f, 6.000000f, 2.666667f, 0.047872f }, // 249 (1 7 5 3) + { 4.777778f, 5.444445f, 2.888889f, 0.056604f }, // 250 (1 7 6 2) + { 4.888889f, 4.888889f, 3.111111f, 0.070313f }, // 251 (1 7 7 1) + { 5.000000f, 4.333333f, 3.333333f, 0.094737f }, // 252 (1 7 8 0) + { 4.555555f, 7.888889f, 1.777778f, 0.030508f }, // 253 (1 8 0 7) + { 4.666667f, 7.333333f, 2.000000f, 0.033088f }, // 254 (1 8 1 6) + { 4.777778f, 6.777778f, 2.222222f, 0.036437f }, // 255 (1 8 2 5) + { 4.888889f, 6.222222f, 2.444444f, 0.040909f }, // 256 (1 8 3 4) + { 5.000000f, 5.666667f, 2.666667f, 0.047120f }, // 257 (1 8 4 3) + { 5.111111f, 5.111111f, 2.888889f, 0.056250f }, // 258 (1 8 5 2) + { 5.222222f, 4.555555f, 3.111111f, 0.070866f }, // 259 (1 8 6 1) + { 5.333333f, 4.000000f, 3.333333f, 0.097826f }, // 260 (1 8 7 0) + { 5.000000f, 7.000000f, 2.000000f, 0.032258f }, // 261 (1 9 0 6) + { 5.111111f, 6.444445f, 2.222222f, 0.035714f }, // 262 (1 9 1 5) + { 5.222222f, 5.888889f, 2.444444f, 0.040359f }, // 263 (1 9 2 4) + { 5.333333f, 5.333333f, 2.666667f, 0.046875f }, // 264 (1 9 3 3) + { 5.444445f, 4.777778f, 2.888889f, 0.056604f }, // 265 (1 9 4 2) + { 5.555556f, 4.222222f, 3.111111f, 0.072581f }, // 266 (1 9 5 1) + { 5.666667f, 3.666667f, 3.333333f, 0.103448f }, // 267 (1 9 6 0) + { 5.444445f, 6.111111f, 2.222222f, 0.035294f }, // 268 (1 10 0 5) + { 5.555556f, 5.555555f, 2.444444f, 0.040179f }, // 269 (1 10 1 4) + { 5.666667f, 5.000000f, 2.666667f, 0.047120f }, // 270 (1 10 2 3) + { 5.777778f, 4.444445f, 2.888889f, 0.057692f }, // 271 (1 10 3 2) + { 5.888889f, 3.888889f, 3.111111f, 0.075630f }, // 272 (1 10 4 1) + { 6.000000f, 3.333333f, 3.333333f, 0.112500f }, // 273 (1 10 5 0) + { 5.888889f, 5.222222f, 2.444444f, 0.040359f }, // 274 (1 11 0 4) + { 6.000000f, 4.666667f, 2.666667f, 0.047872f }, // 275 (1 11 1 3) + { 6.111111f, 4.111111f, 2.888889f, 0.059603f }, // 276 (1 11 2 2) + { 6.222222f, 3.555556f, 3.111111f, 0.080357f }, // 277 (1 11 3 1) + { 6.333333f, 3.000000f, 3.333333f, 0.126761f }, // 278 (1 11 4 0) + { 6.333333f, 4.333333f, 2.666667f, 0.049180f }, // 279 (1 12 0 3) + { 6.444445f, 3.777778f, 2.888889f, 0.062500f }, // 280 (1 12 1 2) + { 6.555556f, 3.222222f, 3.111111f, 0.087379f }, // 281 (1 12 2 1) + { 6.666667f, 2.666667f, 3.333333f, 0.150000f }, // 282 (1 12 3 0) + { 6.777778f, 3.444444f, 2.888889f, 0.066667f }, // 283 (1 13 0 2) + { 6.888889f, 2.888889f, 3.111111f, 0.097826f }, // 284 (1 13 1 1) + { 7.000000f, 2.333333f, 3.333333f, 0.191489f }, // 285 (1 13 2 0) + { 7.222222f, 2.555556f, 3.111111f, 0.113924f }, // 286 (1 14 0 1) + { 7.333333f, 2.000000f, 3.333333f, 0.281250f }, // 287 (1 14 1 0) + { 7.666667f, 1.666667f, 3.333333f, 0.600000f }, // 288 (1 15 0 0) + { 2.000000f, 14.000000f, 
0.000000f, 0.035714f }, // 289 (2 0 0 14) + { 2.111111f, 13.444445f, 0.222222f, 0.035294f }, // 290 (2 0 1 13) + { 2.222222f, 12.888889f, 0.444444f, 0.035156f }, // 291 (2 0 2 12) + { 2.333333f, 12.333333f, 0.666667f, 0.035294f }, // 292 (2 0 3 11) + { 2.444444f, 11.777778f, 0.888889f, 0.035714f }, // 293 (2 0 4 10) + { 2.555556f, 11.222222f, 1.111111f, 0.036437f }, // 294 (2 0 5 9) + { 2.666667f, 10.666667f, 1.333333f, 0.037500f }, // 295 (2 0 6 8) + { 2.777778f, 10.111111f, 1.555556f, 0.038961f }, // 296 (2 0 7 7) + { 2.888889f, 9.555555f, 1.777778f, 0.040909f }, // 297 (2 0 8 6) + { 3.000000f, 9.000000f, 2.000000f, 0.043478f }, // 298 (2 0 9 5) + { 3.111111f, 8.444445f, 2.222222f, 0.046875f }, // 299 (2 0 10 4) + { 3.222222f, 7.888889f, 2.444444f, 0.051429f }, // 300 (2 0 11 3) + { 3.333333f, 7.333333f, 2.666667f, 0.057692f }, // 301 (2 0 12 2) + { 3.444444f, 6.777778f, 2.888889f, 0.066667f }, // 302 (2 0 13 1) + { 3.555556f, 6.222222f, 3.111111f, 0.080357f }, // 303 (2 0 14 0) + { 2.444444f, 13.111111f, 0.222222f, 0.031250f }, // 304 (2 1 0 13) + { 2.555556f, 12.555555f, 0.444444f, 0.031359f }, // 305 (2 1 1 12) + { 2.666667f, 12.000000f, 0.666667f, 0.031690f }, // 306 (2 1 2 11) + { 2.777778f, 11.444445f, 0.888889f, 0.032258f }, // 307 (2 1 3 10) + { 2.888889f, 10.888889f, 1.111111f, 0.033088f }, // 308 (2 1 4 9) + { 3.000000f, 10.333333f, 1.333333f, 0.034221f }, // 309 (2 1 5 8) + { 3.111111f, 9.777778f, 1.555556f, 0.035714f }, // 310 (2 1 6 7) + { 3.222222f, 9.222222f, 1.777778f, 0.037657f }, // 311 (2 1 7 6) + { 3.333333f, 8.666667f, 2.000000f, 0.040179f }, // 312 (2 1 8 5) + { 3.444444f, 8.111111f, 2.222222f, 0.043478f }, // 313 (2 1 9 4) + { 3.555556f, 7.555556f, 2.444444f, 0.047872f }, // 314 (2 1 10 3) + { 3.666667f, 7.000000f, 2.666667f, 0.053892f }, // 315 (2 1 11 2) + { 3.777778f, 6.444445f, 2.888889f, 0.062500f }, // 316 (2 1 12 1) + { 3.888889f, 5.888889f, 3.111111f, 0.075630f }, // 317 (2 1 13 0) + { 2.888889f, 12.222222f, 0.444444f, 0.028481f }, // 318 (2 2 0 12) + { 3.000000f, 11.666667f, 0.666667f, 0.028939f }, // 319 (2 2 1 11) + { 3.111111f, 11.111111f, 0.888889f, 0.029605f }, // 320 (2 2 2 10) + { 3.222222f, 10.555555f, 1.111111f, 0.030508f }, // 321 (2 2 3 9) + { 3.333333f, 10.000000f, 1.333333f, 0.031690f }, // 322 (2 2 4 8) + { 3.444444f, 9.444445f, 1.555556f, 0.033210f }, // 323 (2 2 5 7) + { 3.555556f, 8.888889f, 1.777778f, 0.035156f }, // 324 (2 2 6 6) + { 3.666667f, 8.333333f, 2.000000f, 0.037657f }, // 325 (2 2 7 5) + { 3.777778f, 7.777778f, 2.222222f, 0.040909f }, // 326 (2 2 8 4) + { 3.888889f, 7.222222f, 2.444444f, 0.045226f }, // 327 (2 2 9 3) + { 4.000000f, 6.666667f, 2.666667f, 0.051136f }, // 328 (2 2 10 2) + { 4.111111f, 6.111111f, 2.888889f, 0.059603f }, // 329 (2 2 11 1) + { 4.222222f, 5.555556f, 3.111111f, 0.072581f }, // 330 (2 2 12 0) + { 3.333333f, 11.333333f, 0.666667f, 0.026786f }, // 331 (2 3 0 11) + { 3.444444f, 10.777778f, 0.888889f, 0.027523f }, // 332 (2 3 1 10) + { 3.555556f, 10.222222f, 1.111111f, 0.028481f }, // 333 (2 3 2 9) + { 3.666667f, 9.666667f, 1.333333f, 0.029703f }, // 334 (2 3 3 8) + { 3.777778f, 9.111111f, 1.555556f, 0.031250f }, // 335 (2 3 4 7) + { 3.888889f, 8.555555f, 1.777778f, 0.033210f }, // 336 (2 3 5 6) + { 4.000000f, 8.000000f, 2.000000f, 0.035714f }, // 337 (2 3 6 5) + { 4.111111f, 7.444445f, 2.222222f, 0.038961f }, // 338 (2 3 7 4) + { 4.222222f, 6.888889f, 2.444444f, 0.043269f }, // 339 (2 3 8 3) + { 4.333333f, 6.333333f, 2.666667f, 0.049180f }, // 340 (2 3 9 2) + { 4.444445f, 5.777778f, 2.888889f, 0.057692f }, 
// 341 (2 3 10 1) + { 4.555555f, 5.222222f, 3.111111f, 0.070866f }, // 342 (2 3 11 0) + { 3.777778f, 10.444445f, 0.888889f, 0.025862f }, // 343 (2 4 0 10) + { 3.888889f, 9.888889f, 1.111111f, 0.026866f }, // 344 (2 4 1 9) + { 4.000000f, 9.333333f, 1.333333f, 0.028125f }, // 345 (2 4 2 8) + { 4.111111f, 8.777778f, 1.555556f, 0.029703f }, // 346 (2 4 3 7) + { 4.222222f, 8.222222f, 1.777778f, 0.031690f }, // 347 (2 4 4 6) + { 4.333333f, 7.666667f, 2.000000f, 0.034221f }, // 348 (2 4 5 5) + { 4.444445f, 7.111111f, 2.222222f, 0.037500f }, // 349 (2 4 6 4) + { 4.555555f, 6.555555f, 2.444444f, 0.041860f }, // 350 (2 4 7 3) + { 4.666667f, 6.000000f, 2.666667f, 0.047872f }, // 351 (2 4 8 2) + { 4.777778f, 5.444445f, 2.888889f, 0.056604f }, // 352 (2 4 9 1) + { 4.888889f, 4.888889f, 3.111111f, 0.070313f }, // 353 (2 4 10 0) + { 4.222222f, 9.555555f, 1.111111f, 0.025568f }, // 354 (2 5 0 9) + { 4.333333f, 9.000000f, 1.333333f, 0.026866f }, // 355 (2 5 1 8) + { 4.444445f, 8.444445f, 1.555556f, 0.028481f }, // 356 (2 5 2 7) + { 4.555555f, 7.888889f, 1.777778f, 0.030508f }, // 357 (2 5 3 6) + { 4.666667f, 7.333333f, 2.000000f, 0.033088f }, // 358 (2 5 4 5) + { 4.777778f, 6.777778f, 2.222222f, 0.036437f }, // 359 (2 5 5 4) + { 4.888889f, 6.222222f, 2.444444f, 0.040909f }, // 360 (2 5 6 3) + { 5.000000f, 5.666667f, 2.666667f, 0.047120f }, // 361 (2 5 7 2) + { 5.111111f, 5.111111f, 2.888889f, 0.056250f }, // 362 (2 5 8 1) + { 5.222222f, 4.555556f, 3.111111f, 0.070866f }, // 363 (2 5 9 0) + { 4.666667f, 8.666667f, 1.333333f, 0.025862f }, // 364 (2 6 0 8) + { 4.777778f, 8.111111f, 1.555556f, 0.027523f }, // 365 (2 6 1 7) + { 4.888889f, 7.555555f, 1.777778f, 0.029605f }, // 366 (2 6 2 6) + { 5.000000f, 7.000000f, 2.000000f, 0.032258f }, // 367 (2 6 3 5) + { 5.111111f, 6.444445f, 2.222222f, 0.035714f }, // 368 (2 6 4 4) + { 5.222222f, 5.888889f, 2.444444f, 0.040359f }, // 369 (2 6 5 3) + { 5.333333f, 5.333333f, 2.666667f, 0.046875f }, // 370 (2 6 6 2) + { 5.444445f, 4.777778f, 2.888889f, 0.056604f }, // 371 (2 6 7 1) + { 5.555555f, 4.222222f, 3.111111f, 0.072581f }, // 372 (2 6 8 0) + { 5.111111f, 7.777778f, 1.555556f, 0.026786f }, // 373 (2 7 0 7) + { 5.222222f, 7.222222f, 1.777778f, 0.028939f }, // 374 (2 7 1 6) + { 5.333333f, 6.666667f, 2.000000f, 0.031690f }, // 375 (2 7 2 5) + { 5.444445f, 6.111111f, 2.222222f, 0.035294f }, // 376 (2 7 3 4) + { 5.555555f, 5.555555f, 2.444444f, 0.040179f }, // 377 (2 7 4 3) + { 5.666667f, 5.000000f, 2.666667f, 0.047120f }, // 378 (2 7 5 2) + { 5.777778f, 4.444445f, 2.888889f, 0.057692f }, // 379 (2 7 6 1) + { 5.888889f, 3.888889f, 3.111111f, 0.075630f }, // 380 (2 7 7 0) + { 5.555555f, 6.888889f, 1.777778f, 0.028481f }, // 381 (2 8 0 6) + { 5.666667f, 6.333333f, 2.000000f, 0.031359f }, // 382 (2 8 1 5) + { 5.777778f, 5.777778f, 2.222222f, 0.035156f }, // 383 (2 8 2 4) + { 5.888889f, 5.222222f, 2.444444f, 0.040359f }, // 384 (2 8 3 3) + { 6.000000f, 4.666667f, 2.666667f, 0.047872f }, // 385 (2 8 4 2) + { 6.111111f, 4.111111f, 2.888889f, 0.059603f }, // 386 (2 8 5 1) + { 6.222222f, 3.555556f, 3.111111f, 0.080357f }, // 387 (2 8 6 0) + { 6.000000f, 6.000000f, 2.000000f, 0.031250f }, // 388 (2 9 0 5) + { 6.111111f, 5.444445f, 2.222222f, 0.035294f }, // 389 (2 9 1 4) + { 6.222222f, 4.888889f, 2.444444f, 0.040909f }, // 390 (2 9 2 3) + { 6.333333f, 4.333333f, 2.666667f, 0.049180f }, // 391 (2 9 3 2) + { 6.444445f, 3.777778f, 2.888889f, 0.062500f }, // 392 (2 9 4 1) + { 6.555556f, 3.222222f, 3.111111f, 0.087379f }, // 393 (2 9 5 0) + { 6.444445f, 5.111111f, 2.222222f, 0.035714f 
}, // 394 (2 10 0 4) + { 6.555556f, 4.555555f, 2.444444f, 0.041860f }, // 395 (2 10 1 3) + { 6.666667f, 4.000000f, 2.666667f, 0.051136f }, // 396 (2 10 2 2) + { 6.777778f, 3.444444f, 2.888889f, 0.066667f }, // 397 (2 10 3 1) + { 6.888889f, 2.888889f, 3.111111f, 0.097826f }, // 398 (2 10 4 0) + { 6.888889f, 4.222222f, 2.444444f, 0.043269f }, // 399 (2 11 0 3) + { 7.000000f, 3.666667f, 2.666667f, 0.053892f }, // 400 (2 11 1 2) + { 7.111111f, 3.111111f, 2.888889f, 0.072581f }, // 401 (2 11 2 1) + { 7.222222f, 2.555556f, 3.111111f, 0.113924f }, // 402 (2 11 3 0) + { 7.333333f, 3.333333f, 2.666667f, 0.057692f }, // 403 (2 12 0 2) + { 7.444445f, 2.777778f, 2.888889f, 0.081081f }, // 404 (2 12 1 1) + { 7.555556f, 2.222222f, 3.111111f, 0.140625f }, // 405 (2 12 2 0) + { 7.777778f, 2.444444f, 2.888889f, 0.093750f }, // 406 (2 13 0 1) + { 7.888889f, 1.888889f, 3.111111f, 0.191489f }, // 407 (2 13 1 0) + { 8.222222f, 1.555556f, 3.111111f, 0.321429f }, // 408 (2 14 0 0) + { 3.000000f, 13.000000f, 0.000000f, 0.025641f }, // 409 (3 0 0 13) + { 3.111111f, 12.444445f, 0.222222f, 0.025862f }, // 410 (3 0 1 12) + { 3.222222f, 11.888889f, 0.444444f, 0.026239f }, // 411 (3 0 2 11) + { 3.333333f, 11.333333f, 0.666667f, 0.026786f }, // 412 (3 0 3 10) + { 3.444444f, 10.777778f, 0.888889f, 0.027523f }, // 413 (3 0 4 9) + { 3.555556f, 10.222222f, 1.111111f, 0.028481f }, // 414 (3 0 5 8) + { 3.666667f, 9.666667f, 1.333333f, 0.029703f }, // 415 (3 0 6 7) + { 3.777778f, 9.111111f, 1.555556f, 0.031250f }, // 416 (3 0 7 6) + { 3.888889f, 8.555555f, 1.777778f, 0.033210f }, // 417 (3 0 8 5) + { 4.000000f, 8.000000f, 2.000000f, 0.035714f }, // 418 (3 0 9 4) + { 4.111111f, 7.444445f, 2.222222f, 0.038961f }, // 419 (3 0 10 3) + { 4.222222f, 6.888889f, 2.444444f, 0.043269f }, // 420 (3 0 11 2) + { 4.333333f, 6.333333f, 2.666667f, 0.049180f }, // 421 (3 0 12 1) + { 4.444445f, 5.777778f, 2.888889f, 0.057692f }, // 422 (3 0 13 0) + { 3.444444f, 12.111111f, 0.222222f, 0.024000f }, // 423 (3 1 0 12) + { 3.555556f, 11.555555f, 0.444444f, 0.024457f }, // 424 (3 1 1 11) + { 3.666667f, 11.000000f, 0.666667f, 0.025070f }, // 425 (3 1 2 10) + { 3.777778f, 10.444445f, 0.888889f, 0.025862f }, // 426 (3 1 3 9) + { 3.888889f, 9.888889f, 1.111111f, 0.026866f }, // 427 (3 1 4 8) + { 4.000000f, 9.333333f, 1.333333f, 0.028125f }, // 428 (3 1 5 7) + { 4.111111f, 8.777778f, 1.555556f, 0.029703f }, // 429 (3 1 6 6) + { 4.222222f, 8.222222f, 1.777778f, 0.031690f }, // 430 (3 1 7 5) + { 4.333333f, 7.666667f, 2.000000f, 0.034221f }, // 431 (3 1 8 4) + { 4.444445f, 7.111111f, 2.222222f, 0.037500f }, // 432 (3 1 9 3) + { 4.555555f, 6.555556f, 2.444444f, 0.041860f }, // 433 (3 1 10 2) + { 4.666667f, 6.000000f, 2.666667f, 0.047872f }, // 434 (3 1 11 1) + { 4.777778f, 5.444445f, 2.888889f, 0.056604f }, // 435 (3 1 12 0) + { 3.888889f, 11.222222f, 0.444444f, 0.023018f }, // 436 (3 2 0 11) + { 4.000000f, 10.666667f, 0.666667f, 0.023684f }, // 437 (3 2 1 10) + { 4.111111f, 10.111111f, 0.888889f, 0.024523f }, // 438 (3 2 2 9) + { 4.222222f, 9.555555f, 1.111111f, 0.025568f }, // 439 (3 2 3 8) + { 4.333333f, 9.000000f, 1.333333f, 0.026866f }, // 440 (3 2 4 7) + { 4.444445f, 8.444445f, 1.555556f, 0.028481f }, // 441 (3 2 5 6) + { 4.555555f, 7.888889f, 1.777778f, 0.030508f }, // 442 (3 2 6 5) + { 4.666667f, 7.333333f, 2.000000f, 0.033088f }, // 443 (3 2 7 4) + { 4.777778f, 6.777778f, 2.222222f, 0.036437f }, // 444 (3 2 8 3) + { 4.888889f, 6.222222f, 2.444444f, 0.040909f }, // 445 (3 2 9 2) + { 5.000000f, 5.666667f, 2.666667f, 0.047120f }, // 446 (3 2 10 1) + { 
5.111111f, 5.111111f, 2.888889f, 0.056250f }, // 447 (3 2 11 0) + { 4.333333f, 10.333333f, 0.666667f, 0.022556f }, // 448 (3 3 0 10) + { 4.444445f, 9.777778f, 0.888889f, 0.023438f }, // 449 (3 3 1 9) + { 4.555555f, 9.222222f, 1.111111f, 0.024523f }, // 450 (3 3 2 8) + { 4.666667f, 8.666667f, 1.333333f, 0.025862f }, // 451 (3 3 3 7) + { 4.777778f, 8.111111f, 1.555556f, 0.027523f }, // 452 (3 3 4 6) + { 4.888889f, 7.555555f, 1.777778f, 0.029605f }, // 453 (3 3 5 5) + { 5.000000f, 7.000000f, 2.000000f, 0.032258f }, // 454 (3 3 6 4) + { 5.111111f, 6.444445f, 2.222222f, 0.035714f }, // 455 (3 3 7 3) + { 5.222222f, 5.888889f, 2.444444f, 0.040359f }, // 456 (3 3 8 2) + { 5.333333f, 5.333333f, 2.666667f, 0.046875f }, // 457 (3 3 9 1) + { 5.444445f, 4.777778f, 2.888889f, 0.056604f }, // 458 (3 3 10 0) + { 4.777778f, 9.444445f, 0.888889f, 0.022556f }, // 459 (3 4 0 9) + { 4.888889f, 8.888889f, 1.111111f, 0.023684f }, // 460 (3 4 1 8) + { 5.000000f, 8.333333f, 1.333333f, 0.025070f }, // 461 (3 4 2 7) + { 5.111111f, 7.777778f, 1.555556f, 0.026786f }, // 462 (3 4 3 6) + { 5.222222f, 7.222222f, 1.777778f, 0.028939f }, // 463 (3 4 4 5) + { 5.333333f, 6.666667f, 2.000000f, 0.031690f }, // 464 (3 4 5 4) + { 5.444445f, 6.111111f, 2.222222f, 0.035294f }, // 465 (3 4 6 3) + { 5.555555f, 5.555555f, 2.444444f, 0.040179f }, // 466 (3 4 7 2) + { 5.666667f, 5.000000f, 2.666667f, 0.047120f }, // 467 (3 4 8 1) + { 5.777778f, 4.444445f, 2.888889f, 0.057692f }, // 468 (3 4 9 0) + { 5.222222f, 8.555555f, 1.111111f, 0.023018f }, // 469 (3 5 0 8) + { 5.333333f, 8.000000f, 1.333333f, 0.024457f }, // 470 (3 5 1 7) + { 5.444445f, 7.444445f, 1.555556f, 0.026239f }, // 471 (3 5 2 6) + { 5.555555f, 6.888889f, 1.777778f, 0.028481f }, // 472 (3 5 3 5) + { 5.666667f, 6.333333f, 2.000000f, 0.031359f }, // 473 (3 5 4 4) + { 5.777778f, 5.777778f, 2.222222f, 0.035156f }, // 474 (3 5 5 3) + { 5.888889f, 5.222222f, 2.444444f, 0.040359f }, // 475 (3 5 6 2) + { 6.000000f, 4.666667f, 2.666667f, 0.047872f }, // 476 (3 5 7 1) + { 6.111111f, 4.111111f, 2.888889f, 0.059603f }, // 477 (3 5 8 0) + { 5.666667f, 7.666667f, 1.333333f, 0.024000f }, // 478 (3 6 0 7) + { 5.777778f, 7.111111f, 1.555556f, 0.025862f }, // 479 (3 6 1 6) + { 5.888889f, 6.555555f, 1.777778f, 0.028213f }, // 480 (3 6 2 5) + { 6.000000f, 6.000000f, 2.000000f, 0.031250f }, // 481 (3 6 3 4) + { 6.111111f, 5.444445f, 2.222222f, 0.035294f }, // 482 (3 6 4 3) + { 6.222222f, 4.888889f, 2.444444f, 0.040909f }, // 483 (3 6 5 2) + { 6.333333f, 4.333333f, 2.666667f, 0.049180f }, // 484 (3 6 6 1) + { 6.444445f, 3.777778f, 2.888889f, 0.062500f }, // 485 (3 6 7 0) + { 6.111111f, 6.777778f, 1.555556f, 0.025641f }, // 486 (3 7 0 6) + { 6.222222f, 6.222222f, 1.777778f, 0.028125f }, // 487 (3 7 1 5) + { 6.333333f, 5.666667f, 2.000000f, 0.031359f }, // 488 (3 7 2 4) + { 6.444445f, 5.111111f, 2.222222f, 0.035714f }, // 489 (3 7 3 3) + { 6.555555f, 4.555555f, 2.444444f, 0.041860f }, // 490 (3 7 4 2) + { 6.666667f, 4.000000f, 2.666667f, 0.051136f }, // 491 (3 7 5 1) + { 6.777778f, 3.444444f, 2.888889f, 0.066667f }, // 492 (3 7 6 0) + { 6.555555f, 5.888889f, 1.777778f, 0.028213f }, // 493 (3 8 0 5) + { 6.666667f, 5.333333f, 2.000000f, 0.031690f }, // 494 (3 8 1 4) + { 6.777778f, 4.777778f, 2.222222f, 0.036437f }, // 495 (3 8 2 3) + { 6.888889f, 4.222222f, 2.444444f, 0.043269f }, // 496 (3 8 3 2) + { 7.000000f, 3.666667f, 2.666667f, 0.053892f }, // 497 (3 8 4 1) + { 7.111111f, 3.111111f, 2.888889f, 0.072581f }, // 498 (3 8 5 0) + { 7.000000f, 5.000000f, 2.000000f, 0.032258f }, // 499 (3 9 0 4) + { 
7.111111f, 4.444445f, 2.222222f, 0.037500f }, // 500 (3 9 1 3) + { 7.222222f, 3.888889f, 2.444444f, 0.045226f }, // 501 (3 9 2 2) + { 7.333333f, 3.333333f, 2.666667f, 0.057692f }, // 502 (3 9 3 1) + { 7.444445f, 2.777778f, 2.888889f, 0.081081f }, // 503 (3 9 4 0) + { 7.444445f, 4.111111f, 2.222222f, 0.038961f }, // 504 (3 10 0 3) + { 7.555556f, 3.555556f, 2.444444f, 0.047872f }, // 505 (3 10 1 2) + { 7.666667f, 3.000000f, 2.666667f, 0.062937f }, // 506 (3 10 2 1) + { 7.777778f, 2.444444f, 2.888889f, 0.093750f }, // 507 (3 10 3 0) + { 7.888889f, 3.222222f, 2.444444f, 0.051429f }, // 508 (3 11 0 2) + { 8.000000f, 2.666667f, 2.666667f, 0.070313f }, // 509 (3 11 1 1) + { 8.111111f, 2.111111f, 2.888889f, 0.113924f }, // 510 (3 11 2 0) + { 8.333333f, 2.333333f, 2.666667f, 0.081081f }, // 511 (3 12 0 1) + { 8.444445f, 1.777778f, 2.888889f, 0.150000f }, // 512 (3 12 1 0) + { 8.777778f, 1.444444f, 2.888889f, 0.230769f }, // 513 (3 13 0 0) + { 4.000000f, 12.000000f, 0.000000f, 0.020833f }, // 514 (4 0 0 12) + { 4.111111f, 11.444445f, 0.222222f, 0.021277f }, // 515 (4 0 1 11) + { 4.222222f, 10.888889f, 0.444444f, 0.021845f }, // 516 (4 0 2 10) + { 4.333333f, 10.333333f, 0.666667f, 0.022556f }, // 517 (4 0 3 9) + { 4.444445f, 9.777778f, 0.888889f, 0.023438f }, // 518 (4 0 4 8) + { 4.555555f, 9.222222f, 1.111111f, 0.024523f }, // 519 (4 0 5 7) + { 4.666667f, 8.666667f, 1.333333f, 0.025862f }, // 520 (4 0 6 6) + { 4.777778f, 8.111111f, 1.555556f, 0.027523f }, // 521 (4 0 7 5) + { 4.888889f, 7.555555f, 1.777778f, 0.029605f }, // 522 (4 0 8 4) + { 5.000000f, 7.000000f, 2.000000f, 0.032258f }, // 523 (4 0 9 3) + { 5.111111f, 6.444445f, 2.222222f, 0.035714f }, // 524 (4 0 10 2) + { 5.222222f, 5.888889f, 2.444444f, 0.040359f }, // 525 (4 0 11 1) + { 5.333333f, 5.333333f, 2.666667f, 0.046875f }, // 526 (4 0 12 0) + { 4.444445f, 11.111111f, 0.222222f, 0.020270f }, // 527 (4 1 0 11) + { 4.555555f, 10.555555f, 0.444444f, 0.020882f }, // 528 (4 1 1 10) + { 4.666667f, 10.000000f, 0.666667f, 0.021635f }, // 529 (4 1 2 9) + { 4.777778f, 9.444445f, 0.888889f, 0.022556f }, // 530 (4 1 3 8) + { 4.888889f, 8.888889f, 1.111111f, 0.023684f }, // 531 (4 1 4 7) + { 5.000000f, 8.333333f, 1.333333f, 0.025070f }, // 532 (4 1 5 6) + { 5.111111f, 7.777778f, 1.555556f, 0.026786f }, // 533 (4 1 6 5) + { 5.222222f, 7.222222f, 1.777778f, 0.028939f }, // 534 (4 1 7 4) + { 5.333333f, 6.666667f, 2.000000f, 0.031690f }, // 535 (4 1 8 3) + { 5.444445f, 6.111111f, 2.222222f, 0.035294f }, // 536 (4 1 9 2) + { 5.555555f, 5.555556f, 2.444444f, 0.040179f }, // 537 (4 1 10 1) + { 5.666667f, 5.000000f, 2.666667f, 0.047120f }, // 538 (4 1 11 0) + { 4.888889f, 10.222222f, 0.444444f, 0.020089f }, // 539 (4 2 0 10) + { 5.000000f, 9.666667f, 0.666667f, 0.020882f }, // 540 (4 2 1 9) + { 5.111111f, 9.111111f, 0.888889f, 0.021845f }, // 541 (4 2 2 8) + { 5.222222f, 8.555555f, 1.111111f, 0.023018f }, // 542 (4 2 3 7) + { 5.333333f, 8.000000f, 1.333333f, 0.024457f }, // 543 (4 2 4 6) + { 5.444445f, 7.444445f, 1.555556f, 0.026239f }, // 544 (4 2 5 5) + { 5.555555f, 6.888889f, 1.777778f, 0.028481f }, // 545 (4 2 6 4) + { 5.666667f, 6.333333f, 2.000000f, 0.031359f }, // 546 (4 2 7 3) + { 5.777778f, 5.777778f, 2.222222f, 0.035156f }, // 547 (4 2 8 2) + { 5.888889f, 5.222222f, 2.444444f, 0.040359f }, // 548 (4 2 9 1) + { 6.000000f, 4.666667f, 2.666667f, 0.047872f }, // 549 (4 2 10 0) + { 5.333333f, 9.333333f, 0.666667f, 0.020270f }, // 550 (4 3 0 9) + { 5.444445f, 8.777778f, 0.888889f, 0.021277f }, // 551 (4 3 1 8) + { 5.555555f, 8.222222f, 1.111111f, 
0.022500f }, // 552 (4 3 2 7) + { 5.666667f, 7.666667f, 1.333333f, 0.024000f }, // 553 (4 3 3 6) + { 5.777778f, 7.111111f, 1.555556f, 0.025862f }, // 554 (4 3 4 5) + { 5.888889f, 6.555555f, 1.777778f, 0.028213f }, // 555 (4 3 5 4) + { 6.000000f, 6.000000f, 2.000000f, 0.031250f }, // 556 (4 3 6 3) + { 6.111111f, 5.444445f, 2.222222f, 0.035294f }, // 557 (4 3 7 2) + { 6.222222f, 4.888889f, 2.444444f, 0.040909f }, // 558 (4 3 8 1) + { 6.333333f, 4.333333f, 2.666667f, 0.049180f }, // 559 (4 3 9 0) + { 5.777778f, 8.444445f, 0.888889f, 0.020833f }, // 560 (4 4 0 8) + { 5.888889f, 7.888889f, 1.111111f, 0.022113f }, // 561 (4 4 1 7) + { 6.000000f, 7.333333f, 1.333333f, 0.023684f }, // 562 (4 4 2 6) + { 6.111111f, 6.777778f, 1.555556f, 0.025641f }, // 563 (4 4 3 5) + { 6.222222f, 6.222222f, 1.777778f, 0.028125f }, // 564 (4 4 4 4) + { 6.333333f, 5.666667f, 2.000000f, 0.031359f }, // 565 (4 4 5 3) + { 6.444445f, 5.111111f, 2.222222f, 0.035714f }, // 566 (4 4 6 2) + { 6.555555f, 4.555555f, 2.444444f, 0.041860f }, // 567 (4 4 7 1) + { 6.666667f, 4.000000f, 2.666667f, 0.051136f }, // 568 (4 4 8 0) + { 6.222222f, 7.555555f, 1.111111f, 0.021845f }, // 569 (4 5 0 7) + { 6.333333f, 7.000000f, 1.333333f, 0.023499f }, // 570 (4 5 1 6) + { 6.444445f, 6.444445f, 1.555556f, 0.025568f }, // 571 (4 5 2 5) + { 6.555555f, 5.888889f, 1.777778f, 0.028213f }, // 572 (4 5 3 4) + { 6.666667f, 5.333333f, 2.000000f, 0.031690f }, // 573 (4 5 4 3) + { 6.777778f, 4.777778f, 2.222222f, 0.036437f }, // 574 (4 5 5 2) + { 6.888889f, 4.222222f, 2.444444f, 0.043269f }, // 575 (4 5 6 1) + { 7.000000f, 3.666667f, 2.666667f, 0.053892f }, // 576 (4 5 7 0) + { 6.666667f, 6.666667f, 1.333333f, 0.023438f }, // 577 (4 6 0 6) + { 6.777778f, 6.111111f, 1.555556f, 0.025641f }, // 578 (4 6 1 5) + { 6.888889f, 5.555555f, 1.777778f, 0.028481f }, // 579 (4 6 2 4) + { 7.000000f, 5.000000f, 2.000000f, 0.032258f }, // 580 (4 6 3 3) + { 7.111111f, 4.444445f, 2.222222f, 0.037500f }, // 581 (4 6 4 2) + { 7.222222f, 3.888889f, 2.444444f, 0.045226f }, // 582 (4 6 5 1) + { 7.333333f, 3.333333f, 2.666667f, 0.057692f }, // 583 (4 6 6 0) + { 7.111111f, 5.777778f, 1.555556f, 0.025862f }, // 584 (4 7 0 5) + { 7.222222f, 5.222222f, 1.777778f, 0.028939f }, // 585 (4 7 1 4) + { 7.333333f, 4.666667f, 2.000000f, 0.033088f }, // 586 (4 7 2 3) + { 7.444445f, 4.111111f, 2.222222f, 0.038961f }, // 587 (4 7 3 2) + { 7.555555f, 3.555556f, 2.444444f, 0.047872f }, // 588 (4 7 4 1) + { 7.666667f, 3.000000f, 2.666667f, 0.062937f }, // 589 (4 7 5 0) + { 7.555555f, 4.888889f, 1.777778f, 0.029605f }, // 590 (4 8 0 4) + { 7.666667f, 4.333333f, 2.000000f, 0.034221f }, // 591 (4 8 1 3) + { 7.777778f, 3.777778f, 2.222222f, 0.040909f }, // 592 (4 8 2 2) + { 7.888889f, 3.222222f, 2.444444f, 0.051429f }, // 593 (4 8 3 1) + { 8.000000f, 2.666667f, 2.666667f, 0.070313f }, // 594 (4 8 4 0) + { 8.000000f, 4.000000f, 2.000000f, 0.035714f }, // 595 (4 9 0 3) + { 8.111111f, 3.444444f, 2.222222f, 0.043478f }, // 596 (4 9 1 2) + { 8.222222f, 2.888889f, 2.444444f, 0.056250f }, // 597 (4 9 2 1) + { 8.333333f, 2.333333f, 2.666667f, 0.081081f }, // 598 (4 9 3 0) + { 8.444445f, 3.111111f, 2.222222f, 0.046875f }, // 599 (4 10 0 2) + { 8.555555f, 2.555556f, 2.444444f, 0.062937f }, // 600 (4 10 1 1) + { 8.666667f, 2.000000f, 2.666667f, 0.097826f }, // 601 (4 10 2 0) + { 8.888889f, 2.222222f, 2.444444f, 0.072581f }, // 602 (4 11 0 1) + { 9.000000f, 1.666667f, 2.666667f, 0.126761f }, // 603 (4 11 1 0) + { 9.333333f, 1.333333f, 2.666667f, 0.187500f }, // 604 (4 12 0 0) + { 5.000000f, 11.000000f, 
0.000000f, 0.018182f }, // 605 (5 0 0 11) + { 5.111111f, 10.444445f, 0.222222f, 0.018750f }, // 606 (5 0 1 10) + { 5.222222f, 9.888889f, 0.444444f, 0.019438f }, // 607 (5 0 2 9) + { 5.333333f, 9.333333f, 0.666667f, 0.020270f }, // 608 (5 0 3 8) + { 5.444445f, 8.777778f, 0.888889f, 0.021277f }, // 609 (5 0 4 7) + { 5.555555f, 8.222222f, 1.111111f, 0.022500f }, // 610 (5 0 5 6) + { 5.666667f, 7.666667f, 1.333333f, 0.024000f }, // 611 (5 0 6 5) + { 5.777778f, 7.111111f, 1.555556f, 0.025862f }, // 612 (5 0 7 4) + { 5.888889f, 6.555555f, 1.777778f, 0.028213f }, // 613 (5 0 8 3) + { 6.000000f, 6.000000f, 2.000000f, 0.031250f }, // 614 (5 0 9 2) + { 6.111111f, 5.444445f, 2.222222f, 0.035294f }, // 615 (5 0 10 1) + { 6.222222f, 4.888889f, 2.444444f, 0.040909f }, // 616 (5 0 11 0) + { 5.444445f, 10.111111f, 0.222222f, 0.018182f }, // 617 (5 1 0 10) + { 5.555555f, 9.555555f, 0.444444f, 0.018908f }, // 618 (5 1 1 9) + { 5.666667f, 9.000000f, 0.666667f, 0.019780f }, // 619 (5 1 2 8) + { 5.777778f, 8.444445f, 0.888889f, 0.020833f }, // 620 (5 1 3 7) + { 5.888889f, 7.888889f, 1.111111f, 0.022113f }, // 621 (5 1 4 6) + { 6.000000f, 7.333333f, 1.333333f, 0.023684f }, // 622 (5 1 5 5) + { 6.111111f, 6.777778f, 1.555556f, 0.025641f }, // 623 (5 1 6 4) + { 6.222222f, 6.222222f, 1.777778f, 0.028125f }, // 624 (5 1 7 3) + { 6.333333f, 5.666667f, 2.000000f, 0.031359f }, // 625 (5 1 8 2) + { 6.444445f, 5.111111f, 2.222222f, 0.035714f }, // 626 (5 1 9 1) + { 6.555555f, 4.555556f, 2.444444f, 0.041860f }, // 627 (5 1 10 0) + { 5.888889f, 9.222222f, 0.444444f, 0.018480f }, // 628 (5 2 0 9) + { 6.000000f, 8.666667f, 0.666667f, 0.019397f }, // 629 (5 2 1 8) + { 6.111111f, 8.111111f, 0.888889f, 0.020501f }, // 630 (5 2 2 7) + { 6.222222f, 7.555555f, 1.111111f, 0.021845f }, // 631 (5 2 3 6) + { 6.333333f, 7.000000f, 1.333333f, 0.023499f }, // 632 (5 2 4 5) + { 6.444445f, 6.444445f, 1.555556f, 0.025568f }, // 633 (5 2 5 4) + { 6.555555f, 5.888889f, 1.777778f, 0.028213f }, // 634 (5 2 6 3) + { 6.666667f, 5.333333f, 2.000000f, 0.031690f }, // 635 (5 2 7 2) + { 6.777778f, 4.777778f, 2.222222f, 0.036437f }, // 636 (5 2 8 1) + { 6.888889f, 4.222222f, 2.444444f, 0.043269f }, // 637 (5 2 9 0) + { 6.333333f, 8.333333f, 0.666667f, 0.019108f }, // 638 (5 3 0 8) + { 6.444445f, 7.777778f, 0.888889f, 0.020270f }, // 639 (5 3 1 7) + { 6.555555f, 7.222222f, 1.111111f, 0.021687f }, // 640 (5 3 2 6) + { 6.666667f, 6.666667f, 1.333333f, 0.023438f }, // 641 (5 3 3 5) + { 6.777778f, 6.111111f, 1.555556f, 0.025641f }, // 642 (5 3 4 4) + { 6.888889f, 5.555555f, 1.777778f, 0.028481f }, // 643 (5 3 5 3) + { 7.000000f, 5.000000f, 2.000000f, 0.032258f }, // 644 (5 3 6 2) + { 7.111111f, 4.444445f, 2.222222f, 0.037500f }, // 645 (5 3 7 1) + { 7.222222f, 3.888889f, 2.444444f, 0.045226f }, // 646 (5 3 8 0) + { 6.777778f, 7.444445f, 0.888889f, 0.020134f }, // 647 (5 4 0 7) + { 6.888889f, 6.888889f, 1.111111f, 0.021635f }, // 648 (5 4 1 6) + { 7.000000f, 6.333333f, 1.333333f, 0.023499f }, // 649 (5 4 2 5) + { 7.111111f, 5.777778f, 1.555556f, 0.025862f }, // 650 (5 4 3 4) + { 7.222222f, 5.222222f, 1.777778f, 0.028939f }, // 651 (5 4 4 3) + { 7.333333f, 4.666667f, 2.000000f, 0.033088f }, // 652 (5 4 5 2) + { 7.444445f, 4.111111f, 2.222222f, 0.038961f }, // 653 (5 4 6 1) + { 7.555555f, 3.555556f, 2.444444f, 0.047872f }, // 654 (5 4 7 0) + { 7.222222f, 6.555555f, 1.111111f, 0.021687f }, // 655 (5 5 0 6) + { 7.333333f, 6.000000f, 1.333333f, 0.023684f }, // 656 (5 5 1 5) + { 7.444445f, 5.444445f, 1.555556f, 0.026239f }, // 657 (5 5 2 4) + { 7.555555f, 
4.888889f, 1.777778f, 0.029605f }, // 658 (5 5 3 3) + { 7.666667f, 4.333333f, 2.000000f, 0.034221f }, // 659 (5 5 4 2) + { 7.777778f, 3.777778f, 2.222222f, 0.040909f }, // 660 (5 5 5 1) + { 7.888889f, 3.222222f, 2.444444f, 0.051429f }, // 661 (5 5 6 0) + { 7.666667f, 5.666667f, 1.333333f, 0.024000f }, // 662 (5 6 0 5) + { 7.777778f, 5.111111f, 1.555556f, 0.026786f }, // 663 (5 6 1 4) + { 7.888889f, 4.555555f, 1.777778f, 0.030508f }, // 664 (5 6 2 3) + { 8.000000f, 4.000000f, 2.000000f, 0.035714f }, // 665 (5 6 3 2) + { 8.111111f, 3.444444f, 2.222222f, 0.043478f }, // 666 (5 6 4 1) + { 8.222222f, 2.888889f, 2.444444f, 0.056250f }, // 667 (5 6 5 0) + { 8.111111f, 4.777778f, 1.555556f, 0.027523f }, // 668 (5 7 0 4) + { 8.222222f, 4.222222f, 1.777778f, 0.031690f }, // 669 (5 7 1 3) + { 8.333333f, 3.666667f, 2.000000f, 0.037657f }, // 670 (5 7 2 2) + { 8.444445f, 3.111111f, 2.222222f, 0.046875f }, // 671 (5 7 3 1) + { 8.555555f, 2.555556f, 2.444444f, 0.062937f }, // 672 (5 7 4 0) + { 8.555555f, 3.888889f, 1.777778f, 0.033210f }, // 673 (5 8 0 3) + { 8.666667f, 3.333333f, 2.000000f, 0.040179f }, // 674 (5 8 1 2) + { 8.777778f, 2.777778f, 2.222222f, 0.051429f }, // 675 (5 8 2 1) + { 8.888889f, 2.222222f, 2.444444f, 0.072581f }, // 676 (5 8 3 0) + { 9.000000f, 3.000000f, 2.000000f, 0.043478f }, // 677 (5 9 0 2) + { 9.111111f, 2.444444f, 2.222222f, 0.057692f }, // 678 (5 9 1 1) + { 9.222222f, 1.888889f, 2.444444f, 0.087379f }, // 679 (5 9 2 0) + { 9.444445f, 2.111111f, 2.222222f, 0.066667f }, // 680 (5 10 0 1) + { 9.555555f, 1.555556f, 2.444444f, 0.112500f }, // 681 (5 10 1 0) + { 9.888889f, 1.222222f, 2.444444f, 0.163636f }, // 682 (5 11 0 0) + { 6.000000f, 10.000000f, 0.000000f, 0.016667f }, // 683 (6 0 0 10) + { 6.111111f, 9.444445f, 0.222222f, 0.017341f }, // 684 (6 0 1 9) + { 6.222222f, 8.888889f, 0.444444f, 0.018145f }, // 685 (6 0 2 8) + { 6.333333f, 8.333333f, 0.666667f, 0.019108f }, // 686 (6 0 3 7) + { 6.444445f, 7.777778f, 0.888889f, 0.020270f }, // 687 (6 0 4 6) + { 6.555555f, 7.222222f, 1.111111f, 0.021687f }, // 688 (6 0 5 5) + { 6.666667f, 6.666667f, 1.333333f, 0.023438f }, // 689 (6 0 6 4) + { 6.777778f, 6.111111f, 1.555556f, 0.025641f }, // 690 (6 0 7 3) + { 6.888889f, 5.555555f, 1.777778f, 0.028481f }, // 691 (6 0 8 2) + { 7.000000f, 5.000000f, 2.000000f, 0.032258f }, // 692 (6 0 9 1) + { 7.111111f, 4.444445f, 2.222222f, 0.037500f }, // 693 (6 0 10 0) + { 6.444445f, 9.111111f, 0.222222f, 0.017045f }, // 694 (6 1 0 9) + { 6.555555f, 8.555555f, 0.444444f, 0.017893f }, // 695 (6 1 1 8) + { 6.666667f, 8.000000f, 0.666667f, 0.018908f }, // 696 (6 1 2 7) + { 6.777778f, 7.444445f, 0.888889f, 0.020134f }, // 697 (6 1 3 6) + { 6.888889f, 6.888889f, 1.111111f, 0.021635f }, // 698 (6 1 4 5) + { 7.000000f, 6.333333f, 1.333333f, 0.023499f }, // 699 (6 1 5 4) + { 7.111111f, 5.777778f, 1.555556f, 0.025862f }, // 700 (6 1 6 3) + { 7.222222f, 5.222222f, 1.777778f, 0.028939f }, // 701 (6 1 7 2) + { 7.333333f, 4.666667f, 2.000000f, 0.033088f }, // 702 (6 1 8 1) + { 7.444445f, 4.111111f, 2.222222f, 0.038961f }, // 703 (6 1 9 0) + { 6.888889f, 8.222222f, 0.444444f, 0.017717f }, // 704 (6 2 0 8) + { 7.000000f, 7.666667f, 0.666667f, 0.018789f }, // 705 (6 2 1 7) + { 7.111111f, 7.111111f, 0.888889f, 0.020089f }, // 706 (6 2 2 6) + { 7.222222f, 6.555555f, 1.111111f, 0.021687f }, // 707 (6 2 3 5) + { 7.333333f, 6.000000f, 1.333333f, 0.023684f }, // 708 (6 2 4 4) + { 7.444445f, 5.444445f, 1.555556f, 0.026239f }, // 709 (6 2 5 3) + { 7.555555f, 4.888889f, 1.777778f, 0.029605f }, // 710 (6 2 6 2) + { 
7.666667f, 4.333333f, 2.000000f, 0.034221f }, // 711 (6 2 7 1) + { 7.777778f, 3.777778f, 2.222222f, 0.040909f }, // 712 (6 2 8 0) + { 7.333333f, 7.333333f, 0.666667f, 0.018750f }, // 713 (6 3 0 7) + { 7.444445f, 6.777778f, 0.888889f, 0.020134f }, // 714 (6 3 1 6) + { 7.555555f, 6.222222f, 1.111111f, 0.021845f }, // 715 (6 3 2 5) + { 7.666667f, 5.666667f, 1.333333f, 0.024000f }, // 716 (6 3 3 4) + { 7.777778f, 5.111111f, 1.555556f, 0.026786f }, // 717 (6 3 4 3) + { 7.888889f, 4.555555f, 1.777778f, 0.030508f }, // 718 (6 3 5 2) + { 8.000000f, 4.000000f, 2.000000f, 0.035714f }, // 719 (6 3 6 1) + { 8.111111f, 3.444444f, 2.222222f, 0.043478f }, // 720 (6 3 7 0) + { 7.777778f, 6.444445f, 0.888889f, 0.020270f }, // 721 (6 4 0 6) + { 7.888889f, 5.888889f, 1.111111f, 0.022113f }, // 722 (6 4 1 5) + { 8.000000f, 5.333333f, 1.333333f, 0.024457f }, // 723 (6 4 2 4) + { 8.111111f, 4.777778f, 1.555556f, 0.027523f }, // 724 (6 4 3 3) + { 8.222222f, 4.222222f, 1.777778f, 0.031690f }, // 725 (6 4 4 2) + { 8.333333f, 3.666667f, 2.000000f, 0.037657f }, // 726 (6 4 5 1) + { 8.444445f, 3.111111f, 2.222222f, 0.046875f }, // 727 (6 4 6 0) + { 8.222222f, 5.555555f, 1.111111f, 0.022500f }, // 728 (6 5 0 5) + { 8.333333f, 5.000000f, 1.333333f, 0.025070f }, // 729 (6 5 1 4) + { 8.444445f, 4.444445f, 1.555556f, 0.028481f }, // 730 (6 5 2 3) + { 8.555555f, 3.888889f, 1.777778f, 0.033210f }, // 731 (6 5 3 2) + { 8.666667f, 3.333333f, 2.000000f, 0.040179f }, // 732 (6 5 4 1) + { 8.777778f, 2.777778f, 2.222222f, 0.051429f }, // 733 (6 5 5 0) + { 8.666667f, 4.666667f, 1.333333f, 0.025862f }, // 734 (6 6 0 4) + { 8.777778f, 4.111111f, 1.555556f, 0.029703f }, // 735 (6 6 1 3) + { 8.888889f, 3.555556f, 1.777778f, 0.035156f }, // 736 (6 6 2 2) + { 9.000000f, 3.000000f, 2.000000f, 0.043478f }, // 737 (6 6 3 1) + { 9.111111f, 2.444444f, 2.222222f, 0.057692f }, // 738 (6 6 4 0) + { 9.111111f, 3.777778f, 1.555556f, 0.031250f }, // 739 (6 7 0 3) + { 9.222222f, 3.222222f, 1.777778f, 0.037657f }, // 740 (6 7 1 2) + { 9.333333f, 2.666667f, 2.000000f, 0.047872f }, // 741 (6 7 2 1) + { 9.444445f, 2.111111f, 2.222222f, 0.066667f }, // 742 (6 7 3 0) + { 9.555555f, 2.888889f, 1.777778f, 0.040909f }, // 743 (6 8 0 2) + { 9.666667f, 2.333333f, 2.000000f, 0.053892f }, // 744 (6 8 1 1) + { 9.777778f, 1.777778f, 2.222222f, 0.080357f }, // 745 (6 8 2 0) + { 10.000000f, 2.000000f, 2.000000f, 0.062500f }, // 746 (6 9 0 1) + { 10.111111f, 1.444444f, 2.222222f, 0.103448f }, // 747 (6 9 1 0) + { 10.444445f, 1.111111f, 2.222222f, 0.150000f }, // 748 (6 10 0 0) + { 7.000000f, 9.000000f, 0.000000f, 0.015873f }, // 749 (7 0 0 9) + { 7.111111f, 8.444445f, 0.222222f, 0.016667f }, // 750 (7 0 1 8) + { 7.222222f, 7.888889f, 0.444444f, 0.017613f }, // 751 (7 0 2 7) + { 7.333333f, 7.333333f, 0.666667f, 0.018750f }, // 752 (7 0 3 6) + { 7.444445f, 6.777778f, 0.888889f, 0.020134f }, // 753 (7 0 4 5) + { 7.555555f, 6.222222f, 1.111111f, 0.021845f }, // 754 (7 0 5 4) + { 7.666667f, 5.666667f, 1.333333f, 0.024000f }, // 755 (7 0 6 3) + { 7.777778f, 5.111111f, 1.555556f, 0.026786f }, // 756 (7 0 7 2) + { 7.888889f, 4.555555f, 1.777778f, 0.030508f }, // 757 (7 0 8 1) + { 8.000000f, 4.000000f, 2.000000f, 0.035714f }, // 758 (7 0 9 0) + { 7.444445f, 8.111111f, 0.222222f, 0.016575f }, // 759 (7 1 0 8) + { 7.555555f, 7.555555f, 0.444444f, 0.017578f }, // 760 (7 1 1 7) + { 7.666667f, 7.000000f, 0.666667f, 0.018789f }, // 761 (7 1 2 6) + { 7.777778f, 6.444445f, 0.888889f, 0.020270f }, // 762 (7 1 3 5) + { 7.888889f, 5.888889f, 1.111111f, 0.022113f }, // 763 (7 1 4 4) + { 
8.000000f, 5.333333f, 1.333333f, 0.024457f }, // 764 (7 1 5 3) + { 8.111111f, 4.777778f, 1.555556f, 0.027523f }, // 765 (7 1 6 2) + { 8.222222f, 4.222222f, 1.777778f, 0.031690f }, // 766 (7 1 7 1) + { 8.333333f, 3.666667f, 2.000000f, 0.037657f }, // 767 (7 1 8 0) + { 7.888889f, 7.222222f, 0.444444f, 0.017613f }, // 768 (7 2 0 7) + { 8.000000f, 6.666667f, 0.666667f, 0.018908f }, // 769 (7 2 1 6) + { 8.111111f, 6.111111f, 0.888889f, 0.020501f }, // 770 (7 2 2 5) + { 8.222222f, 5.555555f, 1.111111f, 0.022500f }, // 771 (7 2 3 4) + { 8.333333f, 5.000000f, 1.333333f, 0.025070f }, // 772 (7 2 4 3) + { 8.444445f, 4.444445f, 1.555556f, 0.028481f }, // 773 (7 2 5 2) + { 8.555555f, 3.888889f, 1.777778f, 0.033210f }, // 774 (7 2 6 1) + { 8.666667f, 3.333333f, 2.000000f, 0.040179f }, // 775 (7 2 7 0) + { 8.333333f, 6.333333f, 0.666667f, 0.019108f }, // 776 (7 3 0 6) + { 8.444445f, 5.777778f, 0.888889f, 0.020833f }, // 777 (7 3 1 5) + { 8.555555f, 5.222222f, 1.111111f, 0.023018f }, // 778 (7 3 2 4) + { 8.666667f, 4.666667f, 1.333333f, 0.025862f }, // 779 (7 3 3 3) + { 8.777778f, 4.111111f, 1.555556f, 0.029703f }, // 780 (7 3 4 2) + { 8.888889f, 3.555556f, 1.777778f, 0.035156f }, // 781 (7 3 5 1) + { 9.000000f, 3.000000f, 2.000000f, 0.043478f }, // 782 (7 3 6 0) + { 8.777778f, 5.444445f, 0.888889f, 0.021277f }, // 783 (7 4 0 5) + { 8.888889f, 4.888889f, 1.111111f, 0.023684f }, // 784 (7 4 1 4) + { 9.000000f, 4.333333f, 1.333333f, 0.026866f }, // 785 (7 4 2 3) + { 9.111111f, 3.777778f, 1.555556f, 0.031250f }, // 786 (7 4 3 2) + { 9.222222f, 3.222222f, 1.777778f, 0.037657f }, // 787 (7 4 4 1) + { 9.333333f, 2.666667f, 2.000000f, 0.047872f }, // 788 (7 4 5 0) + { 9.222222f, 4.555555f, 1.111111f, 0.024523f }, // 789 (7 5 0 4) + { 9.333333f, 4.000000f, 1.333333f, 0.028125f }, // 790 (7 5 1 3) + { 9.444445f, 3.444444f, 1.555556f, 0.033210f }, // 791 (7 5 2 2) + { 9.555555f, 2.888889f, 1.777778f, 0.040909f }, // 792 (7 5 3 1) + { 9.666667f, 2.333333f, 2.000000f, 0.053892f }, // 793 (7 5 4 0) + { 9.666667f, 3.666667f, 1.333333f, 0.029703f }, // 794 (7 6 0 3) + { 9.777778f, 3.111111f, 1.555556f, 0.035714f }, // 795 (7 6 1 2) + { 9.888889f, 2.555556f, 1.777778f, 0.045226f }, // 796 (7 6 2 1) + { 10.000000f, 2.000000f, 2.000000f, 0.062500f }, // 797 (7 6 3 0) + { 10.111111f, 2.777778f, 1.555556f, 0.038961f }, // 798 (7 7 0 2) + { 10.222222f, 2.222222f, 1.777778f, 0.051136f }, // 799 (7 7 1 1) + { 10.333333f, 1.666667f, 2.000000f, 0.075630f }, // 800 (7 7 2 0) + { 10.555555f, 1.888889f, 1.777778f, 0.059603f }, // 801 (7 8 0 1) + { 10.666667f, 1.333333f, 2.000000f, 0.097826f }, // 802 (7 8 1 0) + { 11.000000f, 1.000000f, 2.000000f, 0.142857f }, // 803 (7 9 0 0) + { 8.000000f, 8.000000f, 0.000000f, 0.015625f }, // 804 (8 0 0 8) + { 8.111111f, 7.444445f, 0.222222f, 0.016575f }, // 805 (8 0 1 7) + { 8.222222f, 6.888889f, 0.444444f, 0.017717f }, // 806 (8 0 2 6) + { 8.333333f, 6.333333f, 0.666667f, 0.019108f }, // 807 (8 0 3 5) + { 8.444445f, 5.777778f, 0.888889f, 0.020833f }, // 808 (8 0 4 4) + { 8.555555f, 5.222222f, 1.111111f, 0.023018f }, // 809 (8 0 5 3) + { 8.666667f, 4.666667f, 1.333333f, 0.025862f }, // 810 (8 0 6 2) + { 8.777778f, 4.111111f, 1.555556f, 0.029703f }, // 811 (8 0 7 1) + { 8.888889f, 3.555556f, 1.777778f, 0.035156f }, // 812 (8 0 8 0) + { 8.444445f, 7.111111f, 0.222222f, 0.016667f }, // 813 (8 1 0 7) + { 8.555555f, 6.555555f, 0.444444f, 0.017893f }, // 814 (8 1 1 6) + { 8.666667f, 6.000000f, 0.666667f, 0.019397f }, // 815 (8 1 2 5) + { 8.777778f, 5.444445f, 0.888889f, 0.021277f }, // 816 (8 1 3 4) 
+ { 8.888889f, 4.888889f, 1.111111f, 0.023684f }, // 817 (8 1 4 3) + { 9.000000f, 4.333333f, 1.333333f, 0.026866f }, // 818 (8 1 5 2) + { 9.111111f, 3.777778f, 1.555556f, 0.031250f }, // 819 (8 1 6 1) + { 9.222222f, 3.222222f, 1.777778f, 0.037657f }, // 820 (8 1 7 0) + { 8.888889f, 6.222222f, 0.444444f, 0.018145f }, // 821 (8 2 0 6) + { 9.000000f, 5.666667f, 0.666667f, 0.019780f }, // 822 (8 2 1 5) + { 9.111111f, 5.111111f, 0.888889f, 0.021845f }, // 823 (8 2 2 4) + { 9.222222f, 4.555555f, 1.111111f, 0.024523f }, // 824 (8 2 3 3) + { 9.333333f, 4.000000f, 1.333333f, 0.028125f }, // 825 (8 2 4 2) + { 9.444445f, 3.444444f, 1.555556f, 0.033210f }, // 826 (8 2 5 1) + { 9.555555f, 2.888889f, 1.777778f, 0.040909f }, // 827 (8 2 6 0) + { 9.333333f, 5.333333f, 0.666667f, 0.020270f }, // 828 (8 3 0 5) + { 9.444445f, 4.777778f, 0.888889f, 0.022556f }, // 829 (8 3 1 4) + { 9.555555f, 4.222222f, 1.111111f, 0.025568f }, // 830 (8 3 2 3) + { 9.666667f, 3.666667f, 1.333333f, 0.029703f }, // 831 (8 3 3 2) + { 9.777778f, 3.111111f, 1.555556f, 0.035714f }, // 832 (8 3 4 1) + { 9.888889f, 2.555556f, 1.777778f, 0.045226f }, // 833 (8 3 5 0) + { 9.777778f, 4.444445f, 0.888889f, 0.023438f }, // 834 (8 4 0 4) + { 9.888889f, 3.888889f, 1.111111f, 0.026866f }, // 835 (8 4 1 3) + { 10.000000f, 3.333333f, 1.333333f, 0.031690f }, // 836 (8 4 2 2) + { 10.111111f, 2.777778f, 1.555556f, 0.038961f }, // 837 (8 4 3 1) + { 10.222222f, 2.222222f, 1.777778f, 0.051136f }, // 838 (8 4 4 0) + { 10.222222f, 3.555556f, 1.111111f, 0.028481f }, // 839 (8 5 0 3) + { 10.333333f, 3.000000f, 1.333333f, 0.034221f }, // 840 (8 5 1 2) + { 10.444445f, 2.444444f, 1.555556f, 0.043269f }, // 841 (8 5 2 1) + { 10.555555f, 1.888889f, 1.777778f, 0.059603f }, // 842 (8 5 3 0) + { 10.666667f, 2.666667f, 1.333333f, 0.037500f }, // 843 (8 6 0 2) + { 10.777778f, 2.111111f, 1.555556f, 0.049180f }, // 844 (8 6 1 1) + { 10.888889f, 1.555556f, 1.777778f, 0.072581f }, // 845 (8 6 2 0) + { 11.111111f, 1.777778f, 1.555556f, 0.057692f }, // 846 (8 7 0 1) + { 11.222222f, 1.222222f, 1.777778f, 0.094737f }, // 847 (8 7 1 0) + { 11.555555f, 0.888889f, 1.777778f, 0.140625f }, // 848 (8 8 0 0) + { 9.000000f, 7.000000f, 0.000000f, 0.015873f }, // 849 (9 0 0 7) + { 9.111111f, 6.444445f, 0.222222f, 0.017045f }, // 850 (9 0 1 6) + { 9.222222f, 5.888889f, 0.444444f, 0.018480f }, // 851 (9 0 2 5) + { 9.333333f, 5.333333f, 0.666667f, 0.020270f }, // 852 (9 0 3 4) + { 9.444445f, 4.777778f, 0.888889f, 0.022556f }, // 853 (9 0 4 3) + { 9.555555f, 4.222222f, 1.111111f, 0.025568f }, // 854 (9 0 5 2) + { 9.666667f, 3.666667f, 1.333333f, 0.029703f }, // 855 (9 0 6 1) + { 9.777778f, 3.111111f, 1.555556f, 0.035714f }, // 856 (9 0 7 0) + { 9.444445f, 6.111111f, 0.222222f, 0.017341f }, // 857 (9 1 0 6) + { 9.555555f, 5.555555f, 0.444444f, 0.018908f }, // 858 (9 1 1 5) + { 9.666667f, 5.000000f, 0.666667f, 0.020882f }, // 859 (9 1 2 4) + { 9.777778f, 4.444445f, 0.888889f, 0.023438f }, // 860 (9 1 3 3) + { 9.888889f, 3.888889f, 1.111111f, 0.026866f }, // 861 (9 1 4 2) + { 10.000000f, 3.333333f, 1.333333f, 0.031690f }, // 862 (9 1 5 1) + { 10.111111f, 2.777778f, 1.555556f, 0.038961f }, // 863 (9 1 6 0) + { 9.888889f, 5.222222f, 0.444444f, 0.019438f }, // 864 (9 2 0 5) + { 10.000000f, 4.666667f, 0.666667f, 0.021635f }, // 865 (9 2 1 4) + { 10.111111f, 4.111111f, 0.888889f, 0.024523f }, // 866 (9 2 2 3) + { 10.222222f, 3.555556f, 1.111111f, 0.028481f }, // 867 (9 2 3 2) + { 10.333333f, 3.000000f, 1.333333f, 0.034221f }, // 868 (9 2 4 1) + { 10.444445f, 2.444444f, 1.555556f, 0.043269f }, 
// 869 (9 2 5 0) + { 10.333333f, 4.333333f, 0.666667f, 0.022556f }, // 870 (9 3 0 4) + { 10.444445f, 3.777778f, 0.888889f, 0.025862f }, // 871 (9 3 1 3) + { 10.555555f, 3.222222f, 1.111111f, 0.030508f }, // 872 (9 3 2 2) + { 10.666667f, 2.666667f, 1.333333f, 0.037500f }, // 873 (9 3 3 1) + { 10.777778f, 2.111111f, 1.555556f, 0.049180f }, // 874 (9 3 4 0) + { 10.777778f, 3.444444f, 0.888889f, 0.027523f }, // 875 (9 4 0 3) + { 10.888889f, 2.888889f, 1.111111f, 0.033088f }, // 876 (9 4 1 2) + { 11.000000f, 2.333333f, 1.333333f, 0.041860f }, // 877 (9 4 2 1) + { 11.111111f, 1.777778f, 1.555556f, 0.057692f }, // 878 (9 4 3 0) + { 11.222222f, 2.555556f, 1.111111f, 0.036437f }, // 879 (9 5 0 2) + { 11.333333f, 2.000000f, 1.333333f, 0.047872f }, // 880 (9 5 1 1) + { 11.444445f, 1.444444f, 1.555556f, 0.070866f }, // 881 (9 5 2 0) + { 11.666667f, 1.666667f, 1.333333f, 0.056604f }, // 882 (9 6 0 1) + { 11.777778f, 1.111111f, 1.555556f, 0.093750f }, // 883 (9 6 1 0) + { 12.111111f, 0.777778f, 1.555556f, 0.142857f }, // 884 (9 7 0 0) + { 10.000000f, 6.000000f, 0.000000f, 0.016667f }, // 885 (10 0 0 6) + { 10.111111f, 5.444445f, 0.222222f, 0.018182f }, // 886 (10 0 1 5) + { 10.222222f, 4.888889f, 0.444444f, 0.020089f }, // 887 (10 0 2 4) + { 10.333333f, 4.333333f, 0.666667f, 0.022556f }, // 888 (10 0 3 3) + { 10.444445f, 3.777778f, 0.888889f, 0.025862f }, // 889 (10 0 4 2) + { 10.555555f, 3.222222f, 1.111111f, 0.030508f }, // 890 (10 0 5 1) + { 10.666667f, 2.666667f, 1.333333f, 0.037500f }, // 891 (10 0 6 0) + { 10.444445f, 5.111111f, 0.222222f, 0.018750f }, // 892 (10 1 0 5) + { 10.555555f, 4.555555f, 0.444444f, 0.020882f }, // 893 (10 1 1 4) + { 10.666667f, 4.000000f, 0.666667f, 0.023684f }, // 894 (10 1 2 3) + { 10.777778f, 3.444444f, 0.888889f, 0.027523f }, // 895 (10 1 3 2) + { 10.888889f, 2.888889f, 1.111111f, 0.033088f }, // 896 (10 1 4 1) + { 11.000000f, 2.333333f, 1.333333f, 0.041860f }, // 897 (10 1 5 0) + { 10.888889f, 4.222222f, 0.444444f, 0.021845f }, // 898 (10 2 0 4) + { 11.000000f, 3.666667f, 0.666667f, 0.025070f }, // 899 (10 2 1 3) + { 11.111111f, 3.111111f, 0.888889f, 0.029605f }, // 900 (10 2 2 2) + { 11.222222f, 2.555556f, 1.111111f, 0.036437f }, // 901 (10 2 3 1) + { 11.333333f, 2.000000f, 1.333333f, 0.047872f }, // 902 (10 2 4 0) + { 11.333333f, 3.333333f, 0.666667f, 0.026786f }, // 903 (10 3 0 3) + { 11.444445f, 2.777778f, 0.888889f, 0.032258f }, // 904 (10 3 1 2) + { 11.555555f, 2.222222f, 1.111111f, 0.040909f }, // 905 (10 3 2 1) + { 11.666667f, 1.666667f, 1.333333f, 0.056604f }, // 906 (10 3 3 0) + { 11.777778f, 2.444444f, 0.888889f, 0.035714f }, // 907 (10 4 0 2) + { 11.888889f, 1.888889f, 1.111111f, 0.047120f }, // 908 (10 4 1 1) + { 12.000000f, 1.333333f, 1.333333f, 0.070313f }, // 909 (10 4 2 0) + { 12.222222f, 1.555556f, 1.111111f, 0.056250f }, // 910 (10 5 0 1) + { 12.333333f, 1.000000f, 1.333333f, 0.094737f }, // 911 (10 5 1 0) + { 12.666667f, 0.666667f, 1.333333f, 0.150000f }, // 912 (10 6 0 0) + { 11.000000f, 5.000000f, 0.000000f, 0.018182f }, // 913 (11 0 0 5) + { 11.111111f, 4.444445f, 0.222222f, 0.020270f }, // 914 (11 0 1 4) + { 11.222222f, 3.888889f, 0.444444f, 0.023018f }, // 915 (11 0 2 3) + { 11.333333f, 3.333333f, 0.666667f, 0.026786f }, // 916 (11 0 3 2) + { 11.444445f, 2.777778f, 0.888889f, 0.032258f }, // 917 (11 0 4 1) + { 11.555555f, 2.222222f, 1.111111f, 0.040909f }, // 918 (11 0 5 0) + { 11.444445f, 4.111111f, 0.222222f, 0.021277f }, // 919 (11 1 0 4) + { 11.555555f, 3.555556f, 0.444444f, 0.024457f }, // 920 (11 1 1 3) + { 11.666667f, 3.000000f, 
0.666667f, 0.028939f }, // 921 (11 1 2 2) + { 11.777778f, 2.444444f, 0.888889f, 0.035714f }, // 922 (11 1 3 1) + { 11.888889f, 1.888889f, 1.111111f, 0.047120f }, // 923 (11 1 4 0) + { 11.888889f, 3.222222f, 0.444444f, 0.026239f }, // 924 (11 2 0 3) + { 12.000000f, 2.666667f, 0.666667f, 0.031690f }, // 925 (11 2 1 2) + { 12.111111f, 2.111111f, 0.888889f, 0.040359f }, // 926 (11 2 2 1) + { 12.222222f, 1.555556f, 1.111111f, 0.056250f }, // 927 (11 2 3 0) + { 12.333333f, 2.333333f, 0.666667f, 0.035294f }, // 928 (11 3 0 2) + { 12.444445f, 1.777778f, 0.888889f, 0.046875f }, // 929 (11 3 1 1) + { 12.555555f, 1.222222f, 1.111111f, 0.070866f }, // 930 (11 3 2 0) + { 12.777778f, 1.444444f, 0.888889f, 0.056604f }, // 931 (11 4 0 1) + { 12.888889f, 0.888889f, 1.111111f, 0.097826f }, // 932 (11 4 1 0) + { 13.222222f, 0.555556f, 1.111111f, 0.163636f }, // 933 (11 5 0 0) + { 12.000000f, 4.000000f, 0.000000f, 0.020833f }, // 934 (12 0 0 4) + { 12.111111f, 3.444444f, 0.222222f, 0.024000f }, // 935 (12 0 1 3) + { 12.222222f, 2.888889f, 0.444444f, 0.028481f }, // 936 (12 0 2 2) + { 12.333333f, 2.333333f, 0.666667f, 0.035294f }, // 937 (12 0 3 1) + { 12.444445f, 1.777778f, 0.888889f, 0.046875f }, // 938 (12 0 4 0) + { 12.444445f, 3.111111f, 0.222222f, 0.025862f }, // 939 (12 1 0 3) + { 12.555555f, 2.555556f, 0.444444f, 0.031359f }, // 940 (12 1 1 2) + { 12.666667f, 2.000000f, 0.666667f, 0.040179f }, // 941 (12 1 2 1) + { 12.777778f, 1.444444f, 0.888889f, 0.056604f }, // 942 (12 1 3 0) + { 12.888889f, 2.222222f, 0.444444f, 0.035156f }, // 943 (12 2 0 2) + { 13.000000f, 1.666667f, 0.666667f, 0.047120f }, // 944 (12 2 1 1) + { 13.111111f, 1.111111f, 0.888889f, 0.072581f }, // 945 (12 2 2 0) + { 13.333333f, 1.333333f, 0.666667f, 0.057692f }, // 946 (12 3 0 1) + { 13.444445f, 0.777778f, 0.888889f, 0.103448f }, // 947 (12 3 1 0) + { 13.777778f, 0.444444f, 0.888889f, 0.187500f }, // 948 (12 4 0 0) + { 13.000000f, 3.000000f, 0.000000f, 0.025641f }, // 949 (13 0 0 3) + { 13.111111f, 2.444444f, 0.222222f, 0.031250f }, // 950 (13 0 1 2) + { 13.222222f, 1.888889f, 0.444444f, 0.040359f }, // 951 (13 0 2 1) + { 13.333333f, 1.333333f, 0.666667f, 0.057692f }, // 952 (13 0 3 0) + { 13.444445f, 2.111111f, 0.222222f, 0.035294f }, // 953 (13 1 0 2) + { 13.555555f, 1.555556f, 0.444444f, 0.047872f }, // 954 (13 1 1 1) + { 13.666667f, 1.000000f, 0.666667f, 0.075630f }, // 955 (13 1 2 0) + { 13.888889f, 1.222222f, 0.444444f, 0.059603f }, // 956 (13 2 0 1) + { 14.000000f, 0.666667f, 0.666667f, 0.112500f }, // 957 (13 2 1 0) + { 14.333333f, 0.333333f, 0.666667f, 0.230769f }, // 958 (13 3 0 0) + { 14.000000f, 2.000000f, 0.000000f, 0.035714f }, // 959 (14 0 0 2) + { 14.111111f, 1.444444f, 0.222222f, 0.049180f }, // 960 (14 0 1 1) + { 14.222222f, 0.888889f, 0.444444f, 0.080357f }, // 961 (14 0 2 0) + { 14.444445f, 1.111111f, 0.222222f, 0.062500f }, // 962 (14 1 0 1) + { 14.555555f, 0.555556f, 0.444444f, 0.126761f }, // 963 (14 1 1 0) + { 14.888889f, 0.222222f, 0.444444f, 0.321429f }, // 964 (14 2 0 0) + { 15.000000f, 1.000000f, 0.000000f, 0.066667f }, // 965 (15 0 0 1) + { 15.111111f, 0.444444f, 0.222222f, 0.150000f }, // 966 (15 0 1 0) + { 15.444445f, 0.111111f, 0.222222f, 0.600000f }, // 967 (15 1 0 0) + { 16.000000f, 0.000000f, 0.000000f, FLT_MAX }, // 968 (16 0 0 0) +}; // 969 four cluster elements + Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/maths.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/maths.h +++ 
ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/maths.h @@ -23,14 +23,22 @@ -------------------------------------------------------------------------- */ -#ifndef SQUISH_MATHS_H -#define SQUISH_MATHS_H +#ifndef NV_SQUISH_MATHS_H +#define NV_SQUISH_MATHS_H + +#if NV_USE_ALTIVEC +#undef vector +#endif #include #include #include "config.h" -namespace squish { +#if NV_USE_ALTIVEC +#define vector __vector +#endif + +namespace nvsquish { class Vec3 { @@ -234,6 +242,6 @@ Sym3x3 ComputeWeightedCovariance( int n, Vec3 const* points, float const* weights, Vec3::Arg metric ); Vec3 ComputePrincipleComponent( Sym3x3 const& matrix ); -} // namespace squish +} // namespace nvsquish #endif // ndef SQUISH_MATHS_H Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/maths.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/maths.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/maths.cpp @@ -27,7 +27,7 @@ #include "simd.h" #include -namespace squish { +namespace nvsquish { Sym3x3 ComputeWeightedCovariance( int n, Vec3 const* points, float const* weights, Vec3::Arg metric ) { @@ -134,4 +134,4 @@ #endif -} // namespace squish +} // namespace nvsquish Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/simd.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/simd.h +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/simd.h @@ -23,8 +23,8 @@ -------------------------------------------------------------------------- */ -#ifndef SQUISH_SIMD_H -#define SQUISH_SIMD_H +#ifndef NV_SQUISH_SIMD_H +#define NV_SQUISH_SIMD_H #include "maths.h" Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/simd_sse.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/simd_sse.h +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/simd_sse.h @@ -23,8 +23,8 @@ -------------------------------------------------------------------------- */ -#ifndef SQUISH_SIMD_SSE_H -#define SQUISH_SIMD_SSE_H +#ifndef NV_SQUISH_SIMD_SSE_H +#define NV_SQUISH_SIMD_SSE_H #include #if ( SQUISH_USE_SSE > 1 ) @@ -35,7 +35,7 @@ #define SQUISH_SSE_SPLAT( a ) \ ( ( a ) | ( ( a ) << 2 ) | ( ( a ) << 4 ) | ( ( a ) << 6 ) ) -namespace squish { +namespace nvsquish { #define VEC4_CONST( X ) Vec4( _mm_set1_ps( X ) ) @@ -72,6 +72,13 @@ _mm_store_ps( c, m_v ); return Vec3( c[0], c[1], c[2] ); } + + float GetX() const + { + SQUISH_ALIGN_16 float f; + _mm_store_ss(&f, m_v); + return f; + } Vec4 SplatX() const { return Vec4( _mm_shuffle_ps( m_v, m_v, SQUISH_SSE_SPLAT( 0 ) ) ); } Vec4 SplatY() const { return Vec4( _mm_shuffle_ps( m_v, m_v, SQUISH_SSE_SPLAT( 1 ) ) ); } Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/simd_ve.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/simd_ve.h +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/simd_ve.h @@ -1,6 +1,7 @@ /* ----------------------------------------------------------------------------- Copyright (c) 2006 Simon Brown si@sjbrown.co.uk + Copyright (c) 2016 Raptor Engineering, LLC Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the @@ -26,12 +27,14 @@ #ifndef SQUISH_SIMD_VE_H #define SQUISH_SIMD_VE_H +#ifndef __APPLE_ALTIVEC__ #include #undef bool +#endif -namespace squish { +namespace nvsquish { -#define 
VEC4_CONST( X ) Vec4( ( vector float )( X ) ) +#define VEC4_CONST( X ) Vec4( vec_splats( (float)X ) ) class Vec4 { @@ -76,7 +79,14 @@ u.v = m_v; return Vec3( u.c[0], u.c[1], u.c[2] ); } - + + float GetX() const + { + union { vector float v; float c[4]; } u; + u.v = m_v; + return u.c[0]; + } + Vec4 SplatX() const { return Vec4( vec_splat( m_v, 0 ) ); } Vec4 SplatY() const { return Vec4( vec_splat( m_v, 1 ) ); } Vec4 SplatZ() const { return Vec4( vec_splat( m_v, 2 ) ); } @@ -96,7 +106,7 @@ Vec4& operator*=( Arg v ) { - m_v = vec_madd( m_v, v.m_v, ( vector float )( -0.0f ) ); + m_v = vec_madd( m_v, v.m_v, vec_splats( -0.0f ) ); return *this; } @@ -112,7 +122,7 @@ friend Vec4 operator*( Vec4::Arg left, Vec4::Arg right ) { - return Vec4( vec_madd( left.m_v, right.m_v, ( vector float )( -0.0f ) ) ); + return Vec4( vec_madd( left.m_v, right.m_v, vec_splats( -0.0f ) ) ); } //! Returns a*b + c @@ -133,7 +143,7 @@ vector float estimate = vec_re( v.m_v ); // one round of Newton-Rhaphson refinement - vector float diff = vec_nmsub( estimate, v.m_v, ( vector float )( 1.0f ) ); + vector float diff = vec_nmsub( estimate, v.m_v, vec_splats( 1.0f ) ); return Vec4( vec_madd( diff, estimate, estimate ) ); } Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/squish.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/squish.h +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/squish.h @@ -23,11 +23,11 @@ -------------------------------------------------------------------------- */ -#ifndef SQUISH_H -#define SQUISH_H +#ifndef NV_SQUISH_H +#define NV_SQUISH_H //! All squish API functions live in this namespace. -namespace squish { +namespace nvsquish { // ----------------------------------------------------------------------------- Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/squish.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/squish.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/squish.cpp @@ -23,7 +23,7 @@ -------------------------------------------------------------------------- */ -#include +#include "squish.h" #include "colourset.h" #include "maths.h" #include "rangefit.h" Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/squish.xcodeproj/project.pbxproj =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/squish.xcodeproj/project.pbxproj +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/squish.xcodeproj/project.pbxproj @@ -1,531 +0,0 @@ -// !$*UTF8*$! 
-{ - archiveVersion = 1; - classes = { - }; - objectVersion = 42; - objects = { - -/* Begin PBXBuildFile section */ - 133FA0DC096A7B8E0050752E /* alpha.h in Headers */ = {isa = PBXBuildFile; fileRef = 133FA0DA096A7B8E0050752E /* alpha.h */; }; - 133FA0DD096A7B8E0050752E /* alpha.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 133FA0DB096A7B8E0050752E /* alpha.cpp */; }; - 1342B4160999DF1900152915 /* libsquish.a in Frameworks */ = {isa = PBXBuildFile; fileRef = D2AAC046055464E500DB518D /* libsquish.a */; }; - 1342B41A0999DF7000152915 /* squishpng.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1342B4190999DF7000152915 /* squishpng.cpp */; }; - 1342B43F0999E0CC00152915 /* squishtest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1342B43E0999E0CC00152915 /* squishtest.cpp */; }; - 1342B4420999E0EC00152915 /* libsquish.a in Frameworks */ = {isa = PBXBuildFile; fileRef = D2AAC046055464E500DB518D /* libsquish.a */; }; - 1350D71A092AA858005EE038 /* clusterfit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1350D70B092AA857005EE038 /* clusterfit.cpp */; }; - 1350D71B092AA858005EE038 /* clusterfit.h in Headers */ = {isa = PBXBuildFile; fileRef = 1350D70C092AA858005EE038 /* clusterfit.h */; }; - 1350D71E092AA858005EE038 /* colourblock.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1350D70F092AA858005EE038 /* colourblock.cpp */; }; - 1350D71F092AA858005EE038 /* colourblock.h in Headers */ = {isa = PBXBuildFile; fileRef = 1350D710092AA858005EE038 /* colourblock.h */; }; - 1350D720092AA858005EE038 /* config.h in Headers */ = {isa = PBXBuildFile; fileRef = 1350D711092AA858005EE038 /* config.h */; }; - 1350D721092AA858005EE038 /* maths.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1350D712092AA858005EE038 /* maths.cpp */; }; - 1350D722092AA858005EE038 /* maths.h in Headers */ = {isa = PBXBuildFile; fileRef = 1350D713092AA858005EE038 /* maths.h */; }; - 1350D725092AA858005EE038 /* rangefit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1350D716092AA858005EE038 /* rangefit.cpp */; }; - 1350D726092AA858005EE038 /* rangefit.h in Headers */ = {isa = PBXBuildFile; fileRef = 1350D717092AA858005EE038 /* rangefit.h */; }; - 1350D727092AA858005EE038 /* squish.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1350D718092AA858005EE038 /* squish.cpp */; }; - 1350D728092AA858005EE038 /* squish.h in Headers */ = {isa = PBXBuildFile; fileRef = 1350D719092AA858005EE038 /* squish.h */; settings = {ATTRIBUTES = (Public, ); }; }; - 139C21CF09ADAB0800A2500D /* squishgen.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 139C21CE09ADAB0800A2500D /* squishgen.cpp */; }; - 139C234F09B0602700A2500D /* singlecolourfit.h in Headers */ = {isa = PBXBuildFile; fileRef = 139C234D09B0602700A2500D /* singlecolourfit.h */; }; - 139C235009B0602700A2500D /* singlecolourfit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 139C234E09B0602700A2500D /* singlecolourfit.cpp */; }; - 13A7CCA40952BE63001C963A /* colourfit.h in Headers */ = {isa = PBXBuildFile; fileRef = 13A7CCA20952BE63001C963A /* colourfit.h */; }; - 13A7CCA50952BE63001C963A /* colourfit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 13A7CCA30952BE63001C963A /* colourfit.cpp */; }; - 13C4C7AD0941C18000AC5B89 /* colourset.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 13C4C7AB0941C18000AC5B89 /* colourset.cpp */; }; - 13C4C7AE0941C18000AC5B89 /* colourset.h in Headers */ = {isa = PBXBuildFile; fileRef = 13C4C7AC0941C18000AC5B89 /* colourset.h */; }; - 13CD64C2092BCF8A00488C97 /* simd.h in Headers */ = {isa = PBXBuildFile; fileRef = 
13CD64C0092BCF8A00488C97 /* simd.h */; }; - 13D0DC910931F93A00909807 /* simd_ve.h in Headers */ = {isa = PBXBuildFile; fileRef = 13D0DC900931F93A00909807 /* simd_ve.h */; }; - 13D0DC970931F9D600909807 /* simd_sse.h in Headers */ = {isa = PBXBuildFile; fileRef = 13D0DC960931F9D600909807 /* simd_sse.h */; }; -/* End PBXBuildFile section */ - -/* Begin PBXContainerItemProxy section */ - 1342B52B099BF72F00152915 /* PBXContainerItemProxy */ = { - isa = PBXContainerItemProxy; - containerPortal = 08FB7793FE84155DC02AAC07 /* Project object */; - proxyType = 1; - remoteGlobalIDString = D2AAC045055464E500DB518D; - remoteInfo = squish; - }; - 1342B58E099BF93D00152915 /* PBXContainerItemProxy */ = { - isa = PBXContainerItemProxy; - containerPortal = 08FB7793FE84155DC02AAC07 /* Project object */; - proxyType = 1; - remoteGlobalIDString = D2AAC045055464E500DB518D; - remoteInfo = squish; - }; -/* End PBXContainerItemProxy section */ - -/* Begin PBXFileReference section */ - 133FA0DA096A7B8E0050752E /* alpha.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = alpha.h; sourceTree = ""; }; - 133FA0DB096A7B8E0050752E /* alpha.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = alpha.cpp; sourceTree = ""; }; - 1342B4110999DE7F00152915 /* squishpng */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = squishpng; sourceTree = BUILT_PRODUCTS_DIR; }; - 1342B4190999DF7000152915 /* squishpng.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = squishpng.cpp; path = extra/squishpng.cpp; sourceTree = ""; }; - 1342B4370999E07C00152915 /* squishtest */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = squishtest; sourceTree = BUILT_PRODUCTS_DIR; }; - 1342B43E0999E0CC00152915 /* squishtest.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = squishtest.cpp; path = extra/squishtest.cpp; sourceTree = ""; }; - 1350D70B092AA857005EE038 /* clusterfit.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = clusterfit.cpp; sourceTree = ""; }; - 1350D70C092AA858005EE038 /* clusterfit.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = clusterfit.h; sourceTree = ""; }; - 1350D70F092AA858005EE038 /* colourblock.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = colourblock.cpp; sourceTree = ""; }; - 1350D710092AA858005EE038 /* colourblock.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = colourblock.h; sourceTree = ""; }; - 1350D711092AA858005EE038 /* config.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = config.h; sourceTree = ""; }; - 1350D712092AA858005EE038 /* maths.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = maths.cpp; sourceTree = ""; }; - 1350D713092AA858005EE038 /* maths.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = maths.h; sourceTree = ""; }; - 1350D716092AA858005EE038 /* rangefit.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = rangefit.cpp; sourceTree = ""; }; - 1350D717092AA858005EE038 /* rangefit.h */ = {isa = PBXFileReference; fileEncoding = 30; 
lastKnownFileType = sourcecode.c.h; path = rangefit.h; sourceTree = ""; }; - 1350D718092AA858005EE038 /* squish.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = squish.cpp; sourceTree = ""; }; - 1350D719092AA858005EE038 /* squish.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = squish.h; sourceTree = ""; }; - 13906CE3096938880000A6A7 /* texture_compression_s3tc.txt */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = text; path = texture_compression_s3tc.txt; sourceTree = ""; }; - 139C21C409ADAA7000A2500D /* squishgen */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = squishgen; sourceTree = BUILT_PRODUCTS_DIR; }; - 139C21CE09ADAB0800A2500D /* squishgen.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = squishgen.cpp; path = extra/squishgen.cpp; sourceTree = ""; }; - 139C234D09B0602700A2500D /* singlecolourfit.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = singlecolourfit.h; sourceTree = ""; }; - 139C234E09B0602700A2500D /* singlecolourfit.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = singlecolourfit.cpp; sourceTree = ""; }; - 139C236D09B060A900A2500D /* singlecolourlookup.inl */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = text; path = singlecolourlookup.inl; sourceTree = ""; }; - 13A7CCA20952BE63001C963A /* colourfit.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = colourfit.h; sourceTree = ""; }; - 13A7CCA30952BE63001C963A /* colourfit.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = colourfit.cpp; sourceTree = ""; }; - 13C4C7AB0941C18000AC5B89 /* colourset.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = colourset.cpp; sourceTree = ""; }; - 13C4C7AC0941C18000AC5B89 /* colourset.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = colourset.h; sourceTree = ""; }; - 13CD64C0092BCF8A00488C97 /* simd.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = simd.h; sourceTree = ""; }; - 13D0DC900931F93A00909807 /* simd_ve.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = simd_ve.h; sourceTree = ""; }; - 13D0DC960931F9D600909807 /* simd_sse.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = simd_sse.h; sourceTree = ""; }; - D2AAC046055464E500DB518D /* libsquish.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libsquish.a; sourceTree = BUILT_PRODUCTS_DIR; }; -/* End PBXFileReference section */ - -/* Begin PBXFrameworksBuildPhase section */ - 1342B40F0999DE7F00152915 /* Frameworks */ = { - isa = PBXFrameworksBuildPhase; - buildActionMask = 2147483647; - files = ( - 1342B4160999DF1900152915 /* libsquish.a in Frameworks */, - ); - runOnlyForDeploymentPostprocessing = 0; - }; - 1342B4350999E07C00152915 /* Frameworks */ = { - isa = PBXFrameworksBuildPhase; - buildActionMask = 2147483647; - files = ( - 1342B4420999E0EC00152915 /* libsquish.a in Frameworks */, - ); - runOnlyForDeploymentPostprocessing = 0; - }; - 139C21C209ADAA7000A2500D /* Frameworks */ = { - isa = PBXFrameworksBuildPhase; - buildActionMask = 2147483647; - files 
= ( - ); - runOnlyForDeploymentPostprocessing = 0; - }; - D289987405E68DCB004EDB86 /* Frameworks */ = { - isa = PBXFrameworksBuildPhase; - buildActionMask = 2147483647; - files = ( - ); - runOnlyForDeploymentPostprocessing = 0; - }; -/* End PBXFrameworksBuildPhase section */ - -/* Begin PBXGroup section */ - 08FB7794FE84155DC02AAC07 /* squish */ = { - isa = PBXGroup; - children = ( - 08FB7795FE84155DC02AAC07 /* Source */, - C6A0FF2B0290797F04C91782 /* Documentation */, - 1AB674ADFE9D54B511CA2CBB /* Products */, - ); - name = squish; - sourceTree = ""; - }; - 08FB7795FE84155DC02AAC07 /* Source */ = { - isa = PBXGroup; - children = ( - 133FA0DB096A7B8E0050752E /* alpha.cpp */, - 133FA0DA096A7B8E0050752E /* alpha.h */, - 1350D70B092AA857005EE038 /* clusterfit.cpp */, - 1350D70C092AA858005EE038 /* clusterfit.h */, - 13A7CCA30952BE63001C963A /* colourfit.cpp */, - 13A7CCA20952BE63001C963A /* colourfit.h */, - 13C4C7AB0941C18000AC5B89 /* colourset.cpp */, - 13C4C7AC0941C18000AC5B89 /* colourset.h */, - 1350D70F092AA858005EE038 /* colourblock.cpp */, - 1350D710092AA858005EE038 /* colourblock.h */, - 13906CE3096938880000A6A7 /* texture_compression_s3tc.txt */, - 1350D711092AA858005EE038 /* config.h */, - 1350D712092AA858005EE038 /* maths.cpp */, - 1350D713092AA858005EE038 /* maths.h */, - 1350D716092AA858005EE038 /* rangefit.cpp */, - 1350D717092AA858005EE038 /* rangefit.h */, - 13CD64C0092BCF8A00488C97 /* simd.h */, - 13D0DC960931F9D600909807 /* simd_sse.h */, - 13D0DC900931F93A00909807 /* simd_ve.h */, - 139C234E09B0602700A2500D /* singlecolourfit.cpp */, - 139C234D09B0602700A2500D /* singlecolourfit.h */, - 139C236D09B060A900A2500D /* singlecolourlookup.inl */, - 1350D718092AA858005EE038 /* squish.cpp */, - 1350D719092AA858005EE038 /* squish.h */, - 139C21CE09ADAB0800A2500D /* squishgen.cpp */, - 1342B4190999DF7000152915 /* squishpng.cpp */, - 1342B43E0999E0CC00152915 /* squishtest.cpp */, - ); - name = Source; - sourceTree = ""; - }; - 1AB674ADFE9D54B511CA2CBB /* Products */ = { - isa = PBXGroup; - children = ( - D2AAC046055464E500DB518D /* libsquish.a */, - 1342B4110999DE7F00152915 /* squishpng */, - 1342B4370999E07C00152915 /* squishtest */, - 139C21C409ADAA7000A2500D /* squishgen */, - ); - name = Products; - sourceTree = ""; - }; - C6A0FF2B0290797F04C91782 /* Documentation */ = { - isa = PBXGroup; - children = ( - ); - name = Documentation; - sourceTree = ""; - }; -/* End PBXGroup section */ - -/* Begin PBXHeadersBuildPhase section */ - D2AAC043055464E500DB518D /* Headers */ = { - isa = PBXHeadersBuildPhase; - buildActionMask = 2147483647; - files = ( - 1350D71B092AA858005EE038 /* clusterfit.h in Headers */, - 1350D71F092AA858005EE038 /* colourblock.h in Headers */, - 1350D720092AA858005EE038 /* config.h in Headers */, - 1350D722092AA858005EE038 /* maths.h in Headers */, - 1350D726092AA858005EE038 /* rangefit.h in Headers */, - 1350D728092AA858005EE038 /* squish.h in Headers */, - 13CD64C2092BCF8A00488C97 /* simd.h in Headers */, - 13D0DC910931F93A00909807 /* simd_ve.h in Headers */, - 13D0DC970931F9D600909807 /* simd_sse.h in Headers */, - 13C4C7AE0941C18000AC5B89 /* colourset.h in Headers */, - 13A7CCA40952BE63001C963A /* colourfit.h in Headers */, - 133FA0DC096A7B8E0050752E /* alpha.h in Headers */, - 139C234F09B0602700A2500D /* singlecolourfit.h in Headers */, - ); - runOnlyForDeploymentPostprocessing = 0; - }; -/* End PBXHeadersBuildPhase section */ - -/* Begin PBXNativeTarget section */ - 1342B4100999DE7F00152915 /* squishpng */ = { - isa = PBXNativeTarget; - buildConfigurationList = 
1342B4130999DE9F00152915 /* Build configuration list for PBXNativeTarget "squishpng" */; - buildPhases = ( - 1342B40E0999DE7F00152915 /* Sources */, - 1342B40F0999DE7F00152915 /* Frameworks */, - ); - buildRules = ( - ); - dependencies = ( - 1342B58F099BF93D00152915 /* PBXTargetDependency */, - ); - name = squishpng; - productName = squishpng; - productReference = 1342B4110999DE7F00152915 /* squishpng */; - productType = "com.apple.product-type.tool"; - }; - 1342B4360999E07C00152915 /* squishtest */ = { - isa = PBXNativeTarget; - buildConfigurationList = 1342B43B0999E0C000152915 /* Build configuration list for PBXNativeTarget "squishtest" */; - buildPhases = ( - 1342B4340999E07C00152915 /* Sources */, - 1342B4350999E07C00152915 /* Frameworks */, - ); - buildRules = ( - ); - dependencies = ( - 1342B52C099BF72F00152915 /* PBXTargetDependency */, - ); - name = squishtest; - productName = squishtest; - productReference = 1342B4370999E07C00152915 /* squishtest */; - productType = "com.apple.product-type.tool"; - }; - 139C21C309ADAA7000A2500D /* squishgen */ = { - isa = PBXNativeTarget; - buildConfigurationList = 139C21CB09ADAB0300A2500D /* Build configuration list for PBXNativeTarget "squishgen" */; - buildPhases = ( - 139C21C109ADAA7000A2500D /* Sources */, - 139C21C209ADAA7000A2500D /* Frameworks */, - ); - buildRules = ( - ); - dependencies = ( - ); - name = squishgen; - productName = squishgen; - productReference = 139C21C409ADAA7000A2500D /* squishgen */; - productType = "com.apple.product-type.tool"; - }; - D2AAC045055464E500DB518D /* squish */ = { - isa = PBXNativeTarget; - buildConfigurationList = 1DEB91EB08733DB70010E9CD /* Build configuration list for PBXNativeTarget "squish" */; - buildPhases = ( - D2AAC043055464E500DB518D /* Headers */, - D2AAC044055464E500DB518D /* Sources */, - D289987405E68DCB004EDB86 /* Frameworks */, - ); - buildRules = ( - ); - dependencies = ( - ); - name = squish; - productName = squish; - productReference = D2AAC046055464E500DB518D /* libsquish.a */; - productType = "com.apple.product-type.library.static"; - }; -/* End PBXNativeTarget section */ - -/* Begin PBXProject section */ - 08FB7793FE84155DC02AAC07 /* Project object */ = { - isa = PBXProject; - buildConfigurationList = 1DEB91EF08733DB70010E9CD /* Build configuration list for PBXProject "squish" */; - hasScannedForEncodings = 1; - mainGroup = 08FB7794FE84155DC02AAC07 /* squish */; - projectDirPath = ""; - targets = ( - D2AAC045055464E500DB518D /* squish */, - 1342B4100999DE7F00152915 /* squishpng */, - 1342B4360999E07C00152915 /* squishtest */, - 139C21C309ADAA7000A2500D /* squishgen */, - ); - }; -/* End PBXProject section */ - -/* Begin PBXSourcesBuildPhase section */ - 1342B40E0999DE7F00152915 /* Sources */ = { - isa = PBXSourcesBuildPhase; - buildActionMask = 2147483647; - files = ( - 1342B41A0999DF7000152915 /* squishpng.cpp in Sources */, - ); - runOnlyForDeploymentPostprocessing = 0; - }; - 1342B4340999E07C00152915 /* Sources */ = { - isa = PBXSourcesBuildPhase; - buildActionMask = 2147483647; - files = ( - 1342B43F0999E0CC00152915 /* squishtest.cpp in Sources */, - ); - runOnlyForDeploymentPostprocessing = 0; - }; - 139C21C109ADAA7000A2500D /* Sources */ = { - isa = PBXSourcesBuildPhase; - buildActionMask = 2147483647; - files = ( - 139C21CF09ADAB0800A2500D /* squishgen.cpp in Sources */, - ); - runOnlyForDeploymentPostprocessing = 0; - }; - D2AAC044055464E500DB518D /* Sources */ = { - isa = PBXSourcesBuildPhase; - buildActionMask = 2147483647; - files = ( - 1350D71A092AA858005EE038 /* 
clusterfit.cpp in Sources */, - 1350D71E092AA858005EE038 /* colourblock.cpp in Sources */, - 1350D721092AA858005EE038 /* maths.cpp in Sources */, - 1350D725092AA858005EE038 /* rangefit.cpp in Sources */, - 1350D727092AA858005EE038 /* squish.cpp in Sources */, - 13C4C7AD0941C18000AC5B89 /* colourset.cpp in Sources */, - 13A7CCA50952BE63001C963A /* colourfit.cpp in Sources */, - 133FA0DD096A7B8E0050752E /* alpha.cpp in Sources */, - 139C235009B0602700A2500D /* singlecolourfit.cpp in Sources */, - ); - runOnlyForDeploymentPostprocessing = 0; - }; -/* End PBXSourcesBuildPhase section */ - -/* Begin PBXTargetDependency section */ - 1342B52C099BF72F00152915 /* PBXTargetDependency */ = { - isa = PBXTargetDependency; - target = D2AAC045055464E500DB518D /* squish */; - targetProxy = 1342B52B099BF72F00152915 /* PBXContainerItemProxy */; - }; - 1342B58F099BF93D00152915 /* PBXTargetDependency */ = { - isa = PBXTargetDependency; - target = D2AAC045055464E500DB518D /* squish */; - targetProxy = 1342B58E099BF93D00152915 /* PBXContainerItemProxy */; - }; -/* End PBXTargetDependency section */ - -/* Begin XCBuildConfiguration section */ - 1342B4140999DE9F00152915 /* Debug */ = { - isa = XCBuildConfiguration; - buildSettings = { - HEADER_SEARCH_PATHS = ( - .., - /sw/include, - ); - INSTALL_PATH = "$(HOME)/bin"; - LIBRARY_SEARCH_PATHS = /sw/lib; - OTHER_LDFLAGS = "-lpng"; - PRODUCT_NAME = squishpng; - }; - name = Debug; - }; - 1342B4150999DE9F00152915 /* Release */ = { - isa = XCBuildConfiguration; - buildSettings = { - HEADER_SEARCH_PATHS = ( - .., - /sw/include, - ); - INSTALL_PATH = "$(HOME)/bin"; - LIBRARY_SEARCH_PATHS = /sw/lib; - OTHER_LDFLAGS = "-lpng"; - PRODUCT_NAME = squishpng; - }; - name = Release; - }; - 1342B43C0999E0C000152915 /* Debug */ = { - isa = XCBuildConfiguration; - buildSettings = { - HEADER_SEARCH_PATHS = ..; - INSTALL_PATH = "$(HOME)/bin"; - PRODUCT_NAME = squishtest; - }; - name = Debug; - }; - 1342B43D0999E0C000152915 /* Release */ = { - isa = XCBuildConfiguration; - buildSettings = { - HEADER_SEARCH_PATHS = ..; - INSTALL_PATH = "$(HOME)/bin"; - PRODUCT_NAME = squishtest; - }; - name = Release; - }; - 139C21CC09ADAB0300A2500D /* Debug */ = { - isa = XCBuildConfiguration; - buildSettings = { - HEADER_SEARCH_PATHS = ..; - INSTALL_PATH = "$(HOME)/bin"; - PRODUCT_NAME = squishgen; - }; - name = Debug; - }; - 139C21CD09ADAB0300A2500D /* Release */ = { - isa = XCBuildConfiguration; - buildSettings = { - HEADER_SEARCH_PATHS = ..; - INSTALL_PATH = "$(HOME)/bin"; - PRODUCT_NAME = squishgen; - }; - name = Release; - }; - 1DEB91EC08733DB70010E9CD /* Debug */ = { - isa = XCBuildConfiguration; - buildSettings = { - COPY_PHASE_STRIP = NO; - GCC_PREPROCESSOR_DEFINITIONS = "SQUISH_USE_ALTIVEC=1"; - INSTALL_PATH = /usr/local/lib; - OTHER_CFLAGS = "-maltivec"; - PRODUCT_NAME = squish; - STRIP_INSTALLED_PRODUCT = NO; - }; - name = Debug; - }; - 1DEB91ED08733DB70010E9CD /* Release */ = { - isa = XCBuildConfiguration; - buildSettings = { - GCC_PREPROCESSOR_DEFINITIONS = "SQUISH_USE_ALTIVEC=1"; - INSTALL_PATH = /usr/local/lib; - OTHER_CFLAGS = "-maltivec"; - PRODUCT_NAME = squish; - STRIP_INSTALLED_PRODUCT = YES; - }; - name = Release; - }; - 1DEB91F008733DB70010E9CD /* Debug */ = { - isa = XCBuildConfiguration; - buildSettings = { - GCC_DYNAMIC_NO_PIC = YES; - GCC_OPTIMIZATION_LEVEL = 0; - GCC_TREAT_WARNINGS_AS_ERRORS = YES; - GCC_WARN_ABOUT_MISSING_NEWLINE = YES; - GCC_WARN_ABOUT_RETURN_TYPE = YES; - GCC_WARN_PEDANTIC = YES; - GCC_WARN_SHADOW = YES; - GCC_WARN_SIGN_COMPARE = YES; - 
GCC_WARN_UNUSED_PARAMETER = YES; - GCC_WARN_UNUSED_VALUE = YES; - GCC_WARN_UNUSED_VARIABLE = YES; - PREBINDING = NO; - SDKROOT = /Developer/SDKs/MacOSX10.4u.sdk; - }; - name = Debug; - }; - 1DEB91F108733DB70010E9CD /* Release */ = { - isa = XCBuildConfiguration; - buildSettings = { - GCC_DYNAMIC_NO_PIC = YES; - GCC_OPTIMIZATION_LEVEL = 3; - GCC_TREAT_WARNINGS_AS_ERRORS = YES; - GCC_UNROLL_LOOPS = YES; - GCC_WARN_ABOUT_MISSING_NEWLINE = YES; - GCC_WARN_ABOUT_RETURN_TYPE = YES; - GCC_WARN_PEDANTIC = YES; - GCC_WARN_SHADOW = YES; - GCC_WARN_SIGN_COMPARE = YES; - GCC_WARN_UNUSED_PARAMETER = YES; - GCC_WARN_UNUSED_VALUE = YES; - GCC_WARN_UNUSED_VARIABLE = YES; - PREBINDING = NO; - SDKROOT = /Developer/SDKs/MacOSX10.4u.sdk; - }; - name = Release; - }; -/* End XCBuildConfiguration section */ - -/* Begin XCConfigurationList section */ - 1342B4130999DE9F00152915 /* Build configuration list for PBXNativeTarget "squishpng" */ = { - isa = XCConfigurationList; - buildConfigurations = ( - 1342B4140999DE9F00152915 /* Debug */, - 1342B4150999DE9F00152915 /* Release */, - ); - defaultConfigurationIsVisible = 0; - defaultConfigurationName = Release; - }; - 1342B43B0999E0C000152915 /* Build configuration list for PBXNativeTarget "squishtest" */ = { - isa = XCConfigurationList; - buildConfigurations = ( - 1342B43C0999E0C000152915 /* Debug */, - 1342B43D0999E0C000152915 /* Release */, - ); - defaultConfigurationIsVisible = 0; - defaultConfigurationName = Release; - }; - 139C21CB09ADAB0300A2500D /* Build configuration list for PBXNativeTarget "squishgen" */ = { - isa = XCConfigurationList; - buildConfigurations = ( - 139C21CC09ADAB0300A2500D /* Debug */, - 139C21CD09ADAB0300A2500D /* Release */, - ); - defaultConfigurationIsVisible = 0; - defaultConfigurationName = Release; - }; - 1DEB91EB08733DB70010E9CD /* Build configuration list for PBXNativeTarget "squish" */ = { - isa = XCConfigurationList; - buildConfigurations = ( - 1DEB91EC08733DB70010E9CD /* Debug */, - 1DEB91ED08733DB70010E9CD /* Release */, - ); - defaultConfigurationIsVisible = 0; - defaultConfigurationName = Release; - }; - 1DEB91EF08733DB70010E9CD /* Build configuration list for PBXProject "squish" */ = { - isa = XCConfigurationList; - buildConfigurations = ( - 1DEB91F008733DB70010E9CD /* Debug */, - 1DEB91F108733DB70010E9CD /* Release */, - ); - defaultConfigurationIsVisible = 0; - defaultConfigurationName = Release; - }; -/* End XCConfigurationList section */ - }; - rootObject = 08FB7793FE84155DC02AAC07 /* Project object */; -} Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/texture_compression_s3tc.txt =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/texture_compression_s3tc.txt +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/texture_compression_s3tc.txt @@ -1,508 +0,0 @@ -Name - - EXT_texture_compression_s3tc - -Name Strings - - GL_EXT_texture_compression_s3tc - -Contact - - Pat Brown, NVIDIA Corporation (pbrown 'at' nvidia.com) - -Status - - FINAL - -Version - - 1.1, 16 November 2001 (containing only clarifications relative to - version 1.0, dated 7 July 2000) - -Number - - 198 - -Dependencies - - OpenGL 1.1 is required. - - GL_ARB_texture_compression is required. - - This extension is written against the OpenGL 1.2.1 Specification. 
- -Overview - - This extension provides additional texture compression functionality - specific to S3's S3TC format (called DXTC in Microsoft's DirectX API), - subject to all the requirements and limitations described by the extension - GL_ARB_texture_compression. - - This extension supports DXT1, DXT3, and DXT5 texture compression formats. - For the DXT1 image format, this specification supports an RGB-only mode - and a special RGBA mode with single-bit "transparent" alpha. - -IP Status - - Contact S3 Incorporated (http://www.s3.com) regarding any intellectual - property issues associated with implementing this extension. - - WARNING: Vendors able to support S3TC texture compression in Direct3D - drivers do not necessarily have the right to use the same functionality in - OpenGL. - -Issues - - (1) Should DXT2 and DXT4 (premultiplied alpha) formats be supported? - - RESOLVED: No -- insufficient interest. Supporting DXT2 and DXT4 - would require some rework to the TexEnv definition (maybe add a new - base internal format RGBA_PREMULTIPLIED_ALPHA) for these formats. - Note that the EXT_texture_env_combine extension (which extends normal - TexEnv modes) can be used to support textures with premultipled alpha. - - (2) Should generic "RGB_S3TC_EXT" and "RGBA_S3TC_EXT" enums be supported - or should we use only the DXT enums? - - RESOLVED: No. A generic RGBA_S3TC_EXT is problematic because DXT3 - and DXT5 are both nominally RGBA (and DXT1 with the 1-bit alpha is - also) yet one format must be chosen up front. - - (3) Should TexSubImage support all block-aligned edits or just the minimal - functionality required by the ARB_texture_compression extension? - - RESOLVED: Allow all valid block-aligned edits. - - (4) A pre-compressed image with a DXT1 format can be used as either an - RGB_S3TC_DXT1 or an RGBA_S3TC_DXT1 image. If the image has - transparent texels, how are they treated in each format? - - RESOLVED: The renderer has to make sure that an RGB_S3TC_DXT1 format - is decoded as RGB (where alpha is effectively one for all texels), - while RGBA_S3TC_DXT1 is decoded as RGBA (where alpha is zero for all - texels with "transparent" encodings). Otherwise, the formats are - identical. - - (5) Is the encoding of the RGB components for DXT1 formats correct in this - spec? MSDN documentation does not specify an RGB color for the - "transparent" encoding. Is it really black? - - RESOLVED: Yes. The specification for the DXT1 format initially - required black, but later changed that requirement to a - recommendation. All vendors involved in the definition of this - specification support black. In addition, specifying black has a - useful behavior. - - When blending multiple texels (GL_LINEAR filtering), mixing opaque and - transparent samples is problematic. Defining a black color on - transparent texels achieves a sensible result that works like a - texture with premultiplied alpha. For example, if three opaque white - and one transparent sample is being averaged, the result would be a - 75% intensity gray (with an alpha of 75%). This is the same result on - the color channels as would be obtained using a white color, 75% - alpha, and a SRC_ALPHA blend factor. - - (6) Is the encoding of the RGB components for DXT3 and DXT5 formats - correct in this spec? MSDN documentation suggests that the RGB blocks - for DXT3 and DXT5 are decoded as described by the DXT1 format. - - RESOLVED: Yes -- this appears to be a bug in the MSDN documentation. 
- The specification for the DXT2-DXT5 formats require decoding using the - opaque block encoding, regardless of the relative values of "color0" - and "color1". - -New Procedures and Functions - - None. - -New Tokens - - Accepted by the parameter of TexImage2D, CopyTexImage2D, - and CompressedTexImage2DARB and the parameter of - CompressedTexSubImage2DARB: - - COMPRESSED_RGB_S3TC_DXT1_EXT 0x83F0 - COMPRESSED_RGBA_S3TC_DXT1_EXT 0x83F1 - COMPRESSED_RGBA_S3TC_DXT3_EXT 0x83F2 - COMPRESSED_RGBA_S3TC_DXT5_EXT 0x83F3 - -Additions to Chapter 2 of the OpenGL 1.2.1 Specification (OpenGL Operation) - - None. - -Additions to Chapter 3 of the OpenGL 1.2.1 Specification (Rasterization) - - Add to Table 3.16.1: Specific Compressed Internal Formats - - Compressed Internal Format Base Internal Format - ========================== ==================== - COMPRESSED_RGB_S3TC_DXT1_EXT RGB - COMPRESSED_RGBA_S3TC_DXT1_EXT RGBA - COMPRESSED_RGBA_S3TC_DXT3_EXT RGBA - COMPRESSED_RGBA_S3TC_DXT5_EXT RGBA - - - Modify Section 3.8.2, Alternate Image Specification - - (add to end of TexSubImage discussion, p.123 -- after edit from the - ARB_texture_compression spec) - - If the internal format of the texture image being modified is - COMPRESSED_RGB_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT1_EXT, - COMPRESSED_RGBA_S3TC_DXT3_EXT, or COMPRESSED_RGBA_S3TC_DXT5_EXT, the - texture is stored using one of the several S3TC compressed texture image - formats. Such images are easily edited along 4x4 texel boundaries, so the - limitations on TexSubImage2D or CopyTexSubImage2D parameters are relaxed. - TexSubImage2D and CopyTexSubImage2D will result in an INVALID_OPERATION - error only if one of the following conditions occurs: - - * is not a multiple of four or equal to TEXTURE_WIDTH, - unless and are both zero. - * is not a multiple of four or equal to TEXTURE_HEIGHT, - unless and are both zero. - * or is not a multiple of four. - - The contents of any 4x4 block of texels of an S3TC compressed texture - image that does not intersect the area being modified are preserved during - valid TexSubImage2D and CopyTexSubImage2D calls. - - - Add to Section 3.8.2, Alternate Image Specification (adding to the end of - the CompressedTexImage section introduced by the ARB_texture_compression - spec) - - If is COMPRESSED_RGB_S3TC_DXT1_EXT, - COMPRESSED_RGBA_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT3_EXT, or - COMPRESSED_RGBA_S3TC_DXT5_EXT, the compressed texture is stored using one - of several S3TC compressed texture image formats. The S3TC texture - compression algorithm supports only 2D images without borders. - CompressedTexImage1DARB and CompressedTexImage3DARB produce an - INVALID_ENUM error if is an S3TC format. - CompressedTexImage2DARB will produce an INVALID_OPERATION error if - is non-zero. - - - Add to Section 3.8.2, Alternate Image Specification (adding to the end of - the CompressedTexSubImage section introduced by the - ARB_texture_compression spec) - - If the internal format of the texture image being modified is - COMPRESSED_RGB_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT1_EXT, - COMPRESSED_RGBA_S3TC_DXT3_EXT, or COMPRESSED_RGBA_S3TC_DXT5_EXT, the - texture is stored using one of the several S3TC compressed texture image - formats. Since the S3TC texture compression algorithm supports only 2D - images, CompressedTexSubImage1DARB and CompressedTexSubImage3DARB produce - an INVALID_ENUM error if is an S3TC format. Since S3TC images - are easily edited along 4x4 texel boundaries, the limitations on - CompressedTexSubImage2D are relaxed. 
CompressedTexSubImage2D will result - in an INVALID_OPERATION error only if one of the following conditions - occurs: - - * is not a multiple of four or equal to TEXTURE_WIDTH. - * is not a multiple of four or equal to TEXTURE_HEIGHT. - * or is not a multiple of four. - - The contents of any 4x4 block of texels of an S3TC compressed texture - image that does not intersect the area being modified are preserved during - valid TexSubImage2D and CopyTexSubImage2D calls. - -Additions to Chapter 4 of the OpenGL 1.2.1 Specification (Per-Fragment -Operations and the Frame Buffer) - - None. - -Additions to Chapter 5 of the OpenGL 1.2.1 Specification (Special Functions) - - None. - -Additions to Chapter 6 of the OpenGL 1.2.1 Specification (State and -State Requests) - - None. - -Additions to Appendix A of the OpenGL 1.2.1 Specification (Invariance) - - None. - -Additions to the AGL/GLX/WGL Specifications - - None. - -GLX Protocol - - None. - -Errors - - INVALID_ENUM is generated by CompressedTexImage1DARB or - CompressedTexImage3DARB if is - COMPRESSED_RGB_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT1_EXT, - COMPRESSED_RGBA_S3TC_DXT3_EXT, or COMPRESSED_RGBA_S3TC_DXT5_EXT. - - INVALID_OPERATION is generated by CompressedTexImage2DARB if - is COMPRESSED_RGB_S3TC_DXT1_EXT, - COMPRESSED_RGBA_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT3_EXT, or - COMPRESSED_RGBA_S3TC_DXT5_EXT and is not equal to zero. - - INVALID_ENUM is generated by CompressedTexSubImage1DARB or - CompressedTexSubImage3DARB if is COMPRESSED_RGB_S3TC_DXT1_EXT, - COMPRESSED_RGBA_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT3_EXT, or - COMPRESSED_RGBA_S3TC_DXT5_EXT. - - INVALID_OPERATION is generated by TexSubImage2D CopyTexSubImage2D, or - CompressedTexSubImage2D if TEXTURE_INTERNAL_FORMAT is - COMPRESSED_RGB_S3TC_DXT1_EXT, COMPRESSED_RGBA_S3TC_DXT1_EXT, - COMPRESSED_RGBA_S3TC_DXT3_EXT, or COMPRESSED_RGBA_S3TC_DXT5_EXT and any of - the following apply: is not a multiple of four or equal to - TEXTURE_WIDTH; is not a multiple of four or equal to - TEXTURE_HEIGHT; or is not a multiple of four. - - - The following restrictions from the ARB_texture_compression specification - do not apply to S3TC texture formats, since subimage modification is - straightforward as long as the subimage is properly aligned. - - DELETE: INVALID_OPERATION is generated by TexSubImage1D, TexSubImage2D, - DELETE: TexSubImage3D, CopyTexSubImage1D, CopyTexSubImage2D, or - DELETE: CopyTexSubImage3D if the internal format of the texture image is - DELETE: compressed and , , or does not equal - DELETE: -b, where b is value of TEXTURE_BORDER. - - DELETE: INVALID_VALUE is generated by CompressedTexSubImage1DARB, - DELETE: CompressedTexSubImage2DARB, or CompressedTexSubImage3DARB if the - DELETE: entire texture image is not being edited: if , - DELETE: , or is greater than -b, + is - DELETE: less than w+b, + is less than h+b, or - DELETE: + is less than d+b, where b is the value of - DELETE: TEXTURE_BORDER, w is the value of TEXTURE_WIDTH, h is the value of - DELETE: TEXTURE_HEIGHT, and d is the value of TEXTURE_DEPTH. - - See also errors in the GL_ARB_texture_compression specification. - -New State - - In the "Textures" state table, increment the TEXTURE_INTERNAL_FORMAT - subscript for Z by 4 in the "Type" row. 
- -New Implementation Dependent State - - None - -Appendix - - S3TC Compressed Texture Image Formats - - Compressed texture images stored using the S3TC compressed image formats - are represented as a collection of 4x4 texel blocks, where each block - contains 64 or 128 bits of texel data. The image is encoded as a normal - 2D raster image in which each 4x4 block is treated as a single pixel. If - an S3TC image has a width or height less than four, the data corresponding - to texels outside the image are irrelevant and undefined. - - When an S3TC image with a width of , height of , and block size of - (8 or 16 bytes) is decoded, the corresponding image size (in - bytes) is: - - ceil(/4) * ceil(/4) * blocksize. - - When decoding an S3TC image, the block containing the texel at offset - (, ) begins at an offset (in bytes) relative to the base of the - image of: - - blocksize * (ceil(/4) * floor(/4) + floor(/4)). - - The data corresponding to a specific texel (, ) are extracted from a - 4x4 texel block using a relative (x,y) value of - - ( modulo 4, modulo 4). - - There are four distinct S3TC image formats: - - COMPRESSED_RGB_S3TC_DXT1_EXT: Each 4x4 block of texels consists of 64 - bits of RGB image data. - - Each RGB image data block is encoded as a sequence of 8 bytes, called (in - order of increasing address): - - c0_lo, c0_hi, c1_lo, c1_hi, bits_0, bits_1, bits_2, bits_3 - - The 8 bytes of the block are decoded into three quantities: - - color0 = c0_lo + c0_hi * 256 - color1 = c1_lo + c1_hi * 256 - bits = bits_0 + 256 * (bits_1 + 256 * (bits_2 + 256 * bits_3)) - - color0 and color1 are 16-bit unsigned integers that are unpacked to - RGB colors RGB0 and RGB1 as though they were 16-bit packed pixels with - a of RGB and a type of UNSIGNED_SHORT_5_6_5. - - bits is a 32-bit unsigned integer, from which a two-bit control code - is extracted for a texel at location (x,y) in the block using: - - code(x,y) = bits[2*(4*y+x)+1..2*(4*y+x)+0] - - where bit 31 is the most significant and bit 0 is the least - significant bit. - - The RGB color for a texel at location (x,y) in the block is given by: - - RGB0, if color0 > color1 and code(x,y) == 0 - RGB1, if color0 > color1 and code(x,y) == 1 - (2*RGB0+RGB1)/3, if color0 > color1 and code(x,y) == 2 - (RGB0+2*RGB1)/3, if color0 > color1 and code(x,y) == 3 - - RGB0, if color0 <= color1 and code(x,y) == 0 - RGB1, if color0 <= color1 and code(x,y) == 1 - (RGB0+RGB1)/2, if color0 <= color1 and code(x,y) == 2 - BLACK, if color0 <= color1 and code(x,y) == 3 - - Arithmetic operations are done per component, and BLACK refers to an - RGB color where red, green, and blue are all zero. - - Since this image has an RGB format, there is no alpha component and the - image is considered fully opaque. - - - COMPRESSED_RGBA_S3TC_DXT1_EXT: Each 4x4 block of texels consists of 64 - bits of RGB image data and minimal alpha information. The RGB components - of a texel are extracted in the same way as COMPRESSED_RGB_S3TC_DXT1_EXT. - - The alpha component for a texel at location (x,y) in the block is - given by: - - 0.0, if color0 <= color1 and code(x,y) == 3 - 1.0, otherwise - - IMPORTANT: When encoding an RGBA image into a format using 1-bit - alpha, any texels with an alpha component less than 0.5 end up with an - alpha of 0.0 and any texels with an alpha component greater than or - equal to 0.5 end up with an alpha of 1.0. 
When encoding an RGBA image - into the COMPRESSED_RGBA_S3TC_DXT1_EXT format, the resulting red, - green, and blue components of any texels with a final alpha of 0.0 - will automatically be zero (black). If this behavior is not desired - by an application, it should not use COMPRESSED_RGBA_S3TC_DXT1_EXT. - This format will never be used when a generic compressed internal - format (Table 3.16.2) is specified, although the nearly identical - format COMPRESSED_RGB_S3TC_DXT1_EXT (above) may be. - - - COMPRESSED_RGBA_S3TC_DXT3_EXT: Each 4x4 block of texels consists of 64 - bits of uncompressed alpha image data followed by 64 bits of RGB image - data. - - Each RGB image data block is encoded according to the - COMPRESSED_RGB_S3TC_DXT1_EXT format, with the exception that the two code - bits always use the non-transparent encodings. In other words, they are - treated as though color0 > color1, regardless of the actual values of - color0 and color1. - - Each alpha image data block is encoded as a sequence of 8 bytes, called - (in order of increasing address): - - a0, a1, a2, a3, a4, a5, a6, a7 - - The 8 bytes of the block are decoded into one 64-bit integer: - - alpha = a0 + 256 * (a1 + 256 * (a2 + 256 * (a3 + 256 * (a4 + - 256 * (a5 + 256 * (a6 + 256 * a7)))))) - - alpha is a 64-bit unsigned integer, from which a four-bit alpha value - is extracted for a texel at location (x,y) in the block using: - - alpha(x,y) = bits[4*(4*y+x)+3..4*(4*y+x)+0] - - where bit 63 is the most significant and bit 0 is the least - significant bit. - - The alpha component for a texel at location (x,y) in the block is - given by alpha(x,y) / 15. - - - COMPRESSED_RGBA_S3TC_DXT5_EXT: Each 4x4 block of texels consists of 64 - bits of compressed alpha image data followed by 64 bits of RGB image data. - - Each RGB image data block is encoded according to the - COMPRESSED_RGB_S3TC_DXT1_EXT format, with the exception that the two code - bits always use the non-transparent encodings. In other words, they are - treated as though color0 > color1, regardless of the actual values of - color0 and color1. - - Each alpha image data block is encoded as a sequence of 8 bytes, called - (in order of increasing address): - - alpha0, alpha1, bits_0, bits_1, bits_2, bits_3, bits_4, bits_5 - - The alpha0 and alpha1 are 8-bit unsigned bytes converted to alpha - components by multiplying by 1/255. - - The 6 "bits" bytes of the block are decoded into one 48-bit integer: - - bits = bits_0 + 256 * (bits_1 + 256 * (bits_2 + 256 * (bits_3 + - 256 * (bits_4 + 256 * bits_5)))) - - bits is a 48-bit unsigned integer, from which a three-bit control code - is extracted for a texel at location (x,y) in the block using: - - code(x,y) = bits[3*(4*y+x)+1..3*(4*y+x)+0] - - where bit 47 is the most significant and bit 0 is the least - significant bit. 
- - The alpha component for a texel at location (x,y) in the block is - given by: - - alpha0, code(x,y) == 0 - alpha1, code(x,y) == 1 - - (6*alpha0 + 1*alpha1)/7, alpha0 > alpha1 and code(x,y) == 2 - (5*alpha0 + 2*alpha1)/7, alpha0 > alpha1 and code(x,y) == 3 - (4*alpha0 + 3*alpha1)/7, alpha0 > alpha1 and code(x,y) == 4 - (3*alpha0 + 4*alpha1)/7, alpha0 > alpha1 and code(x,y) == 5 - (2*alpha0 + 5*alpha1)/7, alpha0 > alpha1 and code(x,y) == 6 - (1*alpha0 + 6*alpha1)/7, alpha0 > alpha1 and code(x,y) == 7 - - (4*alpha0 + 1*alpha1)/5, alpha0 <= alpha1 and code(x,y) == 2 - (3*alpha0 + 2*alpha1)/5, alpha0 <= alpha1 and code(x,y) == 3 - (2*alpha0 + 3*alpha1)/5, alpha0 <= alpha1 and code(x,y) == 4 - (1*alpha0 + 4*alpha1)/5, alpha0 <= alpha1 and code(x,y) == 5 - 0.0, alpha0 <= alpha1 and code(x,y) == 6 - 1.0, alpha0 <= alpha1 and code(x,y) == 7 - - -Revision History - - 1.1, 11/16/01 pbrown: Updated contact info, clarified where texels - fall within a single block. - - 1.0, 07/07/00 prbrown1: Published final version agreed to by working - group members. - - 0.9, 06/24/00 prbrown1: Documented that block-aligned TexSubImage calls - do not modify existing texels outside the - modified blocks. Added caveat to allow for a - (0,0)-anchored TexSubImage operation of - arbitrary size. - - 0.7, 04/11/00 prbrown1: Added issues on DXT1, DXT3, and DXT5 encodings - where the MSDN documentation doesn't match what - is really done. Added enum values from the - extension registry. - - 0.4, 03/28/00 prbrown1: Updated to reflect final version of the - ARB_texture_compression extension. Allowed - block-aligned TexSubImage calls. - - 0.3, 03/07/00 prbrown1: Resolved issues pertaining to the format of RGB - blocks in the DXT3 and DXT5 formats (they don't - ever use the "transparent" encoding). Fixed - decoding of DXT1 blocks. Pointed out issue of - "transparent" texels in DXT1 encodings having - different behaviors for RGB and RGBA internal - formats. - - 0.2, 02/23/00 prbrown1: Minor revisions; added several issues. - - 0.11, 02/17/00 prbrown1: Slight modification to error semantics - (INVALID_ENUM instead of INVALID_OPERATION). - - 0.1, 02/15/00 prbrown1: Initial revision. 
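For reference, the DXT1 decoding rules quoted in the removed spec text above can be condensed into a short C++ sketch. This is purely illustrative and not part of this commit nor of the NVTT/squish sources; `Unpack565` and `DecodeDXT1Block` are hypothetical helper names, and the 5:6:5 expansion uses the common bit-replication convention (the spec only requires decoding "as though" the value were an UNSIGNED_SHORT_5_6_5 packed pixel).

```cpp
#include <cstdint>

// Expand a packed 5:6:5 colour to 8-bit RGB components (bit replication).
static void Unpack565(uint16_t c, int rgb[3])
{
    int r = (c >> 11) & 0x1F;
    int g = (c >> 5) & 0x3F;
    int b = c & 0x1F;
    rgb[0] = (r << 3) | (r >> 2);
    rgb[1] = (g << 2) | (g >> 4);
    rgb[2] = (b << 3) | (b >> 2);
}

// Decode one 8-byte DXT1 block into 16 RGBA texels (row-major 4x4),
// following the COMPRESSED_RGB(A)_S3TC_DXT1_EXT rules quoted above:
//   color0 >  color1 -> opaque 4-colour mode,
//   color0 <= color1 -> 3-colour mode whose code 3 is transparent black.
void DecodeDXT1Block(const uint8_t block[8], uint8_t out[16][4])
{
    uint16_t color0 = uint16_t(block[0] | (block[1] << 8));
    uint16_t color1 = uint16_t(block[2] | (block[3] << 8));
    uint32_t bits = uint32_t(block[4]) | (uint32_t(block[5]) << 8) |
                    (uint32_t(block[6]) << 16) | (uint32_t(block[7]) << 24);

    int rgb0[3], rgb1[3];
    Unpack565(color0, rgb0);
    Unpack565(color1, rgb1);

    // Build the 4-entry palette (codes 0..3) plus per-entry alpha.
    int palette[4][4];
    for (int i = 0; i < 3; ++i)
    {
        palette[0][i] = rgb0[i];
        palette[1][i] = rgb1[i];
        if (color0 > color1)
        {
            palette[2][i] = (2 * rgb0[i] + rgb1[i]) / 3;
            palette[3][i] = (rgb0[i] + 2 * rgb1[i]) / 3;
        }
        else
        {
            palette[2][i] = (rgb0[i] + rgb1[i]) / 2;
            palette[3][i] = 0; // BLACK; transparent in the RGBA_DXT1 interpretation
        }
    }
    palette[0][3] = palette[1][3] = palette[2][3] = 255;
    palette[3][3] = (color0 > color1) ? 255 : 0;

    for (int texel = 0; texel < 16; ++texel)
    {
        // code(x,y) = bits[2*(4*y+x)+1 .. 2*(4*y+x)], with texel = 4*y + x.
        int code = int((bits >> (2 * texel)) & 0x3);
        for (int c = 0; c < 4; ++c)
            out[texel][c] = uint8_t(palette[code][c]);
    }
}
```

The two-mode palette (opaque four-colour vs. three colours plus transparent black) is the encoding that the squish cluster-fit code elsewhere in this patch searches endpoints for; the sketch is only meant to make the removed spec's formulas concrete.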
Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/vs7/squish.sln =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/vs7/squish.sln +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/vs7/squish.sln @@ -1,39 +0,0 @@ -Microsoft Visual Studio Solution File, Format Version 8.00 -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "squish", "squish\squish.vcproj", "{6A8518C3-D81A-4428-BD7F-C37933088AC1}" - ProjectSection(ProjectDependencies) = postProject - EndProjectSection -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "squishpng", "squishpng\squishpng.vcproj", "{3BC7CF47-F1C8-4BDA-BE30-92F17B21D2C7}" - ProjectSection(ProjectDependencies) = postProject - {6A8518C3-D81A-4428-BD7F-C37933088AC1} = {6A8518C3-D81A-4428-BD7F-C37933088AC1} - EndProjectSection -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "squishtest", "squishtest\squishtest.vcproj", "{77A3F26C-A1D6-4535-9E37-7D3DF34E4B4B}" - ProjectSection(ProjectDependencies) = postProject - {6A8518C3-D81A-4428-BD7F-C37933088AC1} = {6A8518C3-D81A-4428-BD7F-C37933088AC1} - EndProjectSection -EndProject -Global - GlobalSection(SolutionConfiguration) = preSolution - Debug = Debug - Release = Release - EndGlobalSection - GlobalSection(ProjectConfiguration) = postSolution - {6A8518C3-D81A-4428-BD7F-C37933088AC1}.Debug.ActiveCfg = Debug|Win32 - {6A8518C3-D81A-4428-BD7F-C37933088AC1}.Debug.Build.0 = Debug|Win32 - {6A8518C3-D81A-4428-BD7F-C37933088AC1}.Release.ActiveCfg = Release|Win32 - {6A8518C3-D81A-4428-BD7F-C37933088AC1}.Release.Build.0 = Release|Win32 - {3BC7CF47-F1C8-4BDA-BE30-92F17B21D2C7}.Debug.ActiveCfg = Debug|Win32 - {3BC7CF47-F1C8-4BDA-BE30-92F17B21D2C7}.Debug.Build.0 = Debug|Win32 - {3BC7CF47-F1C8-4BDA-BE30-92F17B21D2C7}.Release.ActiveCfg = Release|Win32 - {3BC7CF47-F1C8-4BDA-BE30-92F17B21D2C7}.Release.Build.0 = Release|Win32 - {77A3F26C-A1D6-4535-9E37-7D3DF34E4B4B}.Debug.ActiveCfg = Debug|Win32 - {77A3F26C-A1D6-4535-9E37-7D3DF34E4B4B}.Debug.Build.0 = Debug|Win32 - {77A3F26C-A1D6-4535-9E37-7D3DF34E4B4B}.Release.ActiveCfg = Release|Win32 - {77A3F26C-A1D6-4535-9E37-7D3DF34E4B4B}.Release.Build.0 = Release|Win32 - EndGlobalSection - GlobalSection(ExtensibilityGlobals) = postSolution - EndGlobalSection - GlobalSection(ExtensibilityAddIns) = postSolution - EndGlobalSection -EndGlobal Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/vs7/squish/squish.vcproj =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/vs7/squish/squish.vcproj +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/vs7/squish/squish.vcproj @@ -1,198 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/vs7/squishpng/squishpng.vcproj =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/vs7/squishpng/squishpng.vcproj +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/vs7/squishpng/squishpng.vcproj @@ -1,140 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/vs7/squishtest/squishtest.vcproj =================================================================== --- 
ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/vs7/squishtest/squishtest.vcproj +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/vs7/squishtest/squishtest.vcproj @@ -1,138 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/weightedclusterfit.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/weightedclusterfit.h +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/weightedclusterfit.h @@ -23,16 +23,16 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------- */ - -#ifndef SQUISH_WEIGHTEDCLUSTERFIT_H -#define SQUISH_WEIGHTEDCLUSTERFIT_H + +#ifndef NV_SQUISH_WEIGHTEDCLUSTERFIT_H +#define NV_SQUISH_WEIGHTEDCLUSTERFIT_H #include "squish.h" #include "maths.h" #include "simd.h" #include "colourfit.h" -namespace squish { +namespace nvsquish { class WeightedClusterFit : public ColourFit { Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/weightedclusterfit.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/weightedclusterfit.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/squish/weightedclusterfit.cpp @@ -1,28 +1,28 @@ /* ----------------------------------------------------------------------------- -Copyright (c) 2006 Simon Brown si@sjbrown.co.uk -Copyright (c) 2006 Ignacio Castano icastano@nvidia.com + Copyright (c) 2006 Simon Brown si@sjbrown.co.uk + Copyright (c) 2006 Ignacio Castano icastano@nvidia.com -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice shall be included -in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - --------------------------------------------------------------------------- */ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + -------------------------------------------------------------------------- */ #include "weightedclusterfit.h" #include "colourset.h" @@ -30,158 +30,277 @@ #include -namespace squish { +namespace nvsquish { - WeightedClusterFit::WeightedClusterFit() - { - } +WeightedClusterFit::WeightedClusterFit() +{ +} - void WeightedClusterFit::SetColourSet( ColourSet const* colours, int flags ) - { - ColourFit::SetColourSet( colours, flags ); +void WeightedClusterFit::SetColourSet( ColourSet const* colours, int flags ) +{ + ColourFit::SetColourSet( colours, flags ); - // initialise the best error + // initialise the best error #if SQUISH_USE_SIMD - m_besterror = VEC4_CONST( FLT_MAX ); - Vec3 metric = m_metric.GetVec3(); + m_besterror = VEC4_CONST( FLT_MAX ); + Vec3 metric = m_metric.GetVec3(); #else - m_besterror = FLT_MAX; - Vec3 metric = m_metric; + m_besterror = FLT_MAX; + Vec3 metric = m_metric; #endif - // cache some values - int const count = m_colours->GetCount(); - Vec3 const* values = m_colours->GetPoints(); - - // get the covariance matrix - Sym3x3 covariance = ComputeWeightedCovariance( count, values, m_colours->GetWeights(), metric ); - - // compute the principle component - Vec3 principle = ComputePrincipleComponent( covariance ); - - // build the list of values - float dps[16]; - for( int i = 0; i < count; ++i ) - { - dps[i] = Dot( values[i], principle ); - m_order[i] = i; - } - - // stable sort - for( int i = 0; i < count; ++i ) + // cache some values + int const count = m_colours->GetCount(); + Vec3 const* values = m_colours->GetPoints(); + + // get the covariance matrix + Sym3x3 covariance = ComputeWeightedCovariance( count, values, m_colours->GetWeights(), metric ); + + // compute the principle component + Vec3 principle = ComputePrincipleComponent( covariance ); + + // build the list of values + float dps[16]; + for( int i = 0; i < count; ++i ) + { + dps[i] = Dot( values[i], principle ); + m_order[i] = i; + } + + // stable sort + for( int i = 0; i < count; ++i ) + { + for( int j = i; j > 0 && dps[j] < dps[j - 1]; --j ) { - for( int j = i; j > 0 && dps[j] < dps[j - 1]; --j ) - { - std::swap( dps[j], dps[j - 1] ); - std::swap( m_order[j], m_order[j - 1] ); - } + std::swap( dps[j], dps[j - 1] ); + std::swap( m_order[j], m_order[j - 1] ); } - - // weight all the points + } + + // weight all the points #if SQUISH_USE_SIMD - Vec4 const* unweighted = m_colours->GetPointsSimd(); - Vec4 const* weights = m_colours->GetWeightsSimd(); - m_xxsum = VEC4_CONST( 0.0f ); - m_xsum = VEC4_CONST( 0.0f ); + Vec4 const* unweighted = m_colours->GetPointsSimd(); + Vec4 const* weights = m_colours->GetWeightsSimd(); + m_xxsum = VEC4_CONST( 0.0f ); + m_xsum = VEC4_CONST( 0.0f ); #else - Vec3 const* unweighted = m_colours->GetPoints(); - float const* weights = m_colours->GetWeights(); - m_xxsum = Vec3( 0.0f ); - m_xsum = Vec3( 0.0f ); - m_wsum = 0.0f; + Vec3 const* unweighted = m_colours->GetPoints(); + float const* weights = m_colours->GetWeights(); + m_xxsum = Vec3( 0.0f ); + m_xsum = Vec3( 0.0f ); + m_wsum = 0.0f; #endif - - for( int i = 
0; i < count; ++i ) - { - int p = m_order[i]; - m_weighted[i] = weights[p] * unweighted[p]; - m_xxsum += m_weighted[i] * m_weighted[i]; - m_xsum += m_weighted[i]; + + for( int i = 0; i < count; ++i ) + { + int p = m_order[i]; + m_weighted[i] = weights[p] * unweighted[p]; + m_xxsum += m_weighted[i] * m_weighted[i]; + m_xsum += m_weighted[i]; #if !SQUISH_USE_SIMD - m_weights[i] = weights[p]; - m_wsum += m_weights[i]; + m_weights[i] = weights[p]; + m_wsum += m_weights[i]; #endif - } } +} - void WeightedClusterFit::SetMetric(float r, float g, float b) - { +void WeightedClusterFit::SetMetric(float r, float g, float b) +{ #if SQUISH_USE_SIMD - m_metric = Vec4(r, g, b, 0); + m_metric = Vec4(r, g, b, 0); #else - m_metric = Vec3(r, g, b); + m_metric = Vec3(r, g, b); #endif - m_metricSqr = m_metric * m_metric; - } + m_metricSqr = m_metric * m_metric; +} - float WeightedClusterFit::GetBestError() const - { +float WeightedClusterFit::GetBestError() const +{ #if SQUISH_USE_SIMD - Vec4 x = m_xxsum * m_metricSqr; - Vec4 error = m_besterror + x.SplatX() + x.SplatY() + x.SplatZ(); - return error.GetVec3().X(); + Vec4 x = m_xxsum * m_metricSqr; + Vec4 error = m_besterror + x.SplatX() + x.SplatY() + x.SplatZ(); + return error.GetX(); #else - return m_besterror + Dot(m_xxsum, m_metricSqr); + return m_besterror + Dot(m_xxsum, m_metricSqr); #endif - } +} #if SQUISH_USE_SIMD - void WeightedClusterFit::Compress3( void* block ) +void WeightedClusterFit::Compress3( void* block ) +{ + int const count = m_colours->GetCount(); + Vec4 const one = VEC4_CONST(1.0f); + Vec4 const zero = VEC4_CONST(0.0f); + Vec4 const half(0.5f, 0.5f, 0.5f, 0.25f); + Vec4 const two = VEC4_CONST(2.0); + Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f ); + Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f ); + + // declare variables + Vec4 beststart = VEC4_CONST( 0.0f ); + Vec4 bestend = VEC4_CONST( 0.0f ); + Vec4 besterror = VEC4_CONST( FLT_MAX ); + + Vec4 x0 = zero; + + int b0 = 0, b1 = 0; + + // check all possible clusters for this total order + for( int c0 = 0; c0 <= count; c0++) + { + Vec4 x1 = zero; + + for( int c1 = 0; c1 <= count-c0; c1++) + { + Vec4 const x2 = m_xsum - x1 - x0; + + //Vec3 const alphax_sum = x0 + x1 * 0.5f; + //float const alpha2_sum = w0 + w1 * 0.25f; + Vec4 const alphax_sum = MultiplyAdd(x1, half, x0); // alphax_sum, alpha2_sum + Vec4 const alpha2_sum = alphax_sum.SplatW(); + + //Vec3 const betax_sum = x2 + x1 * 0.5f; + //float const beta2_sum = w2 + w1 * 0.25f; + Vec4 const betax_sum = MultiplyAdd(x1, half, x2); // betax_sum, beta2_sum + Vec4 const beta2_sum = betax_sum.SplatW(); + + //float const alphabeta_sum = w1 * 0.25f; + Vec4 const alphabeta_sum = (x1 * half).SplatW(); // alphabeta_sum + + // float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); + Vec4 const factor = Reciprocal( NegativeMultiplySubtract(alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum) ); + + Vec4 a = NegativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor; + Vec4 b = NegativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor; + + // clamp to the grid + a = Min( one, Max( zero, a ) ); + b = Min( one, Max( zero, b ) ); + a = Truncate( MultiplyAdd( grid, a, half ) ) * gridrcp; + b = Truncate( MultiplyAdd( grid, b, half ) ) * gridrcp; + + // compute the error (we skip the constant xxsum) + Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum ); + Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum ); + Vec4 e3 = NegativeMultiplySubtract( 
b, betax_sum, e2 ); + Vec4 e4 = MultiplyAdd( two, e3, e1 ); + + // apply the metric to the error term + Vec4 e5 = e4 * m_metricSqr; + Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ(); + + // keep the solution if it wins + if( CompareAnyLessThan( error, besterror ) ) + { + besterror = error; + beststart = a; + bestend = b; + b0 = c0; + b1 = c1; + } + + x1 += m_weighted[c0+c1]; + } + + x0 += m_weighted[c0]; + } + + // save the block if necessary + if( CompareAnyLessThan( besterror, m_besterror ) ) { - int const count = m_colours->GetCount(); - Vec4 const one = VEC4_CONST(1.0f); - Vec4 const zero = VEC4_CONST(0.0f); - Vec4 const half(0.5f, 0.5f, 0.5f, 0.25f); - Vec4 const two = VEC4_CONST(2.0); - Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f ); - Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f ); - - // declare variables - Vec4 beststart = VEC4_CONST( 0.0f ); - Vec4 bestend = VEC4_CONST( 0.0f ); - Vec4 besterror = VEC4_CONST( FLT_MAX ); + // compute indices from cluster sizes. + u8 bestindices[16]; + { + int i = 0; + for(; i < b0; i++) { + bestindices[i] = 0; + } + for(; i < b0+b1; i++) { + bestindices[i] = 2; + } + for(; i < count; i++) { + bestindices[i] = 1; + } + } + + // remap the indices + u8 ordered[16]; + for( int i = 0; i < count; ++i ) + ordered[m_order[i]] = bestindices[i]; + + m_colours->RemapIndices( ordered, bestindices ); - Vec4 x0 = zero; - int b0 = 0, b1 = 0; + // save the block + WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), bestindices, block ); + + // save the error + m_besterror = besterror; + } +} - // check all possible clusters for this total order - for( int c0 = 0; c0 <= count; c0++) +void WeightedClusterFit::Compress4( void* block ) +{ + int const count = m_colours->GetCount(); + Vec4 const one = VEC4_CONST(1.0f); + Vec4 const zero = VEC4_CONST(0.0f); + Vec4 const half = VEC4_CONST(0.5f); + Vec4 const two = VEC4_CONST(2.0); + Vec4 const onethird( 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/9.0f ); + Vec4 const twothirds( 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f, 4.0f/9.0f ); + Vec4 const twonineths = VEC4_CONST( 2.0f/9.0f ); + Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f ); + Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f ); + + // declare variables + Vec4 beststart = VEC4_CONST( 0.0f ); + Vec4 bestend = VEC4_CONST( 0.0f ); + Vec4 besterror = VEC4_CONST( FLT_MAX ); + + Vec4 x0 = zero; + int b0 = 0, b1 = 0, b2 = 0; + + // check all possible clusters for this total order + for( int c0 = 0; c0 <= count; c0++) + { + Vec4 x1 = zero; + + for( int c1 = 0; c1 <= count-c0; c1++) { - Vec4 x1 = zero; - - for( int c1 = 0; c1 <= count-c0; c1++) + Vec4 x2 = zero; + + for( int c2 = 0; c2 <= count-c0-c1; c2++) { - Vec4 const x2 = m_xsum - x1 - x0; - - //Vec3 const alphax_sum = x0 + x1 * 0.5f; - //float const alpha2_sum = w0 + w1 * 0.25f; - Vec4 const alphax_sum = MultiplyAdd(x1, half, x0); // alphax_sum, alpha2_sum + Vec4 const x3 = m_xsum - x2 - x1 - x0; + + //Vec3 const alphax_sum = x0 + x1 * (2.0f / 3.0f) + x2 * (1.0f / 3.0f); + //float const alpha2_sum = w0 + w1 * (4.0f/9.0f) + w2 * (1.0f/9.0f); + Vec4 const alphax_sum = MultiplyAdd(x2, onethird, MultiplyAdd(x1, twothirds, x0)); // alphax_sum, alpha2_sum Vec4 const alpha2_sum = alphax_sum.SplatW(); - - //Vec3 const betax_sum = x2 + x1 * 0.5f; - //float const beta2_sum = w2 + w1 * 0.25f; - Vec4 const betax_sum = MultiplyAdd(x1, half, x2); // betax_sum, beta2_sum + + //Vec3 const betax_sum = x3 + x2 * (2.0f / 3.0f) + x1 * (1.0f / 3.0f); + //float const beta2_sum = w3 + w2 * (4.0f/9.0f) + w1 * (1.0f/9.0f); 
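// Aside -- an illustrative sketch, not part of this patch: the commented-out
// scalar lines above are the normal equations of the weighted least-squares
// problem that cluster fit solves for every candidate partition. Each pixel
// carries an interpolation weight alpha in {1, 2/3, 1/3, 0} (beta = 1 - alpha),
// the error  sum_i w_i * |alpha_i*a + beta_i*b - x_i|^2  is minimised over the
// two endpoints a and b, and the resulting 2x2 system is solved in closed
// form. A self-contained scalar version of that solve:
struct FitEndpoints { float a[3], b[3]; };

static FitEndpoints SolveEndpoints(float alpha2_sum, float beta2_sum, float alphabeta_sum,
                                   const float alphax_sum[3], const float betax_sum[3])
{
    // Normal equations:
    //   alpha2_sum    * a + alphabeta_sum * b = alphax_sum
    //   alphabeta_sum * a + beta2_sum     * b = betax_sum
    const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);

    FitEndpoints e;
    for (int c = 0; c < 3; ++c)
    {
        e.a[c] = (alphax_sum[c] * beta2_sum - betax_sum[c] * alphabeta_sum) * factor;
        e.b[c] = (betax_sum[c] * alpha2_sum - alphax_sum[c] * alphabeta_sum) * factor;
    }
    // The code below additionally clamps a and b to [0,1] and snaps them to
    // the RGB565 grid before evaluating the metric-weighted error.
    return e;
}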
+ Vec4 const betax_sum = MultiplyAdd(x2, twothirds, MultiplyAdd(x1, onethird, x3)); // betax_sum, beta2_sum Vec4 const beta2_sum = betax_sum.SplatW(); - - //float const alphabeta_sum = w1 * 0.25f; - Vec4 const alphabeta_sum = (x1 * half).SplatW(); // alphabeta_sum - + + //float const alphabeta_sum = (w1 + w2) * (2.0f/9.0f); + Vec4 const alphabeta_sum = twonineths*( x1 + x2 ).SplatW(); // alphabeta_sum + // float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); Vec4 const factor = Reciprocal( NegativeMultiplySubtract(alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum) ); - + Vec4 a = NegativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor; Vec4 b = NegativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor; - + // clamp to the grid a = Min( one, Max( zero, a ) ); b = Min( one, Max( zero, b ) ); a = Truncate( MultiplyAdd( grid, a, half ) ) * gridrcp; b = Truncate( MultiplyAdd( grid, b, half ) ) * gridrcp; - + // compute the error (we skip the constant xxsum) Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum ); Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum ); @@ -191,7 +310,7 @@ // apply the metric to the error term Vec4 e5 = e4 * m_metricSqr; Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ(); - + // keep the solution if it wins if( CompareAnyLessThan( error, besterror ) ) { @@ -200,228 +319,216 @@ bestend = b; b0 = c0; b1 = c1; + b2 = c2; } - - x1 += m_weighted[c0+c1]; + + x2 += m_weighted[c0+c1+c2]; } - - x0 += m_weighted[c0]; + + x1 += m_weighted[c0+c1]; } + + x0 += m_weighted[c0]; + } - // save the block if necessary - if( CompareAnyLessThan( besterror, m_besterror ) ) + // save the block if necessary + if( CompareAnyLessThan( besterror, m_besterror ) ) + { + // compute indices from cluster sizes. + u8 bestindices[16]; { - // compute indices from cluster sizes. 
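// Aside -- illustrative, not part of this patch: in the 4-colour BC1 mode the
// two endpoints use palette indices 0 and 1 and the interpolated colours use
// indices 2 and 3, so the winning cluster sizes translate into indices along
// the sorted order: the first b0 pixels map to endpoint a (index 0), the next
// b1 to 2/3*a + 1/3*b (index 2), the next b2 to 1/3*a + 2/3*b (index 3), and
// the rest to endpoint b (index 1). The ordered[] pass afterwards scatters the
// indices from sorted positions back to original pixel positions via m_order.
// Worked example with count = 6, b0 = 2, b1 = 1, b2 = 2:
//   indices in sorted order : 0 0 2 3 3 1
//   m_order (sorted -> orig): 4 1 5 0 2 3
//   ordered[] (per pixel)   : 3 0 3 1 0 2
static void BuildSortedIndices(int count, int b0, int b1, int b2, unsigned char indices[16])
{
    int i = 0;
    for (; i < b0; ++i)           indices[i] = 0; // endpoint a
    for (; i < b0 + b1; ++i)      indices[i] = 2; // 2/3*a + 1/3*b
    for (; i < b0 + b1 + b2; ++i) indices[i] = 3; // 1/3*a + 2/3*b
    for (; i < count; ++i)        indices[i] = 1; // endpoint b
}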
- u8 bestindices[16]; - { - int i = 0; - for(; i < b0; i++) { - bestindices[i] = 0; - } - for(; i < b0+b1; i++) { - bestindices[i] = 2; - } - for(; i < count; i++) { - bestindices[i] = 1; - } + int i = 0; + for(; i < b0; i++) { + bestindices[i] = 0; + } + for(; i < b0+b1; i++) { + bestindices[i] = 2; + } + for(; i < b0+b1+b2; i++) { + bestindices[i] = 3; + } + for(; i < count; i++) { + bestindices[i] = 1; } - - // remap the indices - u8 ordered[16]; - for( int i = 0; i < count; ++i ) - ordered[m_order[i]] = bestindices[i]; - - m_colours->RemapIndices( ordered, bestindices ); - - - // save the block - WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), bestindices, block ); - - // save the error - m_besterror = besterror; } + + // remap the indices + u8 ordered[16]; + for( int i = 0; i < count; ++i ) + ordered[m_order[i]] = bestindices[i]; + + m_colours->RemapIndices( ordered, bestindices ); + + // save the block + WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), bestindices, block ); + + // save the error + m_besterror = besterror; } +} - void WeightedClusterFit::Compress4( void* block ) - { - int const count = m_colours->GetCount(); - Vec4 const one = VEC4_CONST(1.0f); - Vec4 const zero = VEC4_CONST(0.0f); - Vec4 const half = VEC4_CONST(0.5f); - Vec4 const two = VEC4_CONST(2.0); - Vec4 const onethird( 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/9.0f ); - Vec4 const twothirds( 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f, 4.0f/9.0f ); - Vec4 const twonineths = VEC4_CONST( 2.0f/9.0f ); - Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f ); - Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f ); - - // declare variables - Vec4 beststart = VEC4_CONST( 0.0f ); - Vec4 bestend = VEC4_CONST( 0.0f ); - Vec4 besterror = VEC4_CONST( FLT_MAX ); - - Vec4 x0 = zero; - int b0 = 0, b1 = 0, b2 = 0; +#else - // check all possible clusters for this total order - for( int c0 = 0; c0 <= count; c0++) +void WeightedClusterFit::Compress3( void* block ) +{ + int const count = m_colours->GetCount(); + Vec3 const one( 1.0f ); + Vec3 const zero( 0.0f ); + Vec3 const half( 0.5f ); + Vec3 const grid( 31.0f, 63.0f, 31.0f ); + Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f ); + + // declare variables + Vec3 beststart( 0.0f ); + Vec3 bestend( 0.0f ); + float besterror = FLT_MAX; + + Vec3 x0(0.0f); + float w0 = 0.0f; + + int b0 = 0, b1 = 0; + + // check all possible clusters for this total order + for( int c0 = 0; c0 <= count; c0++) + { + Vec3 x1(0.0f); + float w1 = 0.0f; + + for( int c1 = 0; c1 <= count-c0; c1++) { - Vec4 x1 = zero; - - for( int c1 = 0; c1 <= count-c0; c1++) - { - Vec4 x2 = zero; - - for( int c2 = 0; c2 <= count-c0-c1; c2++) - { - Vec4 const x3 = m_xsum - x2 - x1 - x0; - - //Vec3 const alphax_sum = x0 + x1 * (2.0f / 3.0f) + x2 * (1.0f / 3.0f); - //float const alpha2_sum = w0 + w1 * (4.0f/9.0f) + w2 * (1.0f/9.0f); - Vec4 const alphax_sum = MultiplyAdd(x2, onethird, MultiplyAdd(x1, twothirds, x0)); // alphax_sum, alpha2_sum - Vec4 const alpha2_sum = alphax_sum.SplatW(); - - //Vec3 const betax_sum = x3 + x2 * (2.0f / 3.0f) + x1 * (1.0f / 3.0f); - //float const beta2_sum = w3 + w2 * (4.0f/9.0f) + w1 * (1.0f/9.0f); - Vec4 const betax_sum = MultiplyAdd(x2, twothirds, MultiplyAdd(x1, onethird, x3)); // betax_sum, beta2_sum - Vec4 const beta2_sum = betax_sum.SplatW(); - - //float const alphabeta_sum = (w1 + w2) * (2.0f/9.0f); - Vec4 const alphabeta_sum = twonineths*( x1 + x2 ).SplatW(); // alphabeta_sum - - // float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); - Vec4 
const factor = Reciprocal( NegativeMultiplySubtract(alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum) ); - - Vec4 a = NegativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor; - Vec4 b = NegativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor; - - // clamp to the grid - a = Min( one, Max( zero, a ) ); - b = Min( one, Max( zero, b ) ); - a = Truncate( MultiplyAdd( grid, a, half ) ) * gridrcp; - b = Truncate( MultiplyAdd( grid, b, half ) ) * gridrcp; - - // compute the error (we skip the constant xxsum) - Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum ); - Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum ); - Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 ); - Vec4 e4 = MultiplyAdd( two, e3, e1 ); - - // apply the metric to the error term - Vec4 e5 = e4 * m_metricSqr; - Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ(); - - // keep the solution if it wins - if( CompareAnyLessThan( error, besterror ) ) - { - besterror = error; - beststart = a; - bestend = b; - b0 = c0; - b1 = c1; - b2 = c2; - } - - x2 += m_weighted[c0+c1+c2]; - } - - x1 += m_weighted[c0+c1]; + float w2 = m_wsum - w0 - w1; + + // These factors could be entirely precomputed. + float const alpha2_sum = w0 + w1 * 0.25f; + float const beta2_sum = w2 + w1 * 0.25f; + float const alphabeta_sum = w1 * 0.25f; + float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); + + Vec3 const alphax_sum = x0 + x1 * 0.5f; + Vec3 const betax_sum = m_xsum - alphax_sum; + + Vec3 a = (alphax_sum*beta2_sum - betax_sum*alphabeta_sum) * factor; + Vec3 b = (betax_sum*alpha2_sum - alphax_sum*alphabeta_sum) * factor; + + // clamp to the grid + a = Min( one, Max( zero, a ) ); + b = Min( one, Max( zero, b ) ); + a = Floor( grid*a + half )*gridrcp; + b = Floor( grid*b + half )*gridrcp; + + // compute the error + Vec3 e1 = a*a*alpha2_sum + b*b*beta2_sum + 2.0f*( a*b*alphabeta_sum - a*alphax_sum - b*betax_sum ); + + // apply the metric to the error term + float error = Dot( e1, m_metricSqr ); + + // keep the solution if it wins + if( error < besterror ) + { + besterror = error; + beststart = a; + bestend = b; + b0 = c0; + b1 = c1; } + + x1 += m_weighted[c0+c1]; + w1 += m_weights[c0+c1]; + } + + x0 += m_weighted[c0]; + w0 += m_weights[c0]; + } - x0 += m_weighted[c0]; - } - - // save the block if necessary - if( CompareAnyLessThan( besterror, m_besterror ) ) + // save the block if necessary + if( besterror < m_besterror ) + { + // compute indices from cluster sizes. + u8 bestindices[16]; { - // compute indices from cluster sizes. 
- u8 bestindices[16]; - { - int i = 0; - for(; i < b0; i++) { - bestindices[i] = 0; - } - for(; i < b0+b1; i++) { - bestindices[i] = 2; - } - for(; i < b0+b1+b2; i++) { - bestindices[i] = 3; - } - for(; i < count; i++) { - bestindices[i] = 1; - } + int i = 0; + for(; i < b0; i++) { + bestindices[i] = 0; + } + for(; i < b0+b1; i++) { + bestindices[i] = 2; + } + for(; i < count; i++) { + bestindices[i] = 1; } - - // remap the indices - u8 ordered[16]; - for( int i = 0; i < count; ++i ) - ordered[m_order[i]] = bestindices[i]; - - m_colours->RemapIndices( ordered, bestindices ); - - // save the block - WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), bestindices, block ); - - // save the error - m_besterror = besterror; } + + // remap the indices + u8 ordered[16]; + for( int i = 0; i < count; ++i ) + ordered[m_order[i]] = bestindices[i]; + + m_colours->RemapIndices( ordered, bestindices ); + + // save the block + WriteColourBlock3( beststart, bestend, bestindices, block ); + + // save the error + m_besterror = besterror; } +} -#else - - void WeightedClusterFit::Compress3( void* block ) - { - int const count = m_colours->GetCount(); - Vec3 const one( 1.0f ); - Vec3 const zero( 0.0f ); - Vec3 const half( 0.5f ); - Vec3 const grid( 31.0f, 63.0f, 31.0f ); - Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f ); - - // declare variables - Vec3 beststart( 0.0f ); - Vec3 bestend( 0.0f ); - float besterror = FLT_MAX; - - Vec3 x0(0.0f); - float w0 = 0.0f; - - int b0 = 0, b1 = 0; - - // check all possible clusters for this total order - for( int c0 = 0; c0 <= count; c0++) +void WeightedClusterFit::Compress4( void* block ) +{ + int const count = m_colours->GetCount(); + Vec3 const one( 1.0f ); + Vec3 const zero( 0.0f ); + Vec3 const half( 0.5f ); + Vec3 const grid( 31.0f, 63.0f, 31.0f ); + Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f ); + + // declare variables + Vec3 beststart( 0.0f ); + Vec3 bestend( 0.0f ); + float besterror = FLT_MAX; + + Vec3 x0(0.0f); + float w0 = 0.0f; + int b0 = 0, b1 = 0, b2 = 0; + + // check all possible clusters for this total order + for( int c0 = 0; c0 <= count; c0++) + { + Vec3 x1(0.0f); + float w1 = 0.0f; + + for( int c1 = 0; c1 <= count-c0; c1++) { - Vec3 x1(0.0f); - float w1 = 0.0f; - - for( int c1 = 0; c1 <= count-c0; c1++) - { - float w2 = m_wsum - w0 - w1; - - // These factors could be entirely precomputed. 
- float const alpha2_sum = w0 + w1 * 0.25f; - float const beta2_sum = w2 + w1 * 0.25f; - float const alphabeta_sum = w1 * 0.25f; + Vec3 x2(0.0f); + float w2 = 0.0f; + + for( int c2 = 0; c2 <= count-c0-c1; c2++) + { + float w3 = m_wsum - w0 - w1 - w2; + + float const alpha2_sum = w0 + w1 * (4.0f/9.0f) + w2 * (1.0f/9.0f); + float const beta2_sum = w3 + w2 * (4.0f/9.0f) + w1 * (1.0f/9.0f); + float const alphabeta_sum = (w1 + w2) * (2.0f/9.0f); float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); - - Vec3 const alphax_sum = x0 + x1 * 0.5f; + + Vec3 const alphax_sum = x0 + x1 * (2.0f / 3.0f) + x2 * (1.0f / 3.0f); Vec3 const betax_sum = m_xsum - alphax_sum; - - Vec3 a = (alphax_sum*beta2_sum - betax_sum*alphabeta_sum) * factor; - Vec3 b = (betax_sum*alpha2_sum - alphax_sum*alphabeta_sum) * factor; - + + Vec3 a = ( alphax_sum*beta2_sum - betax_sum*alphabeta_sum )*factor; + Vec3 b = ( betax_sum*alpha2_sum - alphax_sum*alphabeta_sum )*factor; + // clamp to the grid a = Min( one, Max( zero, a ) ); b = Min( one, Max( zero, b ) ); a = Floor( grid*a + half )*gridrcp; b = Floor( grid*b + half )*gridrcp; - + // compute the error Vec3 e1 = a*a*alpha2_sum + b*b*beta2_sum + 2.0f*( a*b*alphabeta_sum - a*alphax_sum - b*betax_sum ); - + // apply the metric to the error term float error = Dot( e1, m_metricSqr ); - + // keep the solution if it wins if( error < besterror ) { @@ -430,163 +537,56 @@ bestend = b; b0 = c0; b1 = c1; + b2 = c2; } - - x1 += m_weighted[c0+c1]; - w1 += m_weights[c0+c1]; - } - - x0 += m_weighted[c0]; - w0 += m_weights[c0]; - } - - // save the block if necessary - if( besterror < m_besterror ) - { - // compute indices from cluster sizes. - u8 bestindices[16]; - { - int i = 0; - for(; i < b0; i++) { - bestindices[i] = 0; - } - for(; i < b0+b1; i++) { - bestindices[i] = 2; - } - for(; i < count; i++) { - bestindices[i] = 1; - } + + x2 += m_weighted[c0+c1+c2]; + w2 += m_weights[c0+c1+c2]; } - - // remap the indices - u8 ordered[16]; - for( int i = 0; i < count; ++i ) - ordered[m_order[i]] = bestindices[i]; - - m_colours->RemapIndices( ordered, bestindices ); - - // save the block - WriteColourBlock3( beststart, bestend, bestindices, block ); - - // save the error - m_besterror = besterror; - } + + x1 += m_weighted[c0+c1]; + w1 += m_weights[c0+c1]; + } + + x0 += m_weighted[c0]; + w0 += m_weights[c0]; } - void WeightedClusterFit::Compress4( void* block ) + // save the block if necessary + if( besterror < m_besterror ) { - int const count = m_colours->GetCount(); - Vec3 const one( 1.0f ); - Vec3 const zero( 0.0f ); - Vec3 const half( 0.5f ); - Vec3 const grid( 31.0f, 63.0f, 31.0f ); - Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f ); - - // declare variables - Vec3 beststart( 0.0f ); - Vec3 bestend( 0.0f ); - float besterror = FLT_MAX; - - Vec3 x0(0.0f); - float w0 = 0.0f; - int b0 = 0, b1 = 0, b2 = 0; - - // check all possible clusters for this total order - for( int c0 = 0; c0 <= count; c0++) - { - Vec3 x1(0.0f); - float w1 = 0.0f; - - for( int c1 = 0; c1 <= count-c0; c1++) - { - Vec3 x2(0.0f); - float w2 = 0.0f; - - for( int c2 = 0; c2 <= count-c0-c1; c2++) - { - float w3 = m_wsum - w0 - w1 - w2; - - float const alpha2_sum = w0 + w1 * (4.0f/9.0f) + w2 * (1.0f/9.0f); - float const beta2_sum = w3 + w2 * (4.0f/9.0f) + w1 * (1.0f/9.0f); - float const alphabeta_sum = (w1 + w2) * (2.0f/9.0f); - float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); - - Vec3 const alphax_sum = x0 + x1 * (2.0f / 3.0f) + x2 * (1.0f / 3.0f); - Vec3 
const betax_sum = m_xsum - alphax_sum; - - Vec3 a = ( alphax_sum*beta2_sum - betax_sum*alphabeta_sum )*factor; - Vec3 b = ( betax_sum*alpha2_sum - alphax_sum*alphabeta_sum )*factor; - - // clamp to the grid - a = Min( one, Max( zero, a ) ); - b = Min( one, Max( zero, b ) ); - a = Floor( grid*a + half )*gridrcp; - b = Floor( grid*b + half )*gridrcp; - - // compute the error - Vec3 e1 = a*a*alpha2_sum + b*b*beta2_sum + 2.0f*( a*b*alphabeta_sum - a*alphax_sum - b*betax_sum ); - - // apply the metric to the error term - float error = Dot( e1, m_metricSqr ); - - // keep the solution if it wins - if( error < besterror ) - { - besterror = error; - beststart = a; - bestend = b; - b0 = c0; - b1 = c1; - b2 = c2; - } - - x2 += m_weighted[c0+c1+c2]; - w2 += m_weights[c0+c1+c2]; - } - - x1 += m_weighted[c0+c1]; - w1 += m_weights[c0+c1]; - } - - x0 += m_weighted[c0]; - w0 += m_weights[c0]; - } - - // save the block if necessary - if( besterror < m_besterror ) + // compute indices from cluster sizes. + u8 bestindices[16]; { - // compute indices from cluster sizes. - u8 bestindices[16]; - { - int i = 0; - for(; i < b0; i++) { - bestindices[i] = 0; - } - for(; i < b0+b1; i++) { - bestindices[i] = 2; - } - for(; i < b0+b1+b2; i++) { - bestindices[i] = 3; - } - for(; i < count; i++) { - bestindices[i] = 1; - } + int i = 0; + for(; i < b0; i++) { + bestindices[i] = 0; } + for(; i < b0+b1; i++) { + bestindices[i] = 2; + } + for(; i < b0+b1+b2; i++) { + bestindices[i] = 3; + } + for(; i < count; i++) { + bestindices[i] = 1; + } + } + + // remap the indices + u8 ordered[16]; + for( int i = 0; i < count; ++i ) + ordered[m_order[i]] = bestindices[i]; - // remap the indices - u8 ordered[16]; - for( int i = 0; i < count; ++i ) - ordered[m_order[i]] = bestindices[i]; - - m_colours->RemapIndices( ordered, bestindices ); - - // save the block - WriteColourBlock4( beststart, bestend, bestindices, block ); + m_colours->RemapIndices( ordered, bestindices ); + + // save the block + WriteColourBlock4( beststart, bestend, bestindices, block ); - // save the error - m_besterror = besterror; - } + // save the error + m_besterror = besterror; } +} #endif Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/tests/ctest.c =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/tests/ctest.c +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/tests/ctest.c @@ -1,35 +0,0 @@ - -#include - -#include - - -int main(void) -{ - NvttInputOptions inputOptions = 0; - NvttOutputOptions outputOptions = 0; - NvttCompressionOptions compressionOptions = 0; - - const unsigned int img[16*16]; - - memset(img, 0, sizeof(unsigned int) * 16 * 16); - - inputOptions = nvttCreateInputOptions(); - nvttSetInputOptionsTextureLayout(inputOptions, NVTT_TextureType_2D, 16, 16, 1); - nvttSetInputOptionsMipmapData(inputOptions, img, 16, 16, 1, 0, 0); - - outputOptions = nvttCreateOutputOptions(); - nvttSetOutputOptionsFileName(outputOptions, "output.dds"); - - compressionOptions = nvttCreateCompressionOptions(); - nvttSetCompressionOptionsFormat(compressionOptions, NVTT_Format_BC1); - - nvttCompress(inputOptions, outputOptions, compressionOptions); - - nvttDestroyCompressionOptions(compressionOptions); - nvttDestroyOutputOptions(outputOptions); - nvttDestroyInputOptions(inputOptions); - - return 0; -} - Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/tests/filtertest.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/tests/filtertest.cpp 
+++ ps/trunk/libraries/source/nvtt/src/src/nvtt/tests/filtertest.cpp @@ -1,80 +0,0 @@ - -#include -#include "../tools/cmdline.h" - -#include - -using namespace nv; - -int main(void) -{ -// MyAssertHandler assertHandler; - MyMessageHandler messageHandler; - - BoxFilter box1(0.5); - Kernel1 k1(box1, 2); - k1.debugPrint(); nvDebug("\n"); - - BoxFilter box2(1); - Kernel1 k2(box2, 2); - k2.debugPrint(); nvDebug("\n"); - - BoxFilter boxr3(1); - Kernel1 k3(boxr3, 2); - k3.debugPrint(); nvDebug("\n"); - - KaiserFilter kai4(5); - kai4.setParameters(4, 2); - Kernel1 k4(kai4, 2); - k4.debugPrint(); nvDebug("\n"); - -/* Kernel1 k3(3); - Kernel1 k4(9); - Kernel1 k5(10); - -// k3.initFilter(Filter::Box); -// k4.initFilter(Filter::Box); -// k5.initFilter(Filter::Box); - -// nvDebug("Box Filter:\n"); -// k3.debugPrint(); nvDebug("\n"); -// k4.debugPrint(); nvDebug("\n"); -// k5.debugPrint(); nvDebug("\n"); - - k3.initSinc(0.75); - k4.initSinc(0.75); - k5.initSinc(0.75); - - nvDebug("Sinc Filter:\n"); - k3.debugPrint(); nvDebug("\n"); - k4.debugPrint(); nvDebug("\n"); - k5.debugPrint(); nvDebug("\n"); - - k3.initKaiser(4, 1, 100); - k4.initKaiser(4, 1, 100); - k5.initKaiser(4, 1, 100); - - nvDebug("Kaiser Filter:\n"); - k3.debugPrint(); nvDebug("\n"); - k4.debugPrint(); nvDebug("\n"); - k5.debugPrint(); nvDebug("\n"); - - k3.initKaiser(4, 1, 10); - k4.initKaiser(4, 1, 10); - k5.initKaiser(4, 1, 10); - - nvDebug("Kaiser Filter 2:\n"); - k3.debugPrint(); nvDebug("\n"); - k4.debugPrint(); nvDebug("\n"); - k5.debugPrint(); nvDebug("\n"); -*/ - int l_start = 4; - int l_end = 2; - - BoxFilter filter; - PolyphaseKernel kp(kai4, l_start, l_end); - - kp.debugPrint(); - - return 0; -} Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/tools/assemble.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/tools/assemble.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/tools/assemble.cpp @@ -1,189 +0,0 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. - -#include -#include - -#include - -#include -#include -#include - -#include "cmdline.h" - -// @@ Add decent error messages. -// @@ Add option to resize images. -// @@ Add support for reading DDS files with 2D images and possibly mipmaps. 
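// Aside -- an illustrative sketch, not part of this patch: the nvassemble tool
// deleted here packs six same-sized face images into a single uncompressed
// cube-map DDS by writing a cube-map DDSHeader followed by the raw 32-bit BGRA
// pixels of each face, as its main() below shows. A condensed outline using
// the same nv:: classes (the include paths are assumptions, since the original
// #include lines were lost in this copy of the patch):
#include <nvcore/StdStream.h>           // nv::StdOutputStream (assumed path)
#include <nvimage/Image.h>              // nv::Image           (assumed path)
#include <nvimage/DirectDrawSurface.h>  // nv::DDSHeader       (assumed path)

static bool AssembleCube(nv::Image faces[6], const char * outName)
{
    const uint w = faces[0].width();
    const uint h = faces[0].height();

    nv::StdOutputStream stream(outName);
    if (stream.isError()) return false;

    nv::DDSHeader header;
    header.setWidth(w);
    header.setHeight(h);
    header.setTextureCube();
    header.setPitch(4 * w);                                   // always 32 bpp
    header.setPixelFormat(32, 0xFF0000, 0xFF00, 0xFF, 0xFF000000);
    stream << header;

    for (int f = 0; f < 6; f++)
    {
        for (uint p = 0; p < w * h; p++)
        {
            nv::Color32 c = faces[f].pixel(p);
            uint8 r = c.r, g = c.g, b = c.b, a = c.a;
            stream << b << g << r << a;                       // B, G, R, A byte order
        }
    }
    return true;
}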
- -int main(int argc, char *argv[]) -{ - MyAssertHandler assertHandler; - MyMessageHandler messageHandler; - - bool assembleCubeMap = true; - bool assembleVolume = false; - bool assembleTextureArray = false; - - nv::Array files; - nv::Path output = "output.dds"; - - // Parse arguments. - for (int i = 1; i < argc; i++) - { - // Input options. - if (strcmp("-cube", argv[i]) == 0) - { - assembleCubeMap = true; - assembleVolume = false; - assembleTextureArray = false; - } - /*if (strcmp("-volume", argv[i]) == 0) - { - assembleCubeMap = false; - assembleVolume = true; - assembleTextureArray = false; - } - if (strcmp("-array", argv[i]) == 0) - { - assembleCubeMap = false; - assembleVolume = false; - assembleTextureArray = true; - }*/ - else if (strcmp("-o", argv[i]) == 0) - { - i++; - if (i < argc && argv[i][0] != '-') - { - output = argv[i]; - } - } - else if (argv[i][0] != '-') - { - files.append(argv[i]); - } - } - - if (files.count() == 0) - { - printf("NVIDIA Texture Tools - Copyright NVIDIA Corporation 2007\n\n"); - printf("usage: nvassemble [-cube|-volume|-array] 'file0' 'file1' ...\n\n"); - return 1; - } - - if (nv::strCaseCmp(output.extension(), ".dds") != 0) - { - //output.stripExtension(); - output.append(".dds"); - } - - if (assembleCubeMap && files.count() != 6) - { - printf("*** error, 6 files expected, but got %d\n", files.count()); - return 1; - } - - // Load all files. - nv::Array images; - - uint w = 0, h = 0; - bool hasAlpha = false; - - const uint imageCount = files.count(); - images.resize(imageCount); - - for (uint i = 0; i < imageCount; i++) - { - if (!images[i].load(files[i])) - { - printf("*** error loading file\n"); - return 1; - } - - if (i == 0) - { - w = images[i].width(); - h = images[i].height(); - } - else if (images[i].width() != w || images[i].height() != h) - { - printf("*** error, size of image '%s' does not match\n", files[i].str()); - return 1; - } - - if (images[i].format() == nv::Image::Format_ARGB) - { - hasAlpha = true; - } - } - - - nv::StdOutputStream stream(output); - if (stream.isError()) { - printf("Error opening '%s' for writting\n", output.str()); - return 1; - } - - // Output DDS header. - nv::DDSHeader header; - header.setWidth(w); - header.setHeight(h); - - if (assembleCubeMap) - { - header.setTextureCube(); - } - else if (assembleVolume) - { - header.setTexture3D(); - header.setDepth(imageCount); - } - else if (assembleTextureArray) - { - //header.setTextureArray(imageCount); - } - - // @@ It always outputs 32 bpp. - header.setPitch(4 * w); - header.setPixelFormat(32, 0xFF0000, 0xFF00, 0xFF, hasAlpha ? 0xFF000000 : 0); - - stream << header; - - // Output images. 
- for (uint i = 0; i < imageCount; i++) - { - const uint pixelCount = w * h; - for (uint p = 0; p < pixelCount; p++) - { - nv::Color32 c = images[i].pixel(p); - uint8 r = c.r; - uint8 g = c.g; - uint8 b = c.b; - uint8 a = c.a; - stream << b << g << r << a; - } - } - - return 0; -} - Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/tools/benchmark.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/tools/benchmark.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/tools/benchmark.cpp @@ -1,374 +0,0 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. - -#include -#include - -#include -#include - -#include - -#include "cmdline.h" - -#include // clock - - -struct MyErrorHandler : public nvtt::ErrorHandler -{ - virtual void error(nvtt::Error e) - { - nvDebugBreak(); - } -}; - - -// Set color to normal map conversion options. -void setColorToNormalMap(nvtt::InputOptions & inputOptions) -{ - inputOptions.setNormalMap(false); - inputOptions.setConvertToNormalMap(true); - inputOptions.setHeightEvaluation(1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 0.0f); - //inputOptions.setNormalFilter(1.0f, 0, 0, 0); - //inputOptions.setNormalFilter(0.0f, 0, 0, 1); - inputOptions.setGamma(1.0f, 1.0f); - inputOptions.setNormalizeMipmaps(true); -} - -// Set options for normal maps. -void setNormalMap(nvtt::InputOptions & inputOptions) -{ - inputOptions.setNormalMap(true); - inputOptions.setConvertToNormalMap(false); - inputOptions.setGamma(1.0f, 1.0f); - inputOptions.setNormalizeMipmaps(true); -} - -// Set options for color maps. -void setColorMap(nvtt::InputOptions & inputOptions) -{ - inputOptions.setNormalMap(false); - inputOptions.setConvertToNormalMap(false); - inputOptions.setGamma(2.2f, 2.2f); - inputOptions.setNormalizeMipmaps(false); -} - - - -int main(int argc, char *argv[]) -{ - MyAssertHandler assertHandler; - MyMessageHandler messageHandler; - - bool normal = false; - bool color2normal = false; - bool wrapRepeat = false; - bool noMipmaps = false; - bool fast = false; - bool nocuda = false; - bool silent = false; - nvtt::Format format = nvtt::Format_BC1; - - const char * externalCompressor = NULL; - - nv::Path input; - nv::Path output; - - - // Parse arguments. - for (int i = 1; i < argc; i++) - { - // Input options. 
- if (strcmp("-color", argv[i]) == 0) - { - } - else if (strcmp("-normal", argv[i]) == 0) - { - normal = true; - } - else if (strcmp("-tonormal", argv[i]) == 0) - { - color2normal = true; - } - else if (strcmp("-clamp", argv[i]) == 0) - { - } - else if (strcmp("-repeat", argv[i]) == 0) - { - wrapRepeat = true; - } - else if (strcmp("-nomips", argv[i]) == 0) - { - noMipmaps = true; - } - - // Compression options. - else if (strcmp("-fast", argv[i]) == 0) - { - fast = true; - } - else if (strcmp("-nocuda", argv[i]) == 0) - { - nocuda = true; - } - else if (strcmp("-rgb", argv[i]) == 0) - { - format = nvtt::Format_RGB; - } - else if (strcmp("-bc1", argv[i]) == 0) - { - format = nvtt::Format_BC1; - } - else if (strcmp("-bc1a", argv[i]) == 0) - { - format = nvtt::Format_BC1a; - } - else if (strcmp("-bc2", argv[i]) == 0) - { - format = nvtt::Format_BC2; - } - else if (strcmp("-bc3", argv[i]) == 0) - { - format = nvtt::Format_BC3; - } - else if (strcmp("-bc3n", argv[i]) == 0) - { - format = nvtt::Format_BC3n; - } - else if (strcmp("-bc4", argv[i]) == 0) - { - format = nvtt::Format_BC4; - } - else if (strcmp("-bc5", argv[i]) == 0) - { - format = nvtt::Format_BC5; - } - - // Undocumented option. Mainly used for testing. - else if (strcmp("-ext", argv[i]) == 0) - { - if (i+1 < argc && argv[i+1][0] != '-') { - externalCompressor = argv[i+1]; - i++; - } - } - - // Misc options - else if (strcmp("-silent", argv[i]) == 0) - { - silent = true; - } - - else if (argv[i][0] != '-') - { - input = argv[i]; - - if (i+1 < argc && argv[i+1][0] != '-') { - output = argv[i+1]; - } - else - { - output.copy(input.str()); - output.stripExtension(); - output.append(".dds"); - } - - break; - } - } - - printf("NVIDIA Texture Tools - Copyright NVIDIA Corporation 2007\n\n"); - - if (input.isNull()) - { - printf("usage: nvttbenchmark [options] infile [outfile]\n\n"); - - printf("Input options:\n"); - printf(" -color \tThe input image is a color map (default).\n"); - printf(" -normal \tThe input image is a normal map.\n"); - printf(" -tonormal\tConvert input to normal map.\n"); - printf(" -clamp \tClamp wrapping mode (default).\n"); - printf(" -repeat \tRepeat wrapping mode.\n"); - printf(" -nomips \tDisable mipmap generation.\n\n"); - - printf("Compression options:\n"); - printf(" -fast \tFast compression.\n"); - printf(" -nocuda \tDo not use cuda compressor.\n"); - printf(" -rgb \tRGBA format\n"); - printf(" -bc1 \tBC1 format (DXT1)\n"); - printf(" -bc1a \tBC1 format with binary alpha (DXT1a)\n"); - printf(" -bc2 \tBC2 format (DXT3)\n"); - printf(" -bc3 \tBC3 format (DXT5)\n"); - printf(" -bc3n \tBC3 normal map format (DXT5nm)\n"); - printf(" -bc4 \tBC4 format (ATI1)\n"); - printf(" -bc5 \tBC5 format (3Dc/ATI2)\n\n"); - - return 1; - } - - // @@ Make sure input file exists. - - // Set input options. - nvtt::InputOptions inputOptions; - - if (nv::strCaseCmp(input.extension(), ".dds") == 0) - { - // Load surface. 
- nv::DirectDrawSurface dds(input); - if (!dds.isValid()) - { - fprintf(stderr, "The file '%s' is not a valid DDS file.\n", input.str()); - return 1; - } - - if (!dds.isSupported() || dds.isTexture3D()) - { - fprintf(stderr, "The file '%s' is not a supported DDS file.\n", input.str()); - return 1; - } - - uint faceCount; - if (dds.isTexture2D()) - { - inputOptions.setTextureLayout(nvtt::TextureType_2D, dds.width(), dds.height()); - faceCount = 1; - } - else - { - nvDebugCheck(dds.isTextureCube()); - inputOptions.setTextureLayout(nvtt::TextureType_Cube, dds.width(), dds.height()); - faceCount = 6; - } - - uint mipmapCount = dds.mipmapCount(); - - nv::Image mipmap; - - for (uint f = 0; f < faceCount; f++) - { - for (uint m = 0; m <= mipmapCount; m++) - { - dds.mipmap(&mipmap, f, m); - - inputOptions.setMipmapData(mipmap.pixels(), mipmap.width(), mipmap.height(), 1, f, m); - } - } - } - else - { - // Regular image. - nv::Image image; - if (!image.load(input)) - { - fprintf(stderr, "The file '%s' is not a supported image type.\n", input.str()); - return 1; - } - - inputOptions.setTextureLayout(nvtt::TextureType_2D, image.width(), image.height()); - inputOptions.setMipmapData(image.pixels(), image.width(), image.height()); - } - - if (fast) - { - inputOptions.setMipmapping(true, nvtt::MipmapFilter_Box); - } - else - { - inputOptions.setMipmapping(true, nvtt::MipmapFilter_Box); - //inputOptions.setMipmapping(true, nvtt::MipmapFilter_Kaiser); - } - - if (wrapRepeat) - { - inputOptions.setWrapMode(nvtt::WrapMode_Repeat); - } - else - { - inputOptions.setWrapMode(nvtt::WrapMode_Clamp); - } - - if (normal) - { - setNormalMap(inputOptions); - } - else if (color2normal) - { - setColorToNormalMap(inputOptions); - } - else - { - setColorMap(inputOptions); - } - - if (noMipmaps) - { - inputOptions.setMipmapping(false); - } - - - nvtt::CompressionOptions compressionOptions; - compressionOptions.setFormat(format); - if (fast) - { - compressionOptions.setQuality(nvtt::Quality_Fastest); - } - else - { - compressionOptions.setQuality(nvtt::Quality_Normal); - //compressionOptions.setQuality(nvtt::Quality_Production, 0.5f); - //compressionOptions.setQuality(nvtt::Quality_Highest); - } - compressionOptions.enableHardwareCompression(!nocuda); - compressionOptions.setColorWeights(1, 1, 1); - - if (externalCompressor != NULL) - { - compressionOptions.setExternalCompressor(externalCompressor); - } - - - MyErrorHandler errorHandler; - nvtt::OutputOptions outputOptions(NULL, &errorHandler); - -// printf("Press ENTER.\n"); -// fflush(stdout); -// getchar(); - - clock_t start = clock(); - - const int iterationCount = 20; - for (int i = 0; i < iterationCount; i++) - { - nvtt::compress(inputOptions, outputOptions, compressionOptions); - } - - clock_t end = clock(); - - float seconds = float(end-start) / CLOCKS_PER_SEC - printf("total time taken: %.3f seconds\n", seconds); - printf("time taken per texture: %.3f seconds\n", seconds / iterationCount); - printf("textures per second: %.3f T/s\n", iterationCount / seconds); - - return 0; -} - Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/tools/cmdline.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/tools/cmdline.h +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/tools/cmdline.h @@ -1,68 +0,0 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the 
"Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. - -#ifndef CMDLINE_H -#define CMDLINE_H - -#include - -#include // stderr -#include // exit -#include // va_list - - -struct MyMessageHandler : public nv::MessageHandler { - MyMessageHandler() { - nv::debug::setMessageHandler( this ); - } - ~MyMessageHandler() { - nv::debug::resetMessageHandler(); - } - - virtual void log( const char * str, va_list arg ) { - va_list val; - va_copy(val, arg); - vfprintf(stderr, str, arg); - va_end(val); - } -}; - - -struct MyAssertHandler : public nv::AssertHandler { - MyAssertHandler() { - nv::debug::setAssertHandler( this ); - } - ~MyAssertHandler() { - nv::debug::resetAssertHandler(); - } - - // Handler method, note that func might be NULL! - virtual int assert( const char *exp, const char *file, int line, const char *func ) { - fprintf(stderr, "Assertion failed: %s\nIn %s:%d\n", exp, file, line); - nv::debug::dumpInfo(); - exit(1); - } -}; - - -#endif // CMDLINE_H Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/tools/compress.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/tools/compress.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/tools/compress.cpp @@ -1,468 +0,0 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. 
- -#include -#include - -#include -#include - -#include - -#include "cmdline.h" - -#include // clock - -//#define WINDOWS_LEAN_AND_MEAN -//#include // TIMER - - -struct MyOutputHandler : public nvtt::OutputHandler -{ - MyOutputHandler(const char * name) : total(0), progress(0), percentage(0), stream(new nv::StdOutputStream(name)) {} - virtual ~MyOutputHandler() { delete stream; } - - void setTotal(int64 t) - { - total = t + 128; - } - void setDisplayProgress(bool b) - { - verbose = b; - } - - virtual void beginImage(int size, int width, int height, int depth, int face, int miplevel) - { - // ignore. - } - - // Output data. - virtual bool writeData(const void * data, int size) - { - nvDebugCheck(stream != NULL); - stream->serialize(const_cast(data), size); - - progress += size; - int p = int((100 * progress) / total); - if (verbose && p != percentage) - { - nvCheck(p >= 0); - - percentage = p; - printf("\r%d%%", percentage); - fflush(stdout); - } - - return true; - } - - int64 total; - int64 progress; - int percentage; - bool verbose; - nv::StdOutputStream * stream; -}; - -struct MyErrorHandler : public nvtt::ErrorHandler -{ - virtual void error(nvtt::Error e) - { -#if _DEBUG - nvDebugBreak(); -#endif - printf("Error: '%s'\n", nvtt::errorString(e)); - } -}; - - - - -// Set color to normal map conversion options. -void setColorToNormalMap(nvtt::InputOptions & inputOptions) -{ - inputOptions.setNormalMap(false); - inputOptions.setConvertToNormalMap(true); - inputOptions.setHeightEvaluation(1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 0.0f); - //inputOptions.setNormalFilter(1.0f, 0, 0, 0); - //inputOptions.setNormalFilter(0.0f, 0, 0, 1); - inputOptions.setGamma(1.0f, 1.0f); - inputOptions.setNormalizeMipmaps(true); -} - -// Set options for normal maps. -void setNormalMap(nvtt::InputOptions & inputOptions) -{ - inputOptions.setNormalMap(true); - inputOptions.setConvertToNormalMap(false); - inputOptions.setGamma(1.0f, 1.0f); - inputOptions.setNormalizeMipmaps(true); -} - -// Set options for color maps. -void setColorMap(nvtt::InputOptions & inputOptions) -{ - inputOptions.setNormalMap(false); - inputOptions.setConvertToNormalMap(false); - inputOptions.setGamma(2.2f, 2.2f); - inputOptions.setNormalizeMipmaps(false); -} - - - -int main(int argc, char *argv[]) -{ - MyAssertHandler assertHandler; - MyMessageHandler messageHandler; - - bool alpha = false; - bool normal = false; - bool color2normal = false; - bool wrapRepeat = false; - bool noMipmaps = false; - bool fast = false; - bool nocuda = false; - bool silent = false; - bool bc1n = false; - nvtt::Format format = nvtt::Format_BC1; - - const char * externalCompressor = NULL; - - nv::Path input; - nv::Path output; - - - // Parse arguments. - for (int i = 1; i < argc; i++) - { - // Input options. - if (strcmp("-color", argv[i]) == 0) - { - } - else if (strcmp("-alpha", argv[i]) == 0) - { - alpha = true; - } - else if (strcmp("-normal", argv[i]) == 0) - { - normal = true; - } - else if (strcmp("-tonormal", argv[i]) == 0) - { - color2normal = true; - } - else if (strcmp("-clamp", argv[i]) == 0) - { - } - else if (strcmp("-repeat", argv[i]) == 0) - { - wrapRepeat = true; - } - else if (strcmp("-nomips", argv[i]) == 0) - { - noMipmaps = true; - } - - // Compression options. 
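// Aside -- an illustrative sketch, not part of this patch: stripped of its
// option parsing, the nvcompress tool being deleted here drives NVTT in four
// steps: describe the input layout and pixels, choose a format and quality,
// point the output at a file or handler, then run the compressor. A minimal
// version of that flow, using only calls that appear in this file (plus
// OutputOptions::setFileName, which the original keeps commented out);
// NVTT expects the pixels as 32-bit BGRA by default:
#include <nvtt/nvtt.h>  // restored; the original #include lines were lost in this copy

static bool CompressToBC1(const void * bgraPixels, int width, int height, const char * ddsName)
{
    nvtt::InputOptions inputOptions;
    inputOptions.setTextureLayout(nvtt::TextureType_2D, width, height);
    inputOptions.setMipmapData(bgraPixels, width, height);
    inputOptions.setWrapMode(nvtt::WrapMode_Clamp);
    inputOptions.setAlphaMode(nvtt::AlphaMode_None);

    nvtt::CompressionOptions compressionOptions;
    compressionOptions.setFormat(nvtt::Format_BC1);
    compressionOptions.setQuality(nvtt::Quality_Normal);

    nvtt::OutputOptions outputOptions;
    outputOptions.setFileName(ddsName);

    nvtt::Compressor compressor;
    return compressor.process(inputOptions, compressionOptions, outputOptions);
}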
- else if (strcmp("-fast", argv[i]) == 0) - { - fast = true; - } - else if (strcmp("-nocuda", argv[i]) == 0) - { - nocuda = true; - } - else if (strcmp("-rgb", argv[i]) == 0) - { - format = nvtt::Format_RGB; - } - else if (strcmp("-bc1", argv[i]) == 0) - { - format = nvtt::Format_BC1; - } - else if (strcmp("-bc1n", argv[i]) == 0) - { - format = nvtt::Format_BC1; - bc1n = true; - } - else if (strcmp("-bc1a", argv[i]) == 0) - { - format = nvtt::Format_BC1a; - } - else if (strcmp("-bc2", argv[i]) == 0) - { - format = nvtt::Format_BC2; - } - else if (strcmp("-bc3", argv[i]) == 0) - { - format = nvtt::Format_BC3; - } - else if (strcmp("-bc3n", argv[i]) == 0) - { - format = nvtt::Format_BC3n; - } - else if (strcmp("-bc4", argv[i]) == 0) - { - format = nvtt::Format_BC4; - } - else if (strcmp("-bc5", argv[i]) == 0) - { - format = nvtt::Format_BC5; - } - - // Undocumented option. Mainly used for testing. - else if (strcmp("-ext", argv[i]) == 0) - { - if (i+1 < argc && argv[i+1][0] != '-') { - externalCompressor = argv[i+1]; - i++; - } - } - - // Misc options - else if (strcmp("-silent", argv[i]) == 0) - { - silent = true; - } - - else if (argv[i][0] != '-') - { - input = argv[i]; - - if (i+1 < argc && argv[i+1][0] != '-') { - output = argv[i+1]; - } - else - { - output.copy(input.str()); - output.stripExtension(); - output.append(".dds"); - } - - break; - } - } - - const uint version = nvtt::version(); - const uint major = version / 100; - const uint minor = version % 100; - - - printf("NVIDIA Texture Tools %u.%u - Copyright NVIDIA Corporation 2007\n\n", major, minor); - - if (input.isNull()) - { - printf("usage: nvcompress [options] infile [outfile]\n\n"); - - printf("Input options:\n"); - printf(" -color \tThe input image is a color map (default).\n"); - printf(" -alpha \tThe input image has an alpha channel used for transparency.\n"); - printf(" -normal \tThe input image is a normal map.\n"); - printf(" -tonormal\tConvert input to normal map.\n"); - printf(" -clamp \tClamp wrapping mode (default).\n"); - printf(" -repeat \tRepeat wrapping mode.\n"); - printf(" -nomips \tDisable mipmap generation.\n\n"); - - printf("Compression options:\n"); - printf(" -fast \tFast compression.\n"); - printf(" -nocuda \tDo not use cuda compressor.\n"); - printf(" -rgb \tRGBA format\n"); - printf(" -bc1 \tBC1 format (DXT1)\n"); - printf(" -bc1n \tBC1 normal map format (DXT1nm)\n"); - printf(" -bc1a \tBC1 format with binary alpha (DXT1a)\n"); - printf(" -bc2 \tBC2 format (DXT3)\n"); - printf(" -bc3 \tBC3 format (DXT5)\n"); - printf(" -bc3n \tBC3 normal map format (DXT5nm)\n"); - printf(" -bc4 \tBC4 format (ATI1)\n"); - printf(" -bc5 \tBC5 format (3Dc/ATI2)\n\n"); - - return EXIT_FAILURE; - } - - // @@ Make sure input file exists. - - // Set input options. - nvtt::InputOptions inputOptions; - - if (nv::strCaseCmp(input.extension(), ".dds") == 0) - { - // Load surface. 
- nv::DirectDrawSurface dds(input); - if (!dds.isValid()) - { - fprintf(stderr, "The file '%s' is not a valid DDS file.\n", input.str()); - return EXIT_FAILURE; - } - - if (!dds.isSupported() || dds.isTexture3D()) - { - fprintf(stderr, "The file '%s' is not a supported DDS file.\n", input.str()); - return EXIT_FAILURE; - } - - uint faceCount; - if (dds.isTexture2D()) - { - inputOptions.setTextureLayout(nvtt::TextureType_2D, dds.width(), dds.height()); - faceCount = 1; - } - else - { - nvDebugCheck(dds.isTextureCube()); - inputOptions.setTextureLayout(nvtt::TextureType_Cube, dds.width(), dds.height()); - faceCount = 6; - } - - uint mipmapCount = dds.mipmapCount(); - - nv::Image mipmap; - - for (uint f = 0; f < faceCount; f++) - { - for (uint m = 0; m < mipmapCount; m++) - { - dds.mipmap(&mipmap, f, m); - - inputOptions.setMipmapData(mipmap.pixels(), mipmap.width(), mipmap.height(), 1, f, m); - } - } - } - else - { - // Regular image. - nv::Image image; - if (!image.load(input)) - { - fprintf(stderr, "The file '%s' is not a supported image type.\n", input.str()); - return EXIT_FAILURE; - } - - inputOptions.setTextureLayout(nvtt::TextureType_2D, image.width(), image.height()); - inputOptions.setMipmapData(image.pixels(), image.width(), image.height()); - } - - if (wrapRepeat) - { - inputOptions.setWrapMode(nvtt::WrapMode_Repeat); - } - else - { - inputOptions.setWrapMode(nvtt::WrapMode_Clamp); - } - - if (alpha) - { - inputOptions.setAlphaMode(nvtt::AlphaMode_Transparency); - } - else - { - inputOptions.setAlphaMode(nvtt::AlphaMode_None); - } - - if (normal) - { - setNormalMap(inputOptions); - } - else if (color2normal) - { - setColorToNormalMap(inputOptions); - } - else - { - setColorMap(inputOptions); - } - - if (noMipmaps) - { - inputOptions.setMipmapGeneration(false); - } - - nvtt::CompressionOptions compressionOptions; - compressionOptions.setFormat(format); - if (fast) - { - compressionOptions.setQuality(nvtt::Quality_Fastest); - } - else - { - compressionOptions.setQuality(nvtt::Quality_Normal); - //compressionOptions.setQuality(nvtt::Quality_Production); - //compressionOptions.setQuality(nvtt::Quality_Highest); - } - - if (bc1n) - { - compressionOptions.setColorWeights(1, 1, 0); - } - - if (externalCompressor != NULL) - { - compressionOptions.setExternalCompressor(externalCompressor); - } - - - MyErrorHandler errorHandler; - MyOutputHandler outputHandler(output); - if (outputHandler.stream->isError()) - { - fprintf(stderr, "Error opening '%s' for writting\n", output.str()); - return EXIT_FAILURE; - } - - nvtt::Compressor compressor; - compressor.enableCudaAcceleration(!nocuda); - - printf("CUDA acceleration "); - if (compressor.isCudaAccelerationEnabled()) - { - printf("ENABLED\n\n"); - } - else - { - printf("DISABLED\n\n"); - } - - outputHandler.setTotal(compressor.estimateSize(inputOptions, compressionOptions)); - outputHandler.setDisplayProgress(!silent); - - nvtt::OutputOptions outputOptions; - //outputOptions.setFileName(output); - outputOptions.setOutputHandler(&outputHandler); - outputOptions.setErrorHandler(&errorHandler); - -// printf("Press ENTER.\n"); -// fflush(stdout); -// getchar(); - - clock_t start = clock(); - - if (!compressor.process(inputOptions, compressionOptions, outputOptions)) - { - return EXIT_FAILURE; - } - - clock_t end = clock(); - printf("\rtime taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC); - - return EXIT_SUCCESS; -} - Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/tools/configdialog.h 
=================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/tools/configdialog.h +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/tools/configdialog.h @@ -1,69 +0,0 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. - -#ifndef CONFIGDIALOG_H -#define CONFIGDIALOG_H - -#include - -#include "ui_configdialog.h" - -#include - - -class ConfigDialog : public QDialog -{ - Q_OBJECT -public: - ConfigDialog(QWidget *parent = 0); - ConfigDialog(const char * fileName, QWidget *parent = 0); - -protected slots: - - void openClicked(); - void generateMipmapsChanged(int state); - void mipmapFilterChanged(QString name); - - void colorWeightChanged(); - void uniformWeightToggled(bool checked); - void luminanceWeightToggled(bool checked); - - void normalMapModeChanged(bool checked); - - bool open(QString fileName); - -private: - - void init(); - -private: - Ui::ConfigDialog ui; - - nvtt::InputOptions inputOptions; - nvtt::CompressionOptions compressionOptions; - nvtt::OutputOptions outputOptions; - -}; - - -#endif // CONFIGDIALOG_H Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/tools/configdialog.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/tools/configdialog.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/tools/configdialog.cpp @@ -1,170 +0,0 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. - -#include "configdialog.h" - -#include - -#include - - -ConfigDialog::ConfigDialog(QWidget *parent/*=0*/) : QDialog(parent) -{ - init(); -} - -ConfigDialog::ConfigDialog(const char * fileName, QWidget *parent/*=0*/) : QDialog(parent) -{ - init(); - - open(fileName); -} - -void ConfigDialog::init() -{ - ui.setupUi(this); - - connect(ui.openButton, SIGNAL(clicked()), this, SLOT(openClicked())); - connect(ui.generateMipmapsCheckBox, SIGNAL(stateChanged(int)), this, SLOT(generateMipmapsChanged(int))); - connect(ui.mipmapFilterComboBox, SIGNAL(activated(QString)), this, SLOT(mipmapFilterChanged(QString))); - //connect(ui.mipmapFilterSettings, SIGNAL(clicked()), this, SLOT(mipmapFilterSettingsShow())); - - connect(ui.redSpinBox, SIGNAL(valueChanged(double)), this, SLOT(colorWeightChanged())); - connect(ui.greenSpinBox, SIGNAL(valueChanged(double)), this, SLOT(colorWeightChanged())); - connect(ui.blueSpinBox, SIGNAL(valueChanged(double)), this, SLOT(colorWeightChanged())); - connect(ui.uniformButton, SIGNAL(toggled(bool)), this, SLOT(uniformWeightToggled(bool))); - connect(ui.luminanceButton, SIGNAL(toggled(bool)), this, SLOT(luminanceWeightToggled(bool))); - - //connect(ui.rgbMapRadioButton, SIGNAL(toggled(bool)), this, SLOT(colorModeChanged())); - connect(ui.normalMapRadioButton, SIGNAL(toggled(bool)), this, SLOT(normalMapModeChanged(bool))); -} - - -void ConfigDialog::openClicked() -{ - // @@ Open file dialog. - - QString fileName; - - open(fileName); -} - -void ConfigDialog::generateMipmapsChanged(int state) -{ - Q_UNUSED(state); - - bool generateMipmapEnabled = ui.generateMipmapsCheckBox->isChecked(); - - ui.mipmapFilterLabel->setEnabled(generateMipmapEnabled); - ui.mipmapFilterComboBox->setEnabled(generateMipmapEnabled); - ui.limitMipmapsCheckBox->setEnabled(generateMipmapEnabled); - - bool enableFilterSettings = (ui.mipmapFilterComboBox->currentText() == "Kaiser"); - ui.mipmapFilterSettings->setEnabled(generateMipmapEnabled && enableFilterSettings); - - bool enableMaxLevel = ui.limitMipmapsCheckBox->isChecked(); - ui.maxLevelLabel->setEnabled(generateMipmapEnabled && enableMaxLevel); - ui.maxLevelSpinBox->setEnabled(generateMipmapEnabled && enableMaxLevel); -} - -void ConfigDialog::mipmapFilterChanged(QString name) -{ - bool enableFilterSettings = (name == "Kaiser"); - ui.mipmapFilterSettings->setEnabled(enableFilterSettings); -} - - -void ConfigDialog::colorWeightChanged() -{ - double r = ui.redSpinBox->value(); - double g = ui.greenSpinBox->value(); - double b = ui.blueSpinBox->value(); - - bool uniform = (r == 1.0 && g == 1.0 && b == 1.0); - bool luminance = (r == 0.3 && g == 0.59 && b == 0.11); - - ui.uniformButton->setChecked(uniform); - ui.luminanceButton->setChecked(luminance); -} - -void ConfigDialog::uniformWeightToggled(bool checked) -{ - if (checked) - { - ui.redSpinBox->setValue(1.0); - ui.greenSpinBox->setValue(1.0); - ui.blueSpinBox->setValue(1.0); - } -} - -void ConfigDialog::luminanceWeightToggled(bool checked) -{ - if (checked) - { - ui.redSpinBox->setValue(0.3); - ui.greenSpinBox->setValue(0.59); - ui.blueSpinBox->setValue(0.11); - } -} - -void ConfigDialog::normalMapModeChanged(bool checked) -{ - ui.alphaModeGroupBox->setEnabled(!checked); - ui.inputGammaSpinBox->setEnabled(!checked); - 
ui.inputGammaLabel->setEnabled(!checked); - ui.outputGammaSpinBox->setEnabled(!checked); - ui.outputGammaLabel->setEnabled(!checked); -} - - -bool ConfigDialog::open(QString fileName) -{ - // @@ Load image. - QImage image; - - // @@ If success. - { - ui.imagePathLineEdit->setText(fileName); - - // @@ Set image in graphics view. - - // @@ Set image description. - - // @@ Provide image to nvtt. - - int w = image.width(); - int h = image.height(); - void * data = NULL; - - inputOptions.setTextureLayout(nvtt::TextureType_2D, w, h); - inputOptions.setMipmapData(data, w, h); - - return true; - } - - return false; -} - - - - Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/tools/configdialog.ui =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/tools/configdialog.ui +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/tools/configdialog.ui @@ -1,1046 +0,0 @@ - - ConfigDialog - - - - 0 - 0 - 674 - 475 - - - - NVIDIA Texture Tools - - - - - - true - - - - - - - - - - - 64 - 0 - - - - - 128 - 16777215 - - - - Qt::ScrollBarAlwaysOff - - - QListView::Static - - - QListView::TopToBottom - - - false - - - QListView::Adjust - - - QListView::ListMode - - - - Input Options - - - - - Compression Options - - - - - Output Options - - - - - Preview - - - ../../../../../../castano-stuff/qshaderedit/src/images/colorpicker.png - - - - - - - - - - - 0 - 0 - - - - 3 - - - - - -1 - - - 0 - - - 0 - - - 0 - - - 0 - - - - - 0 - - - Qt::ElideNone - - - false - - - - File Path - - - - - - - - - - - - 0 - 0 - - - - Open - - - - - - - - - - - - - Image Settings - - - - - - - - - 0 - 0 - - - - Color Mode - - - Qt::AlignHCenter - - - - 0 - - - - - RGB - - - true - - - - - - - Normal Map - - - - - - - - - - - 0 - 0 - - - - Alpha Mode - - - Qt::AlignHCenter - - - - 0 - - - - - None - - - false - - - - - - - Transparency - - - true - - - - - - - Premultiplied - - - - - - - - - - - - 0 - - - - - - 0 - 0 - - - - Wrap Mode: - - - mipmapFilterComboBox - - - - - - - - 16777215 - 26 - - - - - Mirror - - - - - Repeat - - - - - Clamp - - - - - - - - - - - - - 0 - 0 - - - - Input Gamma: - - - inputGammaSpinBox - - - - - - - - 0 - 0 - - - - QAbstractSpinBox::UpDownArrows - - - 0.050000000000000 - - - 4.000000000000000 - - - 0.050000000000000 - - - 2.200000000000000 - - - - - - - - - - - - 0 - 0 - - - - Output Gamma: - - - inputGammaSpinBox - - - - - - - - 0 - 0 - - - - QAbstractSpinBox::UpDownArrows - - - 0.050000000000000 - - - 4.000000000000000 - - - 0.050000000000000 - - - 2.200000000000000 - - - - - - - - - Qt::Vertical - - - - 433 - 16 - - - - - - - - - Mipmaps - - - - - - Generate mipmaps - - - true - - - - - - - 1 - - - - - - 0 - 0 - - - - Mipmap filter: - - - mipmapFilterComboBox - - - - - - - - 16777215 - 26 - - - - - Box - - - - - Triangle - - - - - Kaiser - - - - - - - - false - - - - 0 - 0 - - - - - 16777215 - 24 - - - - false - - - ... 
- - - Qt::ToolButtonTextOnly - - - - - - - - - - - - 0 - 0 - - - - Limit Mipmaps - - - - - - - false - - - - 0 - 0 - - - - Max Level: - - - - - - - false - - - - 0 - 0 - - - - - 80 - 16777215 - - - - - - - - - - Qt::Vertical - - - - 204 - 71 - - - - - - - - - Normal Map - - - - - - - - - - 0 - - - 0 - - - 0 - - - 0 - - - - - - - - 0 - 0 - - - - Format: - - - Qt::PlainText - - - Qt::NoTextInteraction - - - formatComboBox - - - - - - - - Uncompressed - - - - - BC1 (DXT1) - - - - - BC1a (DXT1a) - - - - - BC2 (DXT3) - - - - - BC3 (DXT5) - - - - - BC4 - - - - - BC5 - - - - - - - - - - - - - 0 - 0 - - - - Quality: - - - Qt::PlainText - - - Qt::NoTextInteraction - - - formatComboBox - - - - - - - 1 - - - - Fastest - - - - - Normal - - - - - Production - - - - - Highest - - - - - - - - - - - - Qt::Horizontal - - - - 40 - 20 - - - - - - - - - 0 - 0 - - - - Color Weights - - - Qt::AlignHCenter - - - - - - - - Red - - - redSpinBox - - - - - - - 1.000000000000000 - - - 0.050000000000000 - - - 1.000000000000000 - - - - - - - - - - - Green - - - greenSpinBox - - - - - - - 1.000000000000000 - - - 0.050000000000000 - - - 1.000000000000000 - - - - - - - - - - - Blue - - - blueSpinBox - - - - - - - 1.000000000000000 - - - 0.050000000000000 - - - 1.000000000000000 - - - - - - - - - - - - 16777215 - 22 - - - - Uniform Weights - - - true - - - true - - - - - - - - 16777215 - 22 - - - - Luminance Weights - - - true - - - - - - - - - - - - Qt::Horizontal - - - - 40 - 20 - - - - - - - - - - Qt::Vertical - - - - 484 - 31 - - - - - - - - - - - 0 - - - 0 - - - 0 - - - 0 - - - - - - - - - - - - - - Bilinear Filter - - - true - - - - - - - View difference - - - - - - - - - - - - - - - Qt::Horizontal - - - - - - - - - Default - - - - - - - true - - - 0 - - - true - - - Qt::Horizontal - - - false - - - - - - - Quit - - - - - - - - - - - listWidget - currentRowChanged(int) - stackedWidget - setCurrentIndex(int) - - - 118 - 193 - - - 154 - 220 - - - - - pushButton - clicked() - ConfigDialog - accept() - - - 565 - 491 - - - 582 - 506 - - - - - limitMipmapsCheckBox - clicked(bool) - maxLevelSpinBox - setEnabled(bool) - - - 451 - 120 - - - 524 - 120 - - - - - limitMipmapsCheckBox - clicked(bool) - maxLevelLabel - setEnabled(bool) - - - 337 - 120 - - - 482 - 124 - - - - - Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/tools/ddsinfo.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/tools/ddsinfo.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/tools/ddsinfo.cpp @@ -1,57 +0,0 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. - -#include -#include - -#include -#include - -#include "cmdline.h" - - -int main(int argc, char *argv[]) -{ - MyAssertHandler assertHandler; - MyMessageHandler messageHandler; - - if (argc != 2) - { - printf("NVIDIA Texture Tools - Copyright NVIDIA Corporation 2007\n\n"); - printf("usage: nvddsinfo ddsfile\n\n"); - return 1; - } - - // Load surface. - nv::DirectDrawSurface dds(argv[1]); - if (!dds.isValid()) - { - printf("The file '%s' is not a valid DDS file.\n", argv[1]); - return 1; - } - - dds.printInfo(); - - return 0; -} - Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/tools/decompress.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/tools/decompress.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/tools/decompress.cpp @@ -1,71 +0,0 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. - -#include -#include - -#include -#include - -#include - -#include "cmdline.h" - -int main(int argc, char *argv[]) -{ - MyAssertHandler assertHandler; - MyMessageHandler messageHandler; - - if (argc != 2) - { - printf("NVIDIA Texture Tools - Copyright NVIDIA Corporation 2007\n\n"); - printf("usage: nvdecompress 'ddsfile'\n\n"); - return 1; - } - - // Load surface. - nv::DirectDrawSurface dds(argv[1]); - if (!dds.isValid()) - { - printf("The file '%s' is not a valid DDS file.\n", argv[1]); - return 1; - } - - nv::Path name(argv[1]); - name.stripExtension(); - name.append(".tga"); - - nv::StdOutputStream stream(name.str()); - if (stream.isError()) { - printf("Error opening '%s' for writting\n", name.str()); - return 1; - } - - // @@ TODO: Add command line options to output mipmaps, cubemap faces, etc. 
- nv::Image img; - dds.mipmap(&img, 0, 0); // get first image - nv::ImageIO::saveTGA(stream, &img); - - return 0; -} - Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/tools/imgdiff.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/tools/imgdiff.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/tools/imgdiff.cpp @@ -1,296 +0,0 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. - -#include -#include -#include - -#include -#include - -#include -#include - -#include - -#include "cmdline.h" - -static bool loadImage(nv::Image & image, const char * fileName) -{ - if (nv::strCaseCmp(nv::Path::extension(fileName), ".dds") == 0) - { - nv::DirectDrawSurface dds(fileName); - if (!dds.isValid()) - { - printf("The file '%s' is not a valid DDS file.\n", fileName); - return false; - } - - dds.mipmap(&image, 0, 0); // get first image - } - else - { - // Regular image. - if (!image.load(fileName)) - { - printf("The file '%s' is not a supported image type.\n", fileName); - return false; - } - } - - return true; -} - -// @@ Compute per-tile errors. -struct Error -{ - Error() - { - samples = 0; - mabse = 0.0f; - maxabse = 0.0f; - mse = 0.0f; - } - - void addSample(float e) - { - samples++; - mabse += fabsf(e); - maxabse = nv::max(maxabse, fabsf(e)); - mse += e * e; - } - - void done() - { - mabse /= samples; - mse /= samples; - rmse = sqrtf(mse); - psnr = (rmse == 0) ? 999.0f : 20.0f * log10(255.0f / rmse); - } - - void print() - { - printf(" Mean absolute error: %f\n", mabse); - printf(" Max absolute error: %f\n", maxabse); - printf(" Root mean squared error: %f\n", rmse); - printf(" Peak signal to noise ratio in dB: %f\n", psnr); - } - - int samples; - float mabse; - float maxabse; - float mse; - float rmse; - float psnr; -}; - -struct NormalError -{ - NormalError() - { - samples = 0; - ade = 0.0f; - mse = 0.0f; - } - - void addSample(nv::Color32 o, nv::Color32 c) - { - nv::Vector3 vo = nv::Vector3(o.r, o.g, o.b); - nv::Vector3 vc = nv::Vector3(c.r, c.g, c.b); - - // Unpack and normalize. - vo = nv::normalize(2.0f * (vo / 255.0f) - 1.0f); - vc = nv::normalize(2.0f * (vc / 255.0f) - 1.0f); - - ade += acosf(nv::clamp(dot(vo, vc), -1.0f, 1.0f)); - mse += length_squared((vo - vc) * (255 / 2.0f)); - - samples++; - } - - void done() - { - if (samples) - { - ade /= samples; - mse /= samples * 3; - rmse = sqrtf(mse); - psnr = (rmse == 0) ? 
999.0f : 20.0f * log10(255.0f / rmse); - } - } - - void print() - { - printf(" Angular deviation error: %f\n", ade); - printf(" Root mean squared error: %f\n", rmse); - printf(" Peak signal to noise ratio in dB: %f\n", psnr); - } - - int samples; - float ade; - float mse; - float rmse; - float psnr; -}; - - -int main(int argc, char *argv[]) -{ - MyAssertHandler assertHandler; - MyMessageHandler messageHandler; - - bool compareNormal = false; - bool compareAlpha = false; - - nv::Path input0; - nv::Path input1; - nv::Path output; - - // Parse arguments. - for (int i = 1; i < argc; i++) - { - // Input options. - if (strcmp("-normal", argv[i]) == 0) - { - compareNormal = true; - } - if (strcmp("-alpha", argv[i]) == 0) - { - compareAlpha = true; - } - - else if (argv[i][0] != '-') - { - input0 = argv[i]; - - if (i+1 < argc && argv[i+1][0] != '-') { - input1 = argv[i+1]; - } - - break; - } - } - - if (input0.isNull() || input1.isNull()) - { - printf("NVIDIA Texture Tools - Copyright NVIDIA Corporation 2007\n\n"); - - printf("usage: nvimgdiff [options] original_file updated_file [output]\n\n"); - - printf("Diff options:\n"); - printf(" -normal \tCompare images as if they were normal maps.\n"); - printf(" -alpha \tCompare alpha weighted images.\n"); - - return 1; - } - - nv::Image image0, image1; - if (!loadImage(image0, input0)) return 0; - if (!loadImage(image1, input1)) return 0; - - const uint w0 = image0.width(); - const uint h0 = image0.height(); - const uint w1 = image1.width(); - const uint h1 = image1.height(); - const uint w = nv::min(w0, w1); - const uint h = nv::min(h0, h1); - - // Compute errors. - Error error_r; - Error error_g; - Error error_b; - Error error_a; - Error error_total; - NormalError error_normal; - - for (uint i = 0; i < h; i++) - { - for (uint e = 0; e < w; e++) - { - const nv::Color32 c0(image0.pixel(e, i)); - const nv::Color32 c1(image1.pixel(e, i)); - - float r = float(c0.r - c1.r); - float g = float(c0.g - c1.g); - float b = float(c0.b - c1.b); - float a = float(c0.a - c1.a); - - error_r.addSample(r); - error_g.addSample(g); - error_b.addSample(b); - error_a.addSample(a); - - if (compareNormal) - { - error_normal.addSample(c0, c1); - } - - if (compareAlpha) - { - error_total.addSample(r * c0.a / 255.0f); - error_total.addSample(g * c0.a / 255.0f); - error_total.addSample(b * c0.a / 255.0f); - } - else - { - error_total.addSample(r); - error_total.addSample(g); - error_total.addSample(b); - } - } - } - - error_r.done(); - error_g.done(); - error_b.done(); - error_a.done(); - error_total.done(); - error_normal.done(); - - - printf("Image size compared: %dx%d\n", w, h); - if (w != w0 || w != w1 || h != h0 || h != h1) { - printf("--- NOTE: only the overlap between the 2 images (%d,%d) and (%d,%d) was compared\n", w0, h0, w1, h1); - } - printf("Total pixels: %d\n", w*h); - - printf("Color:\n"); - error_total.print(); - - if (compareNormal) - { - printf("Normal:\n"); - error_normal.print(); - } - - if (compareAlpha) - { - printf("Alpha:\n"); - error_a.print(); - } - - // @@ Write image difference. 
- - return 0; -} - Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/tools/main.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/tools/main.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/tools/main.cpp @@ -1,34 +0,0 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. - -#include -#include "configdialog.h" - -int main(int argc, char *argv[]) -{ - QApplication app(argc, argv); - ConfigDialog dialog; - return dialog.exec(); -} - - Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/tools/resize.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/tools/resize.cpp +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/tools/resize.cpp @@ -1,183 +0,0 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. 
- -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include - -#include - -#include "cmdline.h" - -static bool loadImage(nv::Image & image, const char * fileName) -{ - if (nv::strCaseCmp(nv::Path::extension(fileName), ".dds") == 0) - { - nv::DirectDrawSurface dds(fileName); - if (!dds.isValid()) - { - printf("The file '%s' is not a valid DDS file.\n", fileName); - return false; - } - - dds.mipmap(&image, 0, 0); // get first image - } - else - { - // Regular image. - if (!image.load(fileName)) - { - printf("The file '%s' is not a supported image type.\n", fileName); - return false; - } - } - - return true; -} - - -int main(int argc, char *argv[]) -{ - //MyAssertHandler assertHandler; - MyMessageHandler messageHandler; - - float scale = 0.5f; - float gamma = 2.2f; - nv::AutoPtr filter; - nv::Path input; - nv::Path output; - - nv::FloatImage::WrapMode wrapMode = nv::FloatImage::WrapMode_Mirror; - - // Parse arguments. - for (int i = 1; i < argc; i++) - { - // Input options. - if (strcmp("-s", argv[i]) == 0) - { - if (i+1 < argc && argv[i+1][0] != '-') { - scale = (float)atof(argv[i+1]); - i++; - } - } - else if (strcmp("-g", argv[i]) == 0) - { - if (i+1 < argc && argv[i+1][0] != '-') { - gamma = (float)atof(argv[i+1]); - i++; - } - } - else if (strcmp("-f", argv[i]) == 0) - { - if (i+1 == argc) break; - i++; - - if (strcmp("box", argv[i]) == 0) filter = new nv::BoxFilter(); - else if (strcmp("triangle", argv[i]) == 0) filter = new nv::TriangleFilter(); - else if (strcmp("quadratic", argv[i]) == 0) filter = new nv::QuadraticFilter(); - else if (strcmp("bspline", argv[i]) == 0) filter = new nv::BSplineFilter(); - else if (strcmp("mitchell", argv[i]) == 0) filter = new nv::MitchellFilter(); - else if (strcmp("lanczos", argv[i]) == 0) filter = new nv::LanczosFilter(); - else if (strcmp("kaiser", argv[i]) == 0) { - filter = new nv::KaiserFilter(3); - ((nv::KaiserFilter *)filter.ptr())->setParameters(4.0f, 1.0f); - } - } - else if (strcmp("-w", argv[i]) == 0) - { - if (i+1 == argc) break; - i++; - - if (strcmp("mirror", argv[i]) == 0) wrapMode = nv::FloatImage::WrapMode_Mirror; - else if (strcmp("repeat", argv[i]) == 0) wrapMode = nv::FloatImage::WrapMode_Repeat; - else if (strcmp("clamp", argv[i]) == 0) wrapMode = nv::FloatImage::WrapMode_Clamp; - } - else if (argv[i][0] != '-') - { - input = argv[i]; - - if (i+1 < argc && argv[i+1][0] != '-') { - output = argv[i+1]; - } - - break; - } - } - - if (input.isNull() || output.isNull()) - { - printf("NVIDIA Texture Tools - Copyright NVIDIA Corporation 2007\n\n"); - - printf("usage: nvzoom [options] input [output]\n\n"); - - printf("Options:\n"); - printf(" -s scale Scale factor (default = 0.5)\n"); - printf(" -g gamma Gamma correction (default = 2.2)\n"); - printf(" -f filter One of the following: (default = 'box')\n"); - printf(" * box\n"); - printf(" * triangle\n"); - printf(" * quadratic\n"); - printf(" * bspline\n"); - printf(" * mitchell\n"); - printf(" * lanczos\n"); - printf(" * kaiser\n"); - printf(" -w mode One of the following: (default = 'mirror')\n"); - printf(" * mirror\n"); - printf(" * repeat\n"); - printf(" * clamp\n"); - - return 1; - } - - if (filter == NULL) - { - filter = new nv::BoxFilter(); - } - - nv::Image image; - if (!loadImage(image, input)) return 0; - - nv::FloatImage fimage(&image); - fimage.toLinear(0, 3, gamma); - - nv::AutoPtr fresult(fimage.resize(*filter, uint(image.width() * scale), uint(image.height() * scale), wrapMode)); - - nv::AutoPtr 
result(fresult->createImageGammaCorrect(gamma)); - result->setFormat(nv::Image::Format_ARGB); - - nv::StdOutputStream stream(output); - nv::ImageIO::saveTGA(stream, result.ptr()); // @@ Add generic save function. Add support for png too. - - return 0; -} - Index: ps/trunk/source/graphics/TextureConverter.cpp =================================================================== --- ps/trunk/source/graphics/TextureConverter.cpp +++ ps/trunk/source/graphics/TextureConverter.cpp @@ -52,6 +52,10 @@ memcpy(&buffer[off], data, size); return true; } + + virtual void endImage() + { + } }; /** @@ -64,8 +68,6 @@ nvtt::InputOptions inputOptions; nvtt::CompressionOptions compressionOptions; nvtt::OutputOptions outputOptions; - bool isDXT1a; // see comment in RunThread - bool is8bpp; }; /** @@ -383,9 +385,6 @@ else request->inputOptions.setAlphaMode(nvtt::AlphaMode_None); - request->isDXT1a = false; - request->is8bpp = false; - if (settings.format == FMT_RGBA) { request->compressionOptions.setFormat(nvtt::Format_RGBA); @@ -396,7 +395,6 @@ { request->compressionOptions.setFormat(nvtt::Format_RGBA); request->compressionOptions.setPixelFormat(8, 0x00, 0x00, 0x00, 0xFF); - request->is8bpp = true; } else if (!hasAlpha) { @@ -406,7 +404,6 @@ else if (settings.format == FMT_DXT1) { request->compressionOptions.setFormat(nvtt::Format_DXT1a); - request->isDXT1a = true; } else if (settings.format == FMT_DXT3) { @@ -578,19 +575,6 @@ result->ret = compressor.process(request->inputOptions, request->compressionOptions, request->outputOptions); } - // Ugly hack: NVTT 2.0 doesn't set DDPF_ALPHAPIXELS for DXT1a, so we can't - // distinguish it from DXT1. (It's fixed in trunk by - // http://code.google.com/p/nvidia-texture-tools/source/detail?r=924&path=/trunk). - // Rather than using a trunk NVTT (unstable, makes packaging harder) - // or patching our copy (makes packaging harder), we'll just manually - // set the flag here. - if (request->isDXT1a && result->ret && result->output.buffer.size() > 80) - result->output.buffer[80] |= 1; // DDPF_ALPHAPIXELS in DDS_PIXELFORMAT.dwFlags - // Ugly hack: NVTT always sets DDPF_RGB, even if we're trying to output 8-bit - // alpha-only DDS with no RGB components. Unset that flag. - if (request->is8bpp) - result->output.buffer[80] &= ~0x40; // DDPF_RGB in DDS_PIXELFORMAT.dwFlags - // Push the result onto the queue std::lock_guard wait_lock(textureConverter->m_WorkerMutex); textureConverter->m_ResultQueue.push_back(result); Index: ps/trunk/source/lib/tex/tex_dds.cpp =================================================================== --- ps/trunk/source/lib/tex/tex_dds.cpp +++ ps/trunk/source/lib/tex/tex_dds.cpp @@ -310,10 +310,10 @@ // DDS_PIXELFORMAT.dwFlags // we've seen some DXT3 files that don't have this set (which is nonsense; -// any image lacking alpha should be stored as DXT1). it's authoritative -// if fourcc is DXT1 (there's no other way to tell DXT1 and DXT1a apart) -// and ignored otherwise. +// any image lacking alpha should be stored as DXT1). #define DDPF_ALPHAPIXELS 0x00000001 +// DDPF_ALPHA is used instead of DDPF_ALPHAPIXELS for DXT1a. +#define DDPF_ALPHA 0x00000002 #define DDPF_FOURCC 0x00000004 #define DDPF_RGB 0x00000040 @@ -326,7 +326,7 @@ u32 dwRBitMask; u32 dwGBitMask; u32 dwBBitMask; - u32 dwABitMask; // (DDPF_ALPHAPIXELS) + u32 dwABitMask; // (DDPF_ALPHA or DDPF_ALPHAPIXELS) }; @@ -435,7 +435,7 @@ RETURN_STATUS_IF_ERR(tex_validate_plain_format(bpp, (int)flags)); } // .. 
uncompressed 8bpp greyscale - else if(pf_flags & DDPF_ALPHAPIXELS) + else if(pf_flags & DDPF_ALPHA) { const size_t pf_bpp = (size_t)read_le32(&pf->dwRGBBitCount); const size_t pf_a_mask = (size_t)read_le32(&pf->dwABitMask); @@ -460,7 +460,7 @@ { case FOURCC('D','X','T','1'): bpp = 4; - if(pf_flags & DDPF_ALPHAPIXELS) + if(pf_flags & DDPF_ALPHA) flags |= DXT1A | TEX_ALPHA; else flags |= 1;