Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/SimdVector.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvmath/SimdVector.h (revision 23379) +++ ps/trunk/libraries/source/nvtt/src/src/nvmath/SimdVector.h (revision 23380) @@ -1,12 +1,12 @@ -// This code is in the public domain -- Ignacio Castaño +// This code is in the public domain -- Ignacio Castaño #include "Vector.h" // Vector3, Vector4 #if NV_USE_ALTIVEC # include "SimdVector_VE.h" #endif #if NV_USE_SSE # include "SimdVector_SSE.h" #endif Index: ps/trunk/libraries/source/nvtt/src/src/nvthread/ParallelFor.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvthread/ParallelFor.h (revision 23379) +++ ps/trunk/libraries/source/nvtt/src/src/nvthread/ParallelFor.h (revision 23380) @@ -1,181 +1,181 @@ -// This code is in the public domain -- Ignacio Castaño +// This code is in the public domain -- Ignacio Castaño #pragma once #ifndef NV_THREAD_PARALLELFOR_H #define NV_THREAD_PARALLELFOR_H #include "nvthread.h" //#include "Atomic.h" // atomic namespace nv { class Thread; class ThreadPool; typedef void ForTask(void * context, /*int tid,*/ int idx); // @@ It would be nice to have the thread index as an argument here. struct ParallelFor { ParallelFor(ForTask * task, void * context); ~ParallelFor(); void run(uint count, uint step = 1); // Invariant: ForTask * task; void * context; ThreadPool * pool; // State: uint count; uint step; /*atomic*/ uint idx; }; #if NV_CC_CPP11 template void sequential_for(uint count, F f) { for (uint i = 0; i < count; i++) { f(i); } } template void parallel_for(uint count, uint step, F f) { // Transform lambda into function pointer. 
auto lambda = [](void* context, /*int tid, */int idx) { F & f = *reinterpret_cast(context); f(/*tid, */idx); }; ParallelFor pf(lambda, &f); pf.run(count, step); } template void parallel_for(uint count, F f) { parallel_for(count, /*step=*/1, f); } template void parallel_for_if(uint count, uint step, bool condition, F f) { if (condition) { parallel_for(count, step, f); } else { sequential_for(count, f); } } #if 0 template void parallel_for_each(Array & array, uint step, F f) { // Transform lambda into function pointer. auto lambda = [](void* context, int idx) { F & f = *reinterpret_cast(context); f(array[idx]); }; ParallelFor pf(lambda, &f); pf.run(count, step); } #endif #endif // NV_CC_CPP11 /* #include "nvthread/Mutex.h" #include "nvcore/Array.inl" template struct ParallelOutputStream { #if 0 // In its most basic implementation the parallel stream is simply a single array protected by a mutex. Parallel_Output_Stream(uint producer_count) {} void reset() { final_array.clear(); } void append(uint producer_id, const T & t) { Lock(mutex); final_array.append(t); } nv::Array & finalize() { return final_array; } nv::Mutex mutex; nv::Array final_array; #elif 0 // Another simple implementation is to have N arrays that are merged at the end. 
ParallelOutputStream(uint producer_count) : producer_count(producer_count) { partial_array = new Array[producer_count]; } void reset() { for (int i = 0; i < producer_count; i++) { partial_array[i].clear(); } } void append(uint producer_id, const T & t) { nvCheck(producer_id < producer_count); partial_array[producer_id].append(t); } nv::Array & finalize() { for (int i = 1; i < producer_count; i++) { partial_array->append(partial_array[i]); partial_array[i].clear(); } return *partial_array; } uint producer_count; nv::Array * partial_array; #else ParallelOutputStream(uint producer_count) : producer_count(producer_count) { partial_array = new PartialArray[producer_count]; } // But a more sophisticated implementation keeps N short arrays that are merged as they get full. This preserves partial order. struct PartialArray { // Make sure this is aligned to cache lines. We want producers to access their respective arrays without conflicts. uint count; T data[32]; // Pick size to minimize wasted space considering cache line alignment? }; const uint producer_count; PartialArray * partial_array; // @@ Make sure mutex and partial_array are not in the same cache line! 
nv::Mutex mutex; nv::Array final_array; void append(uint producer_id, const T & t) { if (partial_array[producer_id].count == 32) { partial_array[producer_id].count = 0; Lock(mutex); final_array.append(partial_array[producer_id].data, 32); } partial_array[producer_id].data[partial_array[producer_id].count++] = t; } nv::Array & finalize() { for (int i = 0; i < producer_count; i++) { final_array.append(partial_array[producer_id].data, partial_array[producer_id].count); } return final_array; } #endif }; */ } // nv namespace #endif // NV_THREAD_PARALLELFOR_H Index: ps/trunk/libraries/source/nvtt/src/CMakeLists.txt =================================================================== --- ps/trunk/libraries/source/nvtt/src/CMakeLists.txt (revision 23379) +++ ps/trunk/libraries/source/nvtt/src/CMakeLists.txt (revision 23380) @@ -1,88 +1,88 @@ CMAKE_MINIMUM_REQUIRED(VERSION 2.8.0) PROJECT(NV) ENABLE_TESTING() SET(NV_CMAKE_DIR "${NV_SOURCE_DIR}/cmake") SET(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${NV_CMAKE_DIR}") # GCC check (needs -std:c++11 flag) #if(CMAKE_COMPILER_IS_GNUCC) # ADD_DEFINITIONS("-std=c++11") #ENDIF(CMAKE_COMPILER_IS_GNUCC) set (CMAKE_CXX_STANDARD 11) #IF(WIN32) # gnuwin32 paths: #SET(GNUWIN32_PATH "${NV_SOURCE_DIR}/extern/gnuwin32") #SET(CMAKE_INCLUDE_PATH ${CMAKE_INCLUDE_PATH} "${GNUWIN32_PATH}/include") #SET(CMAKE_LIBRARY_PATH ${CMAKE_LIBRARY_PATH} "${GNUWIN32_PATH}/lib") # Set GLUT path: #SET(GLUT_ROOT_DIR "${NV_SOURCE_DIR}/extern/glut") # Set FreeImage path: #SET(FREEIMAGE_ROOT_DIR "${NV_SOURCE_DIR}/extern/FreeImage") #ENDIF(WIN32) INCLUDE(${NV_CMAKE_DIR}/OptimalOptions.cmake) MESSAGE(STATUS "Setting optimal options") MESSAGE(STATUS " Processor: ${NV_SYSTEM_PROCESSOR}") MESSAGE(STATUS " Compiler Flags: ${CMAKE_CXX_FLAGS}") IF(CMAKE_BUILD_TYPE MATCHES "debug") SET(CMAKE_DEBUG_POSTFIX "_d" CACHE STRING "Postfix for debug build libraries.") ADD_DEFINITIONS(-D_DEBUG=1) ENDIF() IF(NVTT_SHARED) SET(NVCORE_SHARED TRUE) SET(NVMATH_SHARED TRUE) SET(NVIMAGE_SHARED 
TRUE) ENDIF(NVTT_SHARED) SET(CMAKE_SKIP_BUILD_RPATH TRUE) SET(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE) SET(CMAKE_INSTALL_RPATH "$ORIGIN") SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) SET(CMAKE_INSTALL_NAME_DIR "@executable_path") ADD_SUBDIRECTORY(extern) ADD_SUBDIRECTORY(src) # These files should only be installed when creating packages. INSTALL(FILES LICENSE README.md DESTINATION share/doc/nvtt) # Add packaging support INCLUDE(InstallRequiredSystemLibraries) IF(CMAKE_SYSTEM_NAME STREQUAL "Linux") SET(CPACK_GENERATOR "TGZ;DEB") ENDIF(CMAKE_SYSTEM_NAME STREQUAL "Linux") SET(CPACK_PACKAGE_NAME "nvidia-texture-tools") SET(CPACK_PACKAGE_VERSION_MAJOR "2") SET(CPACK_PACKAGE_VERSION_MINOR "1") SET(CPACK_PACKAGE_VERSION_PATCH "0") SET(CPACK_PACKAGE_VERSION "2.1.0") -SET(CPACK_PACKAGE_CONTACT "Ignacio Castaño ") +SET(CPACK_PACKAGE_CONTACT "Ignacio Castaño ") #SET(CPACK_PACKAGE_VENDOR "NVIDIA Corporation") SET(CPACK_PACKAGE_DESCRIPTION_SUMMARY "Texture processing tools with support for Direct3D 10 and 11 formats.") SET(CPACK_PACKAGE_DESCRIPTION_FILE "${NV_SOURCE_DIR}/README.md") SET(CPACK_RESOURCE_FILE_LICENSE "${NV_SOURCE_DIR}/LICENSE") # NSIS options: IF(WIN32) SET(CPACK_NSIS_DISPLAY_NAME "${CPACK_PACKAGE_VENDOR}\\\\NVIDIA Texture Tools 2.1") SET(CPACK_PACKAGE_INSTALL_DIRECTORY "${CPACK_PACKAGE_VENDOR}\\\\NVIDIA Texture Tools 2.1") SET(CPACK_PACKAGE_ICON "${NV_SOURCE_DIR}\\\\project\\\\vc8\\\\nvcompress\\\\nvidia.ico") ENDIF(WIN32) INCLUDE(CPack) Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/Array.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/Array.h (revision 23379) +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/Array.h (revision 23380) @@ -1,182 +1,182 @@ -// This code is in the public domain -- Ignacio Castaño +// This code is in the public domain -- Ignacio Castaño #pragma once #ifndef NV_CORE_ARRAY_H #define NV_CORE_ARRAY_H /* This array class requires the elements to be relocable; it uses 
memmove and realloc. Ideally I should be using swap, but I honestly don't care. The only thing that you should be aware of is that internal pointers are not supported. Note also that push_back and resize does not support inserting arguments elements that are in the same container. This is forbidden to prevent an extra copy. */ #include "Memory.h" #include "Debug.h" #include "ForEach.h" // PseudoIndex namespace nv { class Stream; /** * Replacement for std::vector that is easier to debug and provides * some nice foreach enumerators. */ template class NVCORE_CLASS Array { public: typedef uint size_type; // Default constructor. NV_FORCEINLINE Array() : m_buffer(NULL), m_capacity(0), m_size(0) {} // Copy constructor. NV_FORCEINLINE Array(const Array & a) : m_buffer(NULL), m_capacity(0), m_size(0) { copy(a.m_buffer, a.m_size); } // Constructor that initializes the vector with the given elements. NV_FORCEINLINE Array(const T * ptr, uint num) : m_buffer(NULL), m_capacity(0), m_size(0) { copy(ptr, num); } // Allocate array. NV_FORCEINLINE explicit Array(uint capacity) : m_buffer(NULL), m_capacity(0), m_size(0) { setArrayCapacity(capacity); } // Destructor. NV_FORCEINLINE ~Array() { clear(); free(m_buffer); } /// Const element access. NV_FORCEINLINE const T & operator[]( uint index ) const { nvDebugCheck(index < m_size); return m_buffer[index]; } NV_FORCEINLINE const T & at( uint index ) const { nvDebugCheck(index < m_size); return m_buffer[index]; } /// Element access. NV_FORCEINLINE T & operator[] ( uint index ) { nvDebugCheck(index < m_size); return m_buffer[index]; } NV_FORCEINLINE T & at( uint index ) { nvDebugCheck(index < m_size); return m_buffer[index]; } /// Get vector size. NV_FORCEINLINE uint size() const { return m_size; } /// Get vector size. NV_FORCEINLINE uint count() const { return m_size; } /// Get vector capacity. NV_FORCEINLINE uint capacity() const { return m_capacity; } /// Get const vector pointer. 
NV_FORCEINLINE const T * buffer() const { return m_buffer; } /// Get vector pointer. NV_FORCEINLINE T * buffer() { return m_buffer; } /// Provide begin/end pointers for C++11 range-based for loops. NV_FORCEINLINE T * begin() { return m_buffer; } NV_FORCEINLINE T * end() { return m_buffer + m_size; } NV_FORCEINLINE const T * begin() const { return m_buffer; } NV_FORCEINLINE const T * end() const { return m_buffer + m_size; } /// Is vector empty. NV_FORCEINLINE bool isEmpty() const { return m_size == 0; } /// Is a null vector. NV_FORCEINLINE bool isNull() const { return m_buffer == NULL; } T & append(); void push_back( const T & val ); void pushBack( const T & val ); Array & append( const T & val ); Array & operator<< ( T & t ); void pop_back(); void popBack(uint count = 1); void popFront(uint count = 1); const T & back() const; T & back(); const T & front() const; T & front(); bool contains(const T & e) const; bool find(const T & element, uint * indexPtr) const; bool find(const T & element, uint begin, uint end, uint * indexPtr) const; void removeAt(uint index); bool remove(const T & element); void insertAt(uint index, const T & val = T()); void append(const Array & other); void append(const T other[], uint count); void replaceWithLast(uint index); void resize(uint new_size); void resize(uint new_size, const T & elem); void fill(const T & elem); void clear(); void shrink(); void reserve(uint desired_size); void copy(const T * data, uint count); Array & operator=( const Array & a ); T * release(); // Array enumerator. 
typedef uint PseudoIndex; NV_FORCEINLINE PseudoIndex start() const { return 0; } NV_FORCEINLINE bool isDone(const PseudoIndex & i) const { nvDebugCheck(i <= this->m_size); return i == this->m_size; } NV_FORCEINLINE void advance(PseudoIndex & i) const { nvDebugCheck(i <= this->m_size); i++; } #if NV_CC_MSVC NV_FORCEINLINE T & operator[]( const PseudoIndexWrapper & i ) { return m_buffer[i(this)]; } NV_FORCEINLINE const T & operator[]( const PseudoIndexWrapper & i ) const { return m_buffer[i(this)]; } #endif // Friends. template friend Stream & operator<< ( Stream & s, Array & p ); template friend void swap(Array & a, Array & b); protected: void setArraySize(uint new_size); void setArrayCapacity(uint new_capacity); T * m_buffer; uint m_capacity; uint m_size; }; } // nv namespace #endif // NV_CORE_ARRAY_H Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/Array.inl =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/Array.inl (revision 23379) +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/Array.inl (revision 23380) @@ -1,438 +1,438 @@ -// This code is in the public domain -- Ignacio Castaño +// This code is in the public domain -- Ignacio Castaño #pragma once #ifndef NV_CORE_ARRAY_INL #define NV_CORE_ARRAY_INL #include "Array.h" #include "Stream.h" #include "Utils.h" // swap #include // memmove #include // for placement new namespace nv { template NV_FORCEINLINE T & Array::append() { uint old_size = m_size; uint new_size = m_size + 1; setArraySize(new_size); construct_range(m_buffer, new_size, old_size); return m_buffer[old_size]; // Return reference to last element. } // Push an element at the end of the vector. 
template NV_FORCEINLINE void Array::push_back( const T & val ) { #if 1 nvDebugCheck(&val < m_buffer || &val >= m_buffer+m_size); uint old_size = m_size; uint new_size = m_size + 1; setArraySize(new_size); construct_range(m_buffer, new_size, old_size, val); #else uint new_size = m_size + 1; if (new_size > m_capacity) { // @@ Is there any way to avoid this copy? // @@ Can we create a copy without side effects? Ie. without calls to constructor/destructor. Use alloca + memcpy? // @@ Assert instead of copy? const T copy(val); // create a copy in case value is inside of this array. setArraySize(new_size); new (m_buffer+new_size-1) T(copy); } else { m_size = new_size; new(m_buffer+new_size-1) T(val); } #endif // 0/1 } template NV_FORCEINLINE void Array::pushBack( const T & val ) { push_back(val); } template NV_FORCEINLINE Array & Array::append( const T & val ) { push_back(val); return *this; } // Qt like push operator. template NV_FORCEINLINE Array & Array::operator<< ( T & t ) { push_back(t); return *this; } // Pop the element at the end of the vector. template NV_FORCEINLINE void Array::pop_back() { nvDebugCheck( m_size > 0 ); resize( m_size - 1 ); } template NV_FORCEINLINE void Array::popBack(uint count) { nvDebugCheck(m_size >= count); resize(m_size - count); } template NV_FORCEINLINE void Array::popFront(uint count) { nvDebugCheck(m_size >= count); //resize(m_size - count); if (m_size == count) { clear(); } else { destroy_range(m_buffer, 0, count); memmove(m_buffer, m_buffer + count, sizeof(T) * (m_size - count)); m_size -= count; } } // Get back element. template NV_FORCEINLINE const T & Array::back() const { nvDebugCheck( m_size > 0 ); return m_buffer[m_size-1]; } // Get back element. template NV_FORCEINLINE T & Array::back() { nvDebugCheck( m_size > 0 ); return m_buffer[m_size-1]; } // Get front element. template NV_FORCEINLINE const T & Array::front() const { nvDebugCheck( m_size > 0 ); return m_buffer[0]; } // Get front element. 
template NV_FORCEINLINE T & Array::front() { nvDebugCheck( m_size > 0 ); return m_buffer[0]; } // Check if the given element is contained in the array. template NV_FORCEINLINE bool Array::contains(const T & e) const { return find(e, NULL); } // Return true if element found. template NV_FORCEINLINE bool Array::find(const T & element, uint * indexPtr) const { return find(element, 0, m_size, indexPtr); } // Return true if element found within the given range. template NV_FORCEINLINE bool Array::find(const T & element, uint begin, uint end, uint * indexPtr) const { return ::nv::find(element, m_buffer, begin, end, indexPtr); } // Remove the element at the given index. This is an expensive operation! template void Array::removeAt(uint index) { nvDebugCheck(index >= 0 && index < m_size); if (m_size == 1) { clear(); } else { m_buffer[index].~T(); memmove(m_buffer+index, m_buffer+index+1, sizeof(T) * (m_size - 1 - index)); m_size--; } } // Remove the first instance of the given element. template bool Array::remove(const T & element) { uint index; if (find(element, &index)) { removeAt(index); return true; } return false; } // Insert the given element at the given index shifting all the elements up. template void Array::insertAt(uint index, const T & val/*=T()*/) { nvDebugCheck( index >= 0 && index <= m_size ); setArraySize(m_size + 1); if (index < m_size - 1) { memmove(m_buffer+index+1, m_buffer+index, sizeof(T) * (m_size - 1 - index)); } // Copy-construct into the newly opened slot. new(m_buffer+index) T(val); } // Append the given data to our vector. template NV_FORCEINLINE void Array::append(const Array & other) { append(other.m_buffer, other.m_size); } // Append the given data to our vector. template void Array::append(const T other[], uint count) { if (count > 0) { const uint old_size = m_size; setArraySize(m_size + count); for (uint i = 0; i < count; i++ ) { new(m_buffer + old_size + i) T(other[i]); } } } // Remove the given element by replacing it with the last one. 
template void Array::replaceWithLast(uint index) { nvDebugCheck( index < m_size ); nv::swap(m_buffer[index], back()); // @@ Is this OK when index == size-1? (m_buffer+m_size-1)->~T(); m_size--; } // Resize the vector preserving existing elements. template void Array::resize(uint new_size) { uint old_size = m_size; // Destruct old elements (if we're shrinking). destroy_range(m_buffer, new_size, old_size); setArraySize(new_size); // Call default constructors construct_range(m_buffer, new_size, old_size); } // Resize the vector preserving existing elements and initializing the // new ones with the given value. template void Array::resize(uint new_size, const T & elem) { nvDebugCheck(&elem < m_buffer || &elem > m_buffer+m_size); uint old_size = m_size; // Destruct old elements (if we're shrinking). destroy_range(m_buffer, new_size, old_size); setArraySize(new_size); // Call copy constructors construct_range(m_buffer, new_size, old_size, elem); } // Fill array with the given value. template void Array::fill(const T & elem) { fill(m_buffer, m_size, elem); } // Clear the buffer. template NV_FORCEINLINE void Array::clear() { nvDebugCheck(isValidPtr(m_buffer)); // Destruct old elements destroy_range(m_buffer, 0, m_size); m_size = 0; } // Shrink the allocated vector. template NV_FORCEINLINE void Array::shrink() { if (m_size < m_capacity) { setArrayCapacity(m_size); } } // Preallocate space. template NV_FORCEINLINE void Array::reserve(uint desired_size) { if (desired_size > m_capacity) { setArrayCapacity(desired_size); } } // Copy elements to this array. Resizes it if needed. template NV_FORCEINLINE void Array::copy(const T * data, uint count) { #if 1 // More simple, but maybe not be as efficient? 
destroy_range(m_buffer, 0, m_size); setArraySize(count); construct_range(m_buffer, count, 0, data); #else const uint old_size = m_size; destroy_range(m_buffer, count, old_size); setArraySize(count); copy_range(m_buffer, data, old_size); construct_range(m_buffer, count, old_size, data); #endif } // Assignment operator. template NV_FORCEINLINE Array & Array::operator=( const Array & a ) { copy(a.m_buffer, a.m_size); return *this; } // Release ownership of allocated memory and returns pointer to it. template T * Array::release() { T * tmp = m_buffer; m_buffer = NULL; m_capacity = 0; m_size = 0; return tmp; } // Change array size. template inline void Array::setArraySize(uint new_size) { m_size = new_size; if (new_size > m_capacity) { uint new_buffer_size; if (m_capacity == 0) { // first allocation is exact new_buffer_size = new_size; } else { // following allocations grow array by 25% new_buffer_size = new_size + (new_size >> 2); } setArrayCapacity( new_buffer_size ); } } // Change array capacity. template inline void Array::setArrayCapacity(uint new_capacity) { nvDebugCheck(new_capacity >= m_size); if (new_capacity == 0) { // free the buffer. if (m_buffer != NULL) { free(m_buffer); m_buffer = NULL; } } else { // realloc the buffer m_buffer = realloc(m_buffer, new_capacity); } m_capacity = new_capacity; } // Array serialization. template inline Stream & operator<< ( Stream & s, Array & p ) { if (s.isLoading()) { uint size; s << size; p.resize( size ); } else { s << p.m_size; } for (uint i = 0; i < p.m_size; i++) { s << p.m_buffer[i]; } return s; } // Swap the members of the two given vectors. 
template inline void swap(Array & a, Array & b) { nv::swap(a.m_buffer, b.m_buffer); nv::swap(a.m_capacity, b.m_capacity); nv::swap(a.m_size, b.m_size); } } // nv namespace #endif // NV_CORE_ARRAY_INL Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/Debug.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/Debug.cpp (revision 23379) +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/Debug.cpp (revision 23380) @@ -1,1270 +1,1270 @@ -// This code is in the public domain -- Ignacio Castaño +// This code is in the public domain -- Ignacio Castaño #include "Debug.h" #include "Array.inl" #include "StrLib.h" // StringBuilder #include "StdStream.h" // fileOpen #include // Extern #if NV_OS_WIN32 //&& NV_CC_MSVC # define WIN32_LEAN_AND_MEAN # define VC_EXTRALEAN # include # include # if NV_CC_MSVC # include # if _MSC_VER < 1300 # define DECLSPEC_DEPRECATED // VC6: change this path to your Platform SDK headers # include // must be XP version of file // include "M:\\dev7\\vs\\devtools\\common\\win32sdk\\include\\dbghelp.h" # else // VC7: ships with updated headers # include # endif # endif # pragma comment(lib,"dbghelp.lib") #endif #if NV_OS_XBOX # include # ifdef _DEBUG # include # endif //_DEBUG #endif //NV_OS_XBOX #if !NV_OS_WIN32 && defined(HAVE_SIGNAL_H) # include #endif #if NV_OS_UNIX # include // getpid #endif #if NV_OS_LINUX && defined(HAVE_EXECINFO_H) # include // backtrace # if NV_CC_GNUC // defined(HAVE_CXXABI_H) # include # endif #endif #if NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_NETBSD || NV_OS_OPENBSD # include # include # include // sysctl # if !defined(NV_OS_OPENBSD) # include # endif # if defined(HAVE_EXECINFO_H) // only after OSX 10.5 # include // backtrace # if NV_CC_GNUC // defined(HAVE_CXXABI_H) # include # endif # endif #endif #if NV_OS_ORBIS #include #endif #define NV_USE_SEPARATE_THREAD 1 using namespace nv; namespace { static MessageHandler * s_message_handler = NULL; static 
AssertHandler * s_assert_handler = NULL; static bool s_sig_handler_enabled = false; static bool s_interactive = true; #if NV_OS_WIN32 && NV_CC_MSVC // Old exception filter. static LPTOP_LEVEL_EXCEPTION_FILTER s_old_exception_filter = NULL; #elif !NV_OS_WIN32 && defined(HAVE_SIGNAL_H) // Old signal handlers. struct sigaction s_old_sigsegv; struct sigaction s_old_sigtrap; struct sigaction s_old_sigfpe; struct sigaction s_old_sigbus; #endif #if NV_OS_WIN32 && NV_CC_MSVC // We should try to simplify the top level filter as much as possible. // http://www.nynaeve.net/?p=128 #if NV_USE_SEPARATE_THREAD // The critical section enforcing the requirement that only one exception be // handled by a handler at a time. static CRITICAL_SECTION s_handler_critical_section; // Semaphores used to move exception handling between the exception thread // and the handler thread. handler_start_semaphore_ is signalled by the // exception thread to wake up the handler thread when an exception occurs. // handler_finish_semaphore_ is signalled by the handler thread to wake up // the exception thread when handling is complete. static HANDLE s_handler_start_semaphore = NULL; static HANDLE s_handler_finish_semaphore = NULL; // The exception handler thread. static HANDLE s_handler_thread = NULL; static DWORD s_requesting_thread_id = 0; static EXCEPTION_POINTERS * s_exception_info = NULL; #endif // NV_USE_SEPARATE_THREAD struct MinidumpCallbackContext { ULONG64 memory_base; ULONG memory_size; bool finished; }; // static static BOOL CALLBACK miniDumpWriteDumpCallback(PVOID context, const PMINIDUMP_CALLBACK_INPUT callback_input, PMINIDUMP_CALLBACK_OUTPUT callback_output) { switch (callback_input->CallbackType) { case MemoryCallback: { MinidumpCallbackContext* callback_context = reinterpret_cast(context); if (callback_context->finished) return FALSE; // Include the specified memory region. 
callback_output->MemoryBase = callback_context->memory_base; callback_output->MemorySize = callback_context->memory_size; callback_context->finished = true; return TRUE; } // Include all modules. case IncludeModuleCallback: case ModuleCallback: return TRUE; // Include all threads. case IncludeThreadCallback: case ThreadCallback: return TRUE; // Stop receiving cancel callbacks. case CancelCallback: callback_output->CheckCancel = FALSE; callback_output->Cancel = FALSE; return TRUE; } // Ignore other callback types. return FALSE; } static bool writeMiniDump(EXCEPTION_POINTERS * pExceptionInfo) { // create the file HANDLE hFile = CreateFileA("crash.dmp", GENERIC_WRITE, FILE_SHARE_WRITE, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL); if (hFile == INVALID_HANDLE_VALUE) { //nvDebug("*** Failed to create dump file.\n"); return false; } MINIDUMP_EXCEPTION_INFORMATION * pExInfo = NULL; MINIDUMP_CALLBACK_INFORMATION * pCallback = NULL; if (pExceptionInfo != NULL) { MINIDUMP_EXCEPTION_INFORMATION ExInfo; ExInfo.ThreadId = ::GetCurrentThreadId(); ExInfo.ExceptionPointers = pExceptionInfo; ExInfo.ClientPointers = NULL; pExInfo = &ExInfo; MINIDUMP_CALLBACK_INFORMATION callback; MinidumpCallbackContext context; // Find a memory region of 256 bytes centered on the // faulting instruction pointer. const ULONG64 instruction_pointer = #if defined(_M_IX86) pExceptionInfo->ContextRecord->Eip; #elif defined(_M_AMD64) pExceptionInfo->ContextRecord->Rip; #else #error Unsupported platform #endif MEMORY_BASIC_INFORMATION info; if (VirtualQuery(reinterpret_cast(instruction_pointer), &info, sizeof(MEMORY_BASIC_INFORMATION)) != 0 && info.State == MEM_COMMIT) { // Attempt to get 128 bytes before and after the instruction // pointer, but settle for whatever's available up to the // boundaries of the memory region. 
const ULONG64 kIPMemorySize = 256; context.memory_base = max(reinterpret_cast(info.BaseAddress), instruction_pointer - (kIPMemorySize / 2)); ULONG64 end_of_range = min(instruction_pointer + (kIPMemorySize / 2), reinterpret_cast(info.BaseAddress) + info.RegionSize); context.memory_size = static_cast(end_of_range - context.memory_base); context.finished = false; callback.CallbackRoutine = miniDumpWriteDumpCallback; callback.CallbackParam = reinterpret_cast(&context); pCallback = &callback; } } MINIDUMP_TYPE miniDumpType = (MINIDUMP_TYPE)(MiniDumpNormal|MiniDumpWithHandleData|MiniDumpWithThreadInfo); // write the dump BOOL ok = MiniDumpWriteDump(GetCurrentProcess(), GetCurrentProcessId(), hFile, miniDumpType, pExInfo, NULL, pCallback) != 0; CloseHandle(hFile); if (ok == FALSE) { //nvDebug("*** Failed to save dump file.\n"); return false; } //nvDebug("\nDump file saved.\n"); return true; } #if NV_USE_SEPARATE_THREAD static DWORD WINAPI ExceptionHandlerThreadMain(void* lpParameter) { nvDebugCheck(s_handler_start_semaphore != NULL); nvDebugCheck(s_handler_finish_semaphore != NULL); while (true) { if (WaitForSingleObject(s_handler_start_semaphore, INFINITE) == WAIT_OBJECT_0) { writeMiniDump(s_exception_info); // Allow the requesting thread to proceed. ReleaseSemaphore(s_handler_finish_semaphore, 1, NULL); } } // This statement is not reached when the thread is unconditionally // terminated by the ExceptionHandler destructor. return 0; } #endif // NV_USE_SEPARATE_THREAD static bool hasStackTrace() { return true; } /*static NV_NOINLINE int backtrace(void * trace[], int maxcount) { // In Windows XP and Windows Server 2003, the sum of the FramesToSkip and FramesToCapture parameters must be less than 63. 
int xp_maxcount = min(63-1, maxcount); int count = RtlCaptureStackBackTrace(1, xp_maxcount, trace, NULL); nvDebugCheck(count <= maxcount); return count; }*/ static NV_NOINLINE int backtraceWithSymbols(CONTEXT * ctx, void * trace[], int maxcount, int skip = 0) { // Init the stack frame for this function STACKFRAME64 stackFrame = { 0 }; #if NV_CPU_X86_64 DWORD dwMachineType = IMAGE_FILE_MACHINE_AMD64; stackFrame.AddrPC.Offset = ctx->Rip; stackFrame.AddrFrame.Offset = ctx->Rbp; stackFrame.AddrStack.Offset = ctx->Rsp; #elif NV_CPU_X86 DWORD dwMachineType = IMAGE_FILE_MACHINE_I386; stackFrame.AddrPC.Offset = ctx->Eip; stackFrame.AddrFrame.Offset = ctx->Ebp; stackFrame.AddrStack.Offset = ctx->Esp; #else #error "Platform not supported!" #endif stackFrame.AddrPC.Mode = AddrModeFlat; stackFrame.AddrFrame.Mode = AddrModeFlat; stackFrame.AddrStack.Mode = AddrModeFlat; // Walk up the stack const HANDLE hThread = GetCurrentThread(); const HANDLE hProcess = GetCurrentProcess(); int i; for (i = 0; i < maxcount; i++) { // walking once first makes us skip self if (!StackWalk64(dwMachineType, hProcess, hThread, &stackFrame, ctx, NULL, &SymFunctionTableAccess64, &SymGetModuleBase64, NULL)) { break; } /*if (stackFrame.AddrPC.Offset == stackFrame.AddrReturn.Offset || stackFrame.AddrPC.Offset == 0) { break; }*/ if (i >= skip) { trace[i - skip] = (PVOID)stackFrame.AddrPC.Offset; } } return i - skip; } #pragma warning(push) #pragma warning(disable:4748) static NV_NOINLINE int backtrace(void * trace[], int maxcount) { CONTEXT ctx = { 0 }; #if NV_CPU_X86 && !NV_CPU_X86_64 ctx.ContextFlags = CONTEXT_CONTROL; _asm { call x x: pop eax mov ctx.Eip, eax mov ctx.Ebp, ebp mov ctx.Esp, esp } #else RtlCaptureContext(&ctx); // Not implemented correctly in x86. 
#endif return backtraceWithSymbols(&ctx, trace, maxcount, 1); } #pragma warning(pop) static NV_NOINLINE void writeStackTrace(void * trace[], int size, int start, Array & lines) { StringBuilder builder(512); HANDLE hProcess = GetCurrentProcess(); // Resolve PC to function names for (int i = start; i < size; i++) { // Check for end of stack walk DWORD64 ip = (DWORD64)trace[i]; if (ip == NULL) break; // Get function name #define MAX_STRING_LEN (512) unsigned char byBuffer[sizeof(IMAGEHLP_SYMBOL64) + MAX_STRING_LEN] = { 0 }; IMAGEHLP_SYMBOL64 * pSymbol = (IMAGEHLP_SYMBOL64*)byBuffer; pSymbol->SizeOfStruct = sizeof(IMAGEHLP_SYMBOL64); pSymbol->MaxNameLength = MAX_STRING_LEN; DWORD64 dwDisplacement; if (SymGetSymFromAddr64(hProcess, ip, &dwDisplacement, pSymbol)) { pSymbol->Name[MAX_STRING_LEN-1] = 0; /* // Make the symbol readable for humans UnDecorateSymbolName( pSym->Name, lpszNonUnicodeUnDSymbol, BUFFERSIZE, UNDNAME_COMPLETE | UNDNAME_NO_THISTYPE | UNDNAME_NO_SPECIAL_SYMS | UNDNAME_NO_MEMBER_TYPE | UNDNAME_NO_MS_KEYWORDS | UNDNAME_NO_ACCESS_SPECIFIERS ); */ // pSymbol->Name const char * pFunc = pSymbol->Name; // Get file/line number IMAGEHLP_LINE64 theLine = { 0 }; theLine.SizeOfStruct = sizeof(theLine); DWORD dwDisplacement; if (!SymGetLineFromAddr64(hProcess, ip, &dwDisplacement, &theLine)) { // Do not print unknown symbols anymore. break; //builder.format("unknown(%08X) : %s\n", (uint32)ip, pFunc); } else { /* const char* pFile = strrchr(theLine.FileName, '\\'); if ( pFile == NULL ) pFile = theLine.FileName; else pFile++; */ const char * pFile = theLine.FileName; int line = theLine.LineNumber; builder.format("%s(%d) : %s\n", pFile, line, pFunc); } lines.append(builder.release()); if (pFunc != NULL && strcmp(pFunc, "WinMain") == 0) { break; } } } } // Write mini dump and print stack trace. 
static LONG WINAPI handleException(EXCEPTION_POINTERS * pExceptionInfo) { EnterCriticalSection(&s_handler_critical_section); #if NV_USE_SEPARATE_THREAD s_requesting_thread_id = GetCurrentThreadId(); s_exception_info = pExceptionInfo; // This causes the handler thread to call writeMiniDump. ReleaseSemaphore(s_handler_start_semaphore, 1, NULL); // Wait until WriteMinidumpWithException is done and collect its return value. WaitForSingleObject(s_handler_finish_semaphore, INFINITE); //bool status = s_handler_return_value; // Clean up. s_requesting_thread_id = 0; s_exception_info = NULL; #else // First of all, write mini dump. writeMiniDump(pExceptionInfo); #endif LeaveCriticalSection(&s_handler_critical_section); nvDebug("\nDump file saved.\n"); // Try to attach to debugger. if (s_interactive && debug::attachToDebugger()) { nvDebugBreak(); return EXCEPTION_CONTINUE_EXECUTION; } // If that fails, then try to pretty print a stack trace and terminate. void * trace[64]; int size = backtraceWithSymbols(pExceptionInfo->ContextRecord, trace, 64); // @@ Use win32's CreateFile? FILE * fp = fileOpen("crash.txt", "wb"); if (fp != NULL) { Array lines; writeStackTrace(trace, size, 0, lines); for (uint i = 0; i < lines.count(); i++) { fputs(lines[i], fp); delete lines[i]; } // @@ Add more info to crash.txt? fclose(fp); } // This should terminate the process and set the error exit code. TerminateProcess(GetCurrentProcess(), EXIT_FAILURE + 2); return EXCEPTION_EXECUTE_HANDLER; // Terminate app. In case terminate process did not succeed. 
} static void handlePureVirtualCall() { nvDebugBreak(); TerminateProcess(GetCurrentProcess(), EXIT_FAILURE + 8); } static void handleInvalidParameter(const wchar_t * wexpresion, const wchar_t * wfunction, const wchar_t * wfile, unsigned int line, uintptr_t reserved) { size_t convertedCharCount = 0; StringBuilder expresion; if (wexpresion != NULL) { uint size = U32(wcslen(wexpresion) + 1); expresion.reserve(size); wcstombs_s(&convertedCharCount, expresion.str(), size, wexpresion, _TRUNCATE); } StringBuilder file; if (wfile != NULL) { uint size = U32(wcslen(wfile) + 1); file.reserve(size); wcstombs_s(&convertedCharCount, file.str(), size, wfile, _TRUNCATE); } StringBuilder function; if (wfunction != NULL) { uint size = U32(wcslen(wfunction) + 1); function.reserve(size); wcstombs_s(&convertedCharCount, function.str(), size, wfunction, _TRUNCATE); } int result = nvAbort(expresion.str(), file.str(), line, function.str()); if (result == NV_ABORT_DEBUG) { nvDebugBreak(); } } #elif !NV_OS_WIN32 && defined(HAVE_SIGNAL_H) // NV_OS_LINUX || NV_OS_DARWIN #if defined(HAVE_EXECINFO_H) static bool hasStackTrace() { return true; } static void writeStackTrace(void * trace[], int size, int start, Array & lines) { StringBuilder builder(512); char ** string_array = backtrace_symbols(trace, size); for(int i = start; i < size-1; i++ ) { # if NV_CC_GNUC // defined(HAVE_CXXABI_H) // @@ Write a better parser for the possible formats. char * begin = strchr(string_array[i], '('); char * end = strrchr(string_array[i], '+'); char * module = string_array[i]; if (begin == 0 && end != 0) { *(end - 1) = '\0'; begin = strrchr(string_array[i], ' '); module = NULL; // Ignore module. 
} if (begin != 0 && begin < end) { int stat; *end = '\0'; *begin = '\0'; char * name = abi::__cxa_demangle(begin+1, 0, 0, &stat); if (module == NULL) { if (name == NULL || stat != 0) { builder.format(" In: '%s'\n", begin+1); } else { builder.format(" In: '%s'\n", name); } } else { if (name == NULL || stat != 0) { builder.format(" In: [%s] '%s'\n", module, begin+1); } else { builder.format(" In: [%s] '%s'\n", module, name); } } free(name); } else { builder.format(" In: '%s'\n", string_array[i]); } # else builder.format(" In: '%s'\n", string_array[i]); # endif lines.append(builder.release()); } free(string_array); } static void printStackTrace(void * trace[], int size, int start=0) { nvDebug( "\nDumping stacktrace:\n" ); Array lines; writeStackTrace(trace, size, 1, lines); for (uint i = 0; i < lines.count(); i++) { nvDebug("%s", lines[i]); delete lines[i]; } nvDebug("\n"); } #endif // defined(HAVE_EXECINFO_H) static void * callerAddress(void * secret) { #if NV_OS_DARWIN # if defined(_STRUCT_MCONTEXT) # if NV_CPU_PPC ucontext_t * ucp = (ucontext_t *)secret; return (void *) ucp->uc_mcontext->__ss.__srr0; # elif NV_CPU_X86_64 ucontext_t * ucp = (ucontext_t *)secret; return (void *) ucp->uc_mcontext->__ss.__rip; # elif NV_CPU_X86 ucontext_t * ucp = (ucontext_t *)secret; return (void *) ucp->uc_mcontext->__ss.__eip; # elif NV_CPU_ARM ucontext_t * ucp = (ucontext_t *)secret; return (void *) ucp->uc_mcontext->__ss.__pc; # else # error "Unknown CPU" # endif # else # if NV_CPU_PPC ucontext_t * ucp = (ucontext_t *)secret; return (void *) ucp->uc_mcontext->ss.srr0; # elif NV_CPU_X86 ucontext_t * ucp = (ucontext_t *)secret; return (void *) ucp->uc_mcontext->ss.eip; # else # error "Unknown CPU" # endif # endif #elif NV_OS_FREEBSD # if NV_CPU_X86_64 ucontext_t * ucp = (ucontext_t *)secret; return (void *)ucp->uc_mcontext.mc_rip; # elif NV_CPU_X86 ucontext_t * ucp = (ucontext_t *)secret; return (void *)ucp->uc_mcontext.mc_eip; # else # error "Unknown CPU" # endif #elif NV_OS_NETBSD 
# if NV_CPU_X86_64 ucontext_t * ucp = (ucontext_t *)secret; return (void *)ucp->uc_mcontext.__gregs[_REG_RIP]; # elif NV_CPU_X86 ucontext_t * ucp = (ucontext_t *)secret; return (void *)ucp->uc_mcontext.__gregs[_REG_EIP]; # elif NV_CPU_PPC ucontext_t * ucp = (ucontext_t *)secret; return (void *) ucp->uc_mcontext.__gregs[_REG_PC]; # else # error "Unknown CPU" # endif #elif NV_OS_OPENBSD # if NV_CPU_X86_64 ucontext_t * ucp = (ucontext_t *)secret; return (void *)ucp->sc_rip; # elif NV_CPU_X86 ucontext_t * ucp = (ucontext_t *)secret; return (void *)ucp->sc_eip; # else # error "Unknown CPU" # endif #else # if NV_CPU_X86_64 // #define REG_RIP REG_INDEX(rip) // seems to be 16 ucontext_t * ucp = (ucontext_t *)secret; return (void *)ucp->uc_mcontext.gregs[REG_RIP]; # elif NV_CPU_X86 ucontext_t * ucp = (ucontext_t *)secret; return (void *)ucp->uc_mcontext.gregs[14/*REG_EIP*/]; # elif NV_CPU_PPC ucontext_t * ucp = (ucontext_t *)secret; return (void *) ucp->uc_mcontext.regs->nip; # elif NV_CPU_AARCH64 ucontext_t * ucp = (ucontext_t *)secret; return (void *) ucp->uc_mcontext.pc; # else # error "Unknown CPU" # endif #endif // How to obtain the instruction pointers in different platforms, from mlton's source code. 
// http://mlton.org/ // OpenBSD // ucp->sc_eip // FreeBSD: // ucp->uc_mcontext.mc_eip // HPUX: // ucp->uc_link // Solaris: // ucp->uc_mcontext.gregs[REG_PC] // Linux hppa: // uc->uc_mcontext.sc_iaoq[0] & ~0x3UL // Linux sparc: // ((struct sigcontext*) secret)->sigc_regs.tpc // Linux sparc64: // ((struct sigcontext*) secret)->si_regs.pc // potentially correct for other archs: // Linux alpha: ucp->m_context.sc_pc // Linux arm: ucp->m_context.ctx.arm_pc // Linux ia64: ucp->m_context.sc_ip & ~0x3UL // Linux mips: ucp->m_context.sc_pc // Linux s390: ucp->m_context.sregs->regs.psw.addr } static void nvSigHandler(int sig, siginfo_t *info, void *secret) { void * pnt = callerAddress(secret); // Do something useful with siginfo_t if (sig == SIGSEGV) { if (pnt != NULL) nvDebug("Got signal %d, faulty address is %p, from %p\n", sig, info->si_addr, pnt); else nvDebug("Got signal %d, faulty address is %p\n", sig, info->si_addr); } else if(sig == SIGTRAP) { nvDebug("Breakpoint hit.\n"); } else { nvDebug("Got signal %d\n", sig); } #if defined(HAVE_EXECINFO_H) if (hasStackTrace()) // in case of weak linking { void * trace[64]; int size = backtrace(trace, 64); if (pnt != NULL) { // Overwrite sigaction with caller's address. trace[1] = pnt; } printStackTrace(trace, size, 1); } #endif // defined(HAVE_EXECINFO_H) exit(0); } #endif // defined(HAVE_SIGNAL_H) #if NV_OS_WIN32 //&& NV_CC_MSVC /** Win32 assert handler. */ struct Win32AssertHandler : public AssertHandler { // Flush the message queue. This is necessary for the message box to show up. static void flushMessageQueue() { MSG msg; while( PeekMessage( &msg, NULL, 0, 0, PM_REMOVE ) ) { //if( msg.message == WM_QUIT ) break; TranslateMessage( &msg ); DispatchMessage( &msg ); } } // Assert handler method. 
virtual int assertion(const char * exp, const char * file, int line, const char * func, const char * msg, va_list arg) { int ret = NV_ABORT_EXIT; StringBuilder error_string; error_string.format("*** Assertion failed: %s\n On file: %s\n On line: %d\n", exp, file, line ); if (func != NULL) { error_string.appendFormat(" On function: %s\n", func); } if (msg != NULL) { error_string.append(" Message: "); va_list tmp; va_copy(tmp, arg); error_string.appendFormatList(msg, tmp); va_end(tmp); error_string.append("\n"); } nvDebug( error_string.str() ); // Print stack trace: debug::dumpInfo(); if (debug::isDebuggerPresent()) { return NV_ABORT_DEBUG; } if (s_interactive) { flushMessageQueue(); int action = MessageBoxA(NULL, error_string.str(), "Assertion failed", MB_ABORTRETRYIGNORE | MB_ICONERROR | MB_TOPMOST); switch( action ) { case IDRETRY: ret = NV_ABORT_DEBUG; break; case IDIGNORE: ret = NV_ABORT_IGNORE; break; case IDABORT: default: ret = NV_ABORT_EXIT; break; } /*if( _CrtDbgReport( _CRT_ASSERT, file, line, module, exp ) == 1 ) { return NV_ABORT_DEBUG; }*/ } if (ret == NV_ABORT_EXIT) { // Exit cleanly. exit(EXIT_FAILURE + 1); } return ret; } }; #elif NV_OS_XBOX /** Xbox360 assert handler. */ struct Xbox360AssertHandler : public AssertHandler { // Assert handler method. virtual int assertion(const char * exp, const char * file, int line, const char * func, const char * msg, va_list arg) { int ret = NV_ABORT_EXIT; StringBuilder error_string; if( func != NULL ) { error_string.format( "*** Assertion failed: %s\n On file: %s\n On function: %s\n On line: %d\n ", exp, file, func, line ); nvDebug( error_string.str() ); } else { error_string.format( "*** Assertion failed: %s\n On file: %s\n On line: %d\n ", exp, file, line ); nvDebug( error_string.str() ); } if (debug::isDebuggerPresent()) { return NV_ABORT_DEBUG; } if( ret == NV_ABORT_EXIT ) { // Exit cleanly. exit(EXIT_FAILURE + 1); } return ret; } }; #elif NV_OS_ORBIS /** Orbis assert handler. 
*/ struct OrbisAssertHandler : public AssertHandler { // Assert handler method. virtual int assertion(const char * exp, const char * file, int line, const char * func, const char * msg, va_list arg) { if( func != NULL ) { nvDebug( "*** Assertion failed: %s\n On file: %s\n On function: %s\n On line: %d\n ", exp, file, func, line ); } else { nvDebug( "*** Assertion failed: %s\n On file: %s\n On line: %d\n ", exp, file, line ); } //SBtodoORBIS print stack trace /*if (hasStackTrace()) { void * trace[64]; int size = backtrace(trace, 64); printStackTrace(trace, size, 2); }*/ if (debug::isDebuggerPresent()) return NV_ABORT_DEBUG; return NV_ABORT_IGNORE; } }; #else /** Unix assert handler. */ struct UnixAssertHandler : public AssertHandler { // Assert handler method. virtual int assertion(const char * exp, const char * file, int line, const char * func, const char * msg, va_list arg) { int ret = NV_ABORT_EXIT; if( func != NULL ) { nvDebug( "*** Assertion failed: %s\n On file: %s\n On function: %s\n On line: %d\n ", exp, file, func, line ); } else { nvDebug( "*** Assertion failed: %s\n On file: %s\n On line: %d\n ", exp, file, line ); } #if _DEBUG if (debug::isDebuggerPresent()) { return NV_ABORT_DEBUG; } #endif #if defined(HAVE_EXECINFO_H) if (hasStackTrace()) { void * trace[64]; int size = backtrace(trace, 64); printStackTrace(trace, size, 2); } #endif if( ret == NV_ABORT_EXIT ) { // Exit cleanly. exit(EXIT_FAILURE + 1); } return ret; } }; #endif } // namespace /// Handle assertion through the assert handler. int nvAbort(const char * exp, const char * file, int line, const char * func/*=NULL*/, const char * msg/*= NULL*/, ...) 
{ #if NV_OS_WIN32 //&& NV_CC_MSVC static Win32AssertHandler s_default_assert_handler; #elif NV_OS_XBOX static Xbox360AssertHandler s_default_assert_handler; #elif NV_OS_ORBIS static OrbisAssertHandler s_default_assert_handler; #else static UnixAssertHandler s_default_assert_handler; #endif va_list arg; va_start(arg,msg); AssertHandler * handler = s_assert_handler != NULL ? s_assert_handler : &s_default_assert_handler; int result = handler->assertion(exp, file, line, func, msg, arg); va_end(arg); return result; } // Abnormal termination. Create mini dump and output call stack. void debug::terminate(int code) { #if NV_OS_WIN32 EnterCriticalSection(&s_handler_critical_section); writeMiniDump(NULL); const int max_stack_size = 64; void * trace[max_stack_size]; int size = backtrace(trace, max_stack_size); // @@ Use win32's CreateFile? FILE * fp = fileOpen("crash.txt", "wb"); if (fp != NULL) { Array lines; writeStackTrace(trace, size, 0, lines); for (uint i = 0; i < lines.count(); i++) { fputs(lines[i], fp); delete lines[i]; } // @@ Add more info to crash.txt? fclose(fp); } LeaveCriticalSection(&s_handler_critical_section); #endif exit(code); } /// Shows a message through the message handler. void NV_CDECL nvDebugPrint(const char *msg, ...) { va_list arg; va_start(arg,msg); if (s_message_handler != NULL) { s_message_handler->log( msg, arg ); } va_end(arg); } /// Dump debug info. void debug::dumpInfo() { #if (NV_OS_WIN32 && NV_CC_MSVC) || (defined(HAVE_SIGNAL_H) && defined(HAVE_EXECINFO_H)) if (hasStackTrace()) { void * trace[64]; int size = backtrace(trace, 64); nvDebug( "\nDumping stacktrace:\n" ); Array lines; writeStackTrace(trace, size, 1, lines); for (uint i = 0; i < lines.count(); i++) { nvDebug("%s", lines[i]); delete lines[i]; } } #endif } /// Dump callstack using the specified handler. 
// Each stack line is passed to messageHandler->log() as the format string,
// with a NULL argument list.
void debug::dumpCallstack(MessageHandler *messageHandler, int callstackLevelsToSkip /*= 0*/)
{
#if (NV_OS_WIN32 && NV_CC_MSVC) || (defined(HAVE_SIGNAL_H) && defined(HAVE_EXECINFO_H))
    if (hasStackTrace())
    {
        void * trace[64];
        int size = backtrace(trace, 64);

        Array lines;
        writeStackTrace(trace, size, callstackLevelsToSkip + 1, lines); // + 1 to skip the call to dumpCallstack

        for (uint i = 0; i < lines.count(); i++) {
            messageHandler->log(lines[i], NULL);
            delete lines[i];
        }
    }
#endif
}


/// Set the debug message handler.
void debug::setMessageHandler(MessageHandler * message_handler)
{
    s_message_handler = message_handler;
}

/// Reset the debug message handler.
void debug::resetMessageHandler()
{
    s_message_handler = NULL;
}

/// Set the assert handler.
void debug::setAssertHandler(AssertHandler * assert_handler)
{
    s_assert_handler = assert_handler;
}

/// Reset the assert handler.
void debug::resetAssertHandler()
{
    s_assert_handler = NULL;
}

#if NV_OS_WIN32
#if NV_USE_SEPARATE_THREAD

// Creates the critical section, the start/finish semaphores and the handler
// thread that handleException() uses when NV_USE_SEPARATE_THREAD is set.
static void initHandlerThread()
{
    static const int kExceptionHandlerThreadInitialStackSize = 64 * 1024;

    // Set synchronization primitives and the handler thread. Each
    // ExceptionHandler object gets its own handler thread because that's the
    // only way to reliably guarantee sufficient stack space in an exception,
    // and it allows an easy way to get a snapshot of the requesting thread's
    // context outside of an exception.
    InitializeCriticalSection(&s_handler_critical_section);

    s_handler_start_semaphore = CreateSemaphore(NULL, 0, 1, NULL);
    nvDebugCheck(s_handler_start_semaphore != NULL);

    s_handler_finish_semaphore = CreateSemaphore(NULL, 0, 1, NULL);
    nvDebugCheck(s_handler_finish_semaphore != NULL);

    // Don't attempt to create the thread if we could not create the semaphores.
    if (s_handler_finish_semaphore != NULL && s_handler_start_semaphore != NULL) {
        DWORD thread_id;
        s_handler_thread = CreateThread(NULL, // lpThreadAttributes
                                        kExceptionHandlerThreadInitialStackSize,
                                        ExceptionHandlerThreadMain,
                                        NULL, // lpParameter
                                        0, // dwCreationFlags
                                        &thread_id);
        nvDebugCheck(s_handler_thread != NULL);
    }

    /* @@ We should avoid loading modules in the exception handler!
    dbghelp_module_ = LoadLibrary(L"dbghelp.dll");
    if (dbghelp_module_) {
        minidump_write_dump_ = reinterpret_cast(GetProcAddress(dbghelp_module_, "MiniDumpWriteDump"));
    }
    */
}

// Currently a stub: nothing created by initHandlerThread() is released here.
static void shutHandlerThread() {
    // @@ Free stuff. Terminate thread.
}

#endif // NV_USE_SEPARATE_THREAD
#endif // NV_OS_WIN32


// Enable signal handler.
// Must not be called while the handler is already enabled (checked with nvCheck).
// On Win32/MSVC this installs the unhandled-exception filter and the CRT
// invalid-parameter / pure-call handlers and initializes the symbol handler;
// elsewhere (with HAVE_SIGNAL_H) it installs nvSigHandler for SIGSEGV, SIGTRAP,
// SIGFPE and SIGBUS and saves the previous actions.
void debug::enableSigHandler(bool interactive)
{
    nvCheck(s_sig_handler_enabled != true);
    s_sig_handler_enabled = true;
    s_interactive = interactive;

#if NV_OS_WIN32 && NV_CC_MSVC
    if (interactive) {
        // Do not display message boxes on error.
        // http://msdn.microsoft.com/en-us/library/windows/desktop/ms680621(v=vs.85).aspx
        SetErrorMode(SEM_FAILCRITICALERRORS|SEM_NOGPFAULTERRORBOX|SEM_NOOPENFILEERRORBOX);

        // CRT reports errors to debug output only.
        // http://msdn.microsoft.com/en-us/library/1y71x448(v=vs.80).aspx
        _CrtSetReportMode(_CRT_WARN, _CRTDBG_MODE_DEBUG);
        _CrtSetReportMode(_CRT_ERROR, _CRTDBG_MODE_DEBUG);
        _CrtSetReportMode(_CRT_ASSERT, _CRTDBG_MODE_DEBUG);
    }

#if NV_USE_SEPARATE_THREAD
    initHandlerThread();
#endif

    // The previous filter is remembered so that disableSigHandler() can restore it.
    s_old_exception_filter = ::SetUnhandledExceptionFilter( handleException );

#if _MSC_VER >= 1400 // MSVC 2005/8
    _set_invalid_parameter_handler(handleInvalidParameter);
#endif // _MSC_VER >= 1400

    _set_purecall_handler(handlePureVirtualCall);

    // SYMOPT_DEFERRED_LOADS make us not take a ton of time unless we actual log traces
    SymSetOptions(SYMOPT_DEFERRED_LOADS|SYMOPT_FAIL_CRITICAL_ERRORS|SYMOPT_LOAD_LINES|SYMOPT_UNDNAME);

    if (!SymInitialize(GetCurrentProcess(), NULL, TRUE)) {
        DWORD error = GetLastError();
        nvDebug("SymInitialize returned error : %d\n", error);
    }

#elif !NV_OS_WIN32 && defined(HAVE_SIGNAL_H)

    // Install our signal handler
    struct sigaction sa;
    sa.sa_sigaction = nvSigHandler;
    sigemptyset (&sa.sa_mask);
    sa.sa_flags = SA_ONSTACK | SA_RESTART | SA_SIGINFO;

    sigaction(SIGSEGV, &sa, &s_old_sigsegv);
    sigaction(SIGTRAP, &sa, &s_old_sigtrap);
    sigaction(SIGFPE, &sa, &s_old_sigfpe);
    sigaction(SIGBUS, &sa, &s_old_sigbus);

#endif
}

/// Disable signal handler.
// Must only be called while the handler is enabled (checked with nvCheck).
// Restores what enableSigHandler() saved: the previous exception filter on
// Win32/MSVC, the previous signal actions elsewhere.
void debug::disableSigHandler()
{
    nvCheck(s_sig_handler_enabled == true);
    s_sig_handler_enabled = false;

#if NV_OS_WIN32 && NV_CC_MSVC

    ::SetUnhandledExceptionFilter( s_old_exception_filter );
    s_old_exception_filter = NULL;

    SymCleanup(GetCurrentProcess());

#elif !NV_OS_WIN32 && defined(HAVE_SIGNAL_H)

    sigaction(SIGSEGV, &s_old_sigsegv, NULL);
    sigaction(SIGTRAP, &s_old_sigtrap, NULL);
    sigaction(SIGFPE, &s_old_sigfpe, NULL);
    sigaction(SIGBUS, &s_old_sigbus, NULL);

#endif
}


// Returns true when a debugger appears to be attached to this process.
// The detection method differs per platform; see each branch.
bool debug::isDebuggerPresent()
{
#if NV_OS_WIN32
    // IsDebuggerPresent is looked up at run time in kernel32.dll instead of being linked directly.
    HINSTANCE kernel32 = GetModuleHandleA("kernel32.dll");
    if (kernel32) {
        FARPROC IsDebuggerPresent = GetProcAddress(kernel32, "IsDebuggerPresent");
        if (IsDebuggerPresent != NULL && IsDebuggerPresent()) {
            return true;
        }
    }
    return false;
#elif NV_OS_XBOX
#ifdef _DEBUG
    return DmIsDebuggerPresent() == TRUE;
#else
    return false;
#endif
#elif NV_OS_ORBIS
#if PS4_FINAL_REQUIREMENTS
    return false;
#else
    return sceDbgIsDebuggerAttached() == 1;
#endif
#elif NV_OS_DARWIN
    // Query this process' kinfo_proc through sysctl and test the P_TRACED flag.
    int mib[4];
    struct kinfo_proc info;
    size_t size;
    mib[0] = CTL_KERN;
    mib[1] = KERN_PROC;
    mib[2] = KERN_PROC_PID;
    mib[3] = getpid();
    size = sizeof(info);
    // Pre-cleared so that the test below yields false if sysctl does not fill the struct.
    info.kp_proc.p_flag = 0;
    sysctl(mib,4,&info,&size,NULL,0);
    return ((info.kp_proc.p_flag & P_TRACED) == P_TRACED);
#else
    // if ppid != sid, some process spawned our app, probably a debugger.
    return getsid(getpid()) != getppid();
#endif
}

// On Win32, when no debugger is attached yet, launches VSJitDebugger.exe from
// the system directory for this process id and waits for a debugger to show up.
// Returns false when the launch fails or the launcher exits with a non-zero
// code; returns true otherwise, including on non-Win32 platforms.
bool debug::attachToDebugger()
{
#if NV_OS_WIN32
    if (isDebuggerPresent() == FALSE) {
        // Build the quoted command line: "<system dir>\VSJitDebugger.exe" -p <pid>
        Path process(1024);
        process.copy("\"");
        GetSystemDirectoryA(process.str() + 1, 1024 - 1);

        process.appendSeparator();

        process.appendFormat("VSJitDebugger.exe\" -p %lu", ::GetCurrentProcessId());

        STARTUPINFOA sSi;
        memset(&sSi, 0, sizeof(sSi));

        PROCESS_INFORMATION sPi;
        memset(&sPi, 0, sizeof(sPi));

        BOOL b = CreateProcessA(NULL, process.str(), NULL, NULL, FALSE, 0, NULL, NULL, &sSi, &sPi);
        if (b != FALSE) {
            ::WaitForSingleObject(sPi.hProcess, INFINITE);

            DWORD dwExitCode;
            ::GetExitCodeProcess(sPi.hProcess, &dwExitCode);
            if (dwExitCode != 0) //if exit code is zero, a debugger was selected
                b = FALSE;
        }

        if (sPi.hThread != NULL) ::CloseHandle(sPi.hThread);
        if (sPi.hProcess != NULL) ::CloseHandle(sPi.hProcess);

        if (b == FALSE)
            return false;

        // Poll up to 5*60 times, 200 ms apart, until the debugger is attached.
        for (int i = 0; i < 5*60; i++) {
            if (isDebuggerPresent())
                break;

            ::Sleep(200);
        }
    }
#endif // NV_OS_WIN32

    return true;
}
Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/Debug.h
===================================================================
--- ps/trunk/libraries/source/nvtt/src/src/nvcore/Debug.h (revision 23379)
+++ ps/trunk/libraries/source/nvtt/src/src/nvcore/Debug.h (revision 23380)
@@ -1,217 +1,217 @@
-// This code is in the public domain -- Ignacio Castaño
+// This code is in the public domain -- Ignacio Castaño
#pragma once
#ifndef NV_CORE_DEBUG_H
#define NV_CORE_DEBUG_H

#include "nvcore.h"

#include // va_list

// Make sure we are using our assert.
#undef assert #define NV_ABORT_DEBUG 1 #define NV_ABORT_IGNORE 2 #define NV_ABORT_EXIT 3 #define nvNoAssert(exp) \ NV_MULTI_LINE_MACRO_BEGIN \ (void)sizeof(exp); \ NV_MULTI_LINE_MACRO_END #if NV_NO_ASSERT # define nvAssert(exp) nvNoAssert(exp) # define nvCheck(exp) nvNoAssert(exp) # define nvDebugAssert(exp) nvNoAssert(exp) # define nvDebugCheck(exp) nvNoAssert(exp) # define nvDebugBreak() nvNoAssert(0) #else // NV_NO_ASSERT # if NV_CC_MSVC // @@ Does this work in msvc-6 and earlier? # define nvDebugBreak() __debugbreak() //# define nvDebugBreak() __asm { int 3 } # elif NV_OS_ORBIS # define nvDebugBreak() __debugbreak() # elif NV_CC_GNUC # define nvDebugBreak() __builtin_trap() # else # error "No nvDebugBreak()!" # endif /* # elif NV_CC_GNUC || NV_CPU_PPC && NV_OS_DARWIN // @@ Use __builtin_trap() on GCC # define nvDebugBreak() __asm__ volatile ("trap") # elif (NV_CC_GNUC || NV_CPU_X86 || NV_CPU_X86_64) && NV_OS_DARWIN # define nvDebugBreak() __asm__ volatile ("int3") # elif NV_CC_GNUC || NV_CPU_X86 || NV_CPU_X86_64 # define nvDebugBreak() __asm__ ( "int %0" : :"I"(3) ) # else # include # define nvDebugBreak() raise(SIGTRAP) # endif */ #define nvDebugBreakOnce() \ NV_MULTI_LINE_MACRO_BEGIN \ static bool firstTime = true; \ if (firstTime) { firstTime = false; nvDebugBreak(); } \ NV_MULTI_LINE_MACRO_END #define nvAssertMacro(exp) \ NV_MULTI_LINE_MACRO_BEGIN \ if (!(exp)) { \ if (nvAbort(#exp, __FILE__, __LINE__, __FUNC__) == NV_ABORT_DEBUG) { \ nvDebugBreak(); \ } \ } \ NV_MULTI_LINE_MACRO_END // GCC, LLVM need "##" before the __VA_ARGS__, MSVC doesn't care #define nvAssertMacroWithIgnoreAll(exp,...) 
\ NV_MULTI_LINE_MACRO_BEGIN \ static bool ignoreAll = false; \ if (!ignoreAll && !(exp)) { \ int result = nvAbort(#exp, __FILE__, __LINE__, __FUNC__, ##__VA_ARGS__); \ if (result == NV_ABORT_DEBUG) { \ nvDebugBreak(); \ } else if (result == NV_ABORT_IGNORE) { \ ignoreAll = true; \ } \ } \ NV_MULTI_LINE_MACRO_END // Interesting assert macro from Insomniac: // http://www.gdcvault.com/play/1015319/Developing-Imperfect-Software-How-to // Used as follows: // if (nvCheck(i < count)) { // normal path // } else { // fixup code. // } // This style of macro could be combined with __builtin_expect to let the compiler know failure is unlikely. #define nvCheckMacro(exp) \ (\ (exp) ? true : ( \ (nvAbort(#exp, __FILE__, __LINE__, __FUNC__) == NV_ABORT_DEBUG) ? (nvDebugBreak(), true) : ( false ) \ ) \ ) #define nvAssert(exp) nvAssertMacro(exp) #define nvCheck(exp) nvAssertMacro(exp) #if defined(_DEBUG) # define nvDebugAssert(exp) nvAssertMacro(exp) # define nvDebugCheck(exp) nvAssertMacro(exp) #else // _DEBUG # define nvDebugAssert(exp) nvNoAssert(exp) # define nvDebugCheck(exp) nvNoAssert(exp) #endif // _DEBUG #endif // NV_NO_ASSERT // Use nvAssume for very simple expresions only: nvAssume(0), nvAssume(value == true), etc. /*#if !defined(_DEBUG) # if NV_CC_MSVC # define nvAssume(exp) __assume(exp) # else # define nvAssume(exp) nvCheck(exp) # endif #else # define nvAssume(exp) nvCheck(exp) #endif*/ #if defined(_DEBUG) # if NV_CC_MSVC # define nvUnreachable() nvAssert(0 && "unreachable"); __assume(0) # else # define nvUnreachable() nvAssert(0 && "unreachable"); __builtin_unreachable() # endif #else # if NV_CC_MSVC # define nvUnreachable() __assume(0) # else # define nvUnreachable() __builtin_unreachable() # endif #endif #define nvError(x) nvAbort(x, __FILE__, __LINE__, __FUNC__) #define nvWarning(x) nvDebugPrint("*** Warning %s/%d: %s\n", __FILE__, __LINE__, (x)) #ifndef NV_DEBUG_PRINT #define NV_DEBUG_PRINT 1 //defined(_DEBUG) #endif #if NV_DEBUG_PRINT #define nvDebug(...) 
nvDebugPrint(__VA_ARGS__) #else #if NV_CC_MSVC #define nvDebug(...) __noop(__VA_ARGS__) #else #define nvDebug(...) ((void)0) // Non-msvc platforms do not evaluate arguments? #endif #endif NVCORE_API int nvAbort(const char *exp, const char *file, int line, const char * func = NULL, const char * msg = NULL, ...) __attribute__((format (printf, 5, 6))); NVCORE_API void NV_CDECL nvDebugPrint( const char *msg, ... ) __attribute__((format (printf, 1, 2))); namespace nv { inline bool isValidPtr(const void * ptr) { #if NV_CPU_X86_64 || POSH_CPU_PPC64 if (ptr == NULL) return true; if (reinterpret_cast(ptr) < 0x10000ULL) return false; if (reinterpret_cast(ptr) >= 0x000007FFFFFEFFFFULL) return false; #else if (reinterpret_cast(ptr) == 0xcccccccc) return false; if (reinterpret_cast(ptr) == 0xcdcdcdcd) return false; if (reinterpret_cast(ptr) == 0xdddddddd) return false; if (reinterpret_cast(ptr) == 0xffffffff) return false; #endif return true; } // Message handler interface. struct MessageHandler { virtual void log(const char * str, va_list arg) = 0; virtual ~MessageHandler() {} }; // Assert handler interface. 
    // assertion() receives the failed expression text, its source location and
    // an optional printf-style message with its argument list; the value it
    // returns is what nvAbort() returns (the NV_ABORT_* codes).
    struct AssertHandler
    {
        virtual int assertion(const char *exp, const char *file, int line, const char *func, const char *msg, va_list arg) = 0;
        virtual ~AssertHandler() {}
    };


    namespace debug
    {
        NVCORE_API void dumpInfo();
        NVCORE_API void dumpCallstack( MessageHandler *messageHandler, int callstackLevelsToSkip = 0 );

        NVCORE_API void setMessageHandler( MessageHandler * messageHandler );
        NVCORE_API void resetMessageHandler();

        NVCORE_API void setAssertHandler( AssertHandler * assertHanlder );
        NVCORE_API void resetAssertHandler();

        NVCORE_API void enableSigHandler(bool interactive);
        NVCORE_API void disableSigHandler();

        NVCORE_API bool isDebuggerPresent();
        NVCORE_API bool attachToDebugger();

        NVCORE_API void terminate(int code);
    }

} // nv namespace

#endif // NV_CORE_DEBUG_H
Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/ForEach.h
===================================================================
--- ps/trunk/libraries/source/nvtt/src/src/nvcore/ForEach.h (revision 23379)
+++ ps/trunk/libraries/source/nvtt/src/src/nvcore/ForEach.h (revision 23380)
@@ -1,68 +1,68 @@
-// This code is in the public domain -- Ignacio Castaño
+// This code is in the public domain -- Ignacio Castaño
#pragma once
#ifndef NV_CORE_FOREACH_H
#define NV_CORE_FOREACH_H

/*
These foreach macros are very non-standard and somewhat confusing, but I like them.
*/

#include "nvcore.h"

#if NV_CC_GNUC // If typeof or decltype is available:

#if !NV_CC_CPP11
#   define NV_DECLTYPE typeof // Using a non-standard extension over typeof that behaves as C++11 decltype
#else
#   define NV_DECLTYPE decltype
#endif

/*
Ideally we would like to write this:

#define NV_FOREACH(i, container) \
    for(NV_DECLTYPE(container)::PseudoIndex i((container).start()); !(container).isDone(i); (container).advance(i))

But gcc versions prior to 4.7 required an intermediate type. See:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=6709
*/

// Expands to a typedef followed by a for statement, i.e. to two statements.
#define NV_FOREACH(i, container) \
    typedef NV_DECLTYPE(container) NV_STRING_JOIN2(cont,__LINE__); \
    for(NV_STRING_JOIN2(cont,__LINE__)::PseudoIndex i((container).start()); !(container).isDone(i); (container).advance(i))

#else // If typeof not available:

#include // placement new

// Type-erased storage for a container's PseudoIndex: the index is
// placement-constructed into 'memory' and never destroyed.
struct PseudoIndexWrapper {
    template
    PseudoIndexWrapper(const T & container) {
        nvStaticCheck(sizeof(typename T::PseudoIndex) <= sizeof(memory));
        new (memory) typename T::PseudoIndex(container.start());
    }
    // PseudoIndex cannot have a dtor!

    // The container pointer argument is unused at run time.
    template typename T::PseudoIndex & operator()(const T * /*container*/) {
        return *reinterpret_cast(memory);
    }
    template const typename T::PseudoIndex & operator()(const T * /*container*/) const {
        return *reinterpret_cast(memory);
    }

    uint8 memory[4]; // Increase the size if we have bigger enumerators.
};

#define NV_FOREACH(i, container) \
    for(PseudoIndexWrapper i(container); !(container).isDone(i(&(container))); (container).advance(i(&(container))))

#endif

// Declare foreach keyword.
#if !defined NV_NO_USE_KEYWORDS
#   define foreach NV_FOREACH
#   define foreach_index NV_FOREACH
#endif

#endif // NV_CORE_FOREACH_H
Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/Hash.h
===================================================================
--- ps/trunk/libraries/source/nvtt/src/src/nvcore/Hash.h (revision 23379)
+++ ps/trunk/libraries/source/nvtt/src/src/nvcore/Hash.h (revision 23380)
@@ -1,83 +1,83 @@
-// This code is in the public domain -- Ignacio Castaño
+// This code is in the public domain -- Ignacio Castaño
#pragma once
#ifndef NV_CORE_HASH_H
#define NV_CORE_HASH_H

#include "nvcore.h"

namespace nv
{
    // Hashes 'size' bytes starting at 'data_in', one byte at a time, starting
    // from the seed 'h'. Passing a previous result as 'h' chains hashes.
    inline uint sdbmHash(const void * data_in, uint size, uint h = 5381)
    {
        const uint8 * data = (const uint8 *) data_in;
        uint i = 0;
        while (i < size) {
            h = (h << 16) + (h << 6) - h + (uint) data[i++];
        }
        return h;
    }

    // Note that this hash does not handle NaN properly.
inline uint sdbmFloatHash(const float * f, uint count, uint h = 5381) { for (uint i = 0; i < count; i++) { //nvDebugCheck(nv::isFinite(*f)); union { float f; uint32 i; } x = { f[i] }; if (x.i == 0x80000000) x.i = 0; h = sdbmHash(&x, 4, h); } return h; } template inline uint hash(const T & t, uint h = 5381) { return sdbmHash(&t, sizeof(T), h); } template <> inline uint hash(const float & f, uint h) { return sdbmFloatHash(&f, 1, h); } // Functors for hash table: template struct Hash { uint operator()(const Key & k) const { return hash(k); } }; template struct Equal { bool operator()(const Key & k0, const Key & k1) const { return k0 == k1; } }; // @@ Move to Utils.h? template struct Pair { T1 first; T2 second; }; template bool operator==(const Pair & p0, const Pair & p1) { return p0.first == p1.first && p0.second == p1.second; } template uint hash(const Pair & p, uint h = 5381) { return hash(p.second, hash(p.first)); } } // nv namespace #endif // NV_CORE_HASH_H Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/Memory.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/Memory.cpp (revision 23379) +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/Memory.cpp (revision 23380) @@ -1,119 +1,119 @@ -// This code is in the public domain -- Ignacio Castaño +// This code is in the public domain -- Ignacio Castaño #include "Memory.h" #include "Debug.h" #include #define USE_EFENCE 0 #if USE_EFENCE extern "C" void *EF_malloc(size_t size); extern "C" void *EF_realloc(void * oldBuffer, size_t newSize); extern "C" void EF_free(void * address); #endif using namespace nv; #if NV_OVERRIDE_ALLOC void * malloc(size_t size) { #if USE_EFENCE return EF_malloc(size); #else return ::malloc(size); #endif } void * debug_malloc(size_t size, const char * file, int line) { NV_UNUSED(file); NV_UNUSED(line); #if USE_EFENCE return EF_malloc(size); #else return ::malloc(size); #endif } void free(void * ptr) { #if USE_EFENCE return 
EF_free(const_cast(ptr)); #else ::free(const_cast(ptr)); #endif } void * realloc(void * ptr, size_t size) { nvDebugCheck(ptr != NULL || size != 0); // undefined realloc behavior. #if USE_EFENCE return EF_realloc(ptr, size); #else return ::realloc(ptr, size); #endif } /* No need to override this unless we want line info. void * operator new (size_t size) throw() { return malloc(size); } void operator delete (void *p) throw() { free(p); } void * operator new [] (size_t size) throw() { return malloc(size); } void operator delete [] (void * p) throw() { free(p); } */ #if 0 // Code from Apple: void* operator new(std::size_t sz) throw (std::bad_alloc) { void *result = std::malloc (sz == 0 ? 1 : sz); if (result == NULL) throw std::bad_alloc(); gNewCounter++; return result; } void operator delete(void* p) throw() { if (p == NULL) return; std::free (p); gDeleteCounter++; } /* These are the 'nothrow' versions of the above operators. The system version will try to call a std::new_handler if they fail, but your overriding versions are not required to do this. */ void* operator new(std::size_t sz, const std::nothrow_t&) throw() { try { void * result = ::operator new (sz); // calls our overridden operator new return result; } catch (std::bad_alloc &) { return NULL; } } void operator delete(void* p, const std::nothrow_t&) throw() { ::operator delete (p); } #endif // 0 #endif // NV_OVERRIDE_ALLOC Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/Ptr.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/Ptr.h (revision 23379) +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/Ptr.h (revision 23380) @@ -1,322 +1,322 @@ -// This code is in the public domain -- Ignacio Castaño +// This code is in the public domain -- Ignacio Castaño #ifndef NV_CORE_PTR_H #define NV_CORE_PTR_H #include "nvcore.h" #include "Debug.h" #include "RefCounted.h" namespace nv { class WeakProxy; /** Simple auto pointer template class. 
* * This is very similar to the standard auto_ptr class, but with some * additional limitations to make its use less error prone: * - Copy constructor and assignment operator are disabled. * - reset method is removed. * * The semantics of the standard auto_ptr are not clear and change depending * on the std implementation. For a discussion of the problems of auto_ptr read: * http://www.awprofessional.com/content/images/020163371X/autoptrupdate\auto_ptr_update.html */ template class AutoPtr { NV_FORBID_COPY(AutoPtr); NV_FORBID_HEAPALLOC(); public: /// Ctor. AutoPtr(T * p = NULL) : m_ptr(p) { } template AutoPtr(Q * p) : m_ptr(static_cast(p)) { } /// Dtor. Deletes owned pointer. ~AutoPtr() { delete m_ptr; m_ptr = NULL; } /// Delete owned pointer and assign new one. void operator=( T * p ) { if (p != m_ptr) { delete m_ptr; m_ptr = p; } } template void operator=( Q * p ) { if (p != m_ptr) { delete m_ptr; m_ptr = static_cast(p); } } /// Member access. T * operator -> () const { nvDebugCheck(m_ptr != NULL); return m_ptr; } /// Get reference. T & operator*() const { nvDebugCheck(m_ptr != NULL); return *m_ptr; } /// Get pointer. T * ptr() const { return m_ptr; } /// Relinquish ownership of the underlying pointer and returns that pointer. T * release() { T * tmp = m_ptr; m_ptr = NULL; return tmp; } /// Const pointer equal comparation. friend bool operator == (const AutoPtr & ap, const T * const p) { return (ap.ptr() == p); } /// Const pointer nequal comparation. friend bool operator != (const AutoPtr & ap, const T * const p) { return (ap.ptr() != p); } /// Const pointer equal comparation. friend bool operator == (const T * const p, const AutoPtr & ap) { return (ap.ptr() == p); } /// Const pointer nequal comparation. friend bool operator != (const T * const p, const AutoPtr & ap) { return (ap.ptr() != p); } private: T * m_ptr; }; /// Smart pointer template class. template class SmartPtr { public: // BaseClass must implement addRef() and release(). 
typedef SmartPtr ThisType; /// Default ctor. SmartPtr() : m_ptr(NULL) { } /// Other type assignment. template SmartPtr( const SmartPtr & tc ) { m_ptr = static_cast( tc.ptr() ); if (m_ptr) { m_ptr->addRef(); } } /// Copy ctor. SmartPtr( const ThisType & bc ) { m_ptr = bc.ptr(); if (m_ptr) { m_ptr->addRef(); } } /// Copy cast ctor. SmartPtr(NULL) is valid. explicit SmartPtr( BaseClass * bc ) { m_ptr = bc; if (m_ptr) { m_ptr->addRef(); } } /// Dtor. ~SmartPtr() { set(NULL); } /// -> operator. BaseClass * operator -> () const { nvCheck( m_ptr != NULL ); return m_ptr; } /// * operator. BaseClass & operator*() const { nvCheck( m_ptr != NULL ); return *m_ptr; } /// Get pointer. BaseClass * ptr() const { return m_ptr; } /// Other type assignment. template void operator = ( const SmartPtr & tc ) { set( static_cast(tc.ptr()) ); } /// This type assignment. void operator = ( const ThisType & bc ) { set( bc.ptr() ); } /// Pointer assignment. void operator = ( BaseClass * bc ) { set( bc ); } /// Other type equal comparation. template bool operator == ( const SmartPtr & other ) const { return m_ptr == other.ptr(); } /// This type equal comparation. bool operator == ( const ThisType & bc ) const { return m_ptr == bc.ptr(); } /// Const pointer equal comparation. bool operator == ( const BaseClass * const bc ) const { return m_ptr == bc; } /// Other type not equal comparation. template bool operator != ( const SmartPtr & other ) const { return m_ptr != other.ptr(); } /// Other type not equal comparation. bool operator != ( const ThisType & bc ) const { return m_ptr != bc.ptr(); } /// Const pointer not equal comparation. bool operator != (const BaseClass * const bc) const { return m_ptr != bc; } /// This type lower than comparation. bool operator < (const ThisType & p) const { return m_ptr < p.ptr(); } bool isValid() const { return isValidPtr(m_ptr); } private: // Set this pointer. 
void set( BaseClass * p ) { if (p) p->addRef(); if (m_ptr) m_ptr->release(); m_ptr = p; } private: BaseClass * m_ptr; }; /// Smart pointer template class. template class WeakPtr { public: WeakPtr() {} WeakPtr(T * p) { operator=(p); } WeakPtr(const SmartPtr & p) { operator=(p.ptr()); } // Default constructor and assignment from weak_ptr are OK. void operator=(T * p) { if (p) { m_proxy = p->getWeakProxy(); nvDebugCheck(m_proxy != NULL); nvDebugCheck(m_proxy->ptr() == p); } else { m_proxy = NULL; } } void operator=(const SmartPtr & ptr) { operator=(ptr.ptr()); } bool operator==(const SmartPtr & p) const { return ptr() == p.ptr(); } bool operator!=(const SmartPtr & p) const { return ptr() != p.ptr(); } bool operator==(const WeakPtr & p) const { return ptr() == p.ptr(); } bool operator!=(const WeakPtr & p) const { return ptr() != p.ptr(); } bool operator==(T * p) const { return ptr() == p; } bool operator!=(T * p) const { return ptr() != p; } T * operator->() const { T * p = ptr(); nvDebugCheck(p != NULL); return p; } T * ptr() const { if (m_proxy != NULL) { return static_cast(m_proxy->ptr()); } return NULL; } private: mutable SmartPtr m_proxy; }; } // nv namespace #endif // NV_CORE_PTR_H Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/RefCounted.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/RefCounted.h (revision 23379) +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/RefCounted.h (revision 23380) @@ -1,149 +1,149 @@ -// This code is in the public domain -- Ignacio Castaño +// This code is in the public domain -- Ignacio Castaño #ifndef NV_CORE_REFCOUNTED_H #define NV_CORE_REFCOUNTED_H #include "nvcore.h" #include "Debug.h" #define NV_DECLARE_PTR(Class) \ template class SmartPtr; \ typedef SmartPtr Class ## Ptr; \ typedef SmartPtr Class ## ConstPtr namespace nv { /// Weak proxy. class WeakProxy { NV_FORBID_COPY(WeakProxy); public: /// Ctor. 
WeakProxy(void * ptr) : m_count(0), m_ptr(ptr) { } /// Dtor. ~WeakProxy() { nvCheck( m_count == 0 ); } /// Increase reference count. uint addRef() const { m_count++; return m_count; } /// Decrease reference count and remove when 0. uint release() const { nvCheck( m_count > 0 ); m_count--; if( m_count == 0 ) { delete this; return 0; } return m_count; } /// WeakPtr's call this to determine if their pointer is valid or not. bool isAlive() const { return m_ptr != NULL; } /// Only the actual object should call this. void notifyObjectDied() { m_ptr = NULL; } /// Return proxy pointer. void * ptr() const { return m_ptr; } private: mutable int m_count; void * m_ptr; }; /// Reference counted base class to be used with SmartPtr and WeakPtr. class RefCounted { NV_FORBID_COPY(RefCounted); public: /// Ctor. RefCounted() : m_count(0), m_weak_proxy(NULL) { } /// Virtual dtor. virtual ~RefCounted() { nvCheck( m_count == 0 ); releaseWeakProxy(); } /// Increase reference count. uint addRef() const { m_count++; return m_count; } /// Decrease reference count and remove when 0. uint release() const { nvCheck( m_count > 0 ); m_count--; if( m_count == 0 ) { delete this; return 0; } return m_count; } /// Get weak proxy. WeakProxy * getWeakProxy() const { if (m_weak_proxy == NULL) { m_weak_proxy = new WeakProxy((void *)this); m_weak_proxy->addRef(); } return m_weak_proxy; } /// Release the weak proxy. void releaseWeakProxy() const { if (m_weak_proxy != NULL) { m_weak_proxy->notifyObjectDied(); m_weak_proxy->release(); m_weak_proxy = NULL; } } /// Get reference count. 
int refCount() const { return m_count; } private: mutable int m_count; mutable WeakProxy * m_weak_proxy; }; } // nv namespace #endif // NV_CORE_REFCOUNTED_H Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/StdStream.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/StdStream.h (revision 23379) +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/StdStream.h (revision 23380) @@ -1,463 +1,463 @@ -// This code is in the public domain -- Ignacio Castaño +// This code is in the public domain -- Ignacio Castaño //#pragma once //#ifndef NV_CORE_STDSTREAM_H //#define NV_CORE_STDSTREAM_H #include "nvcore.h" #include "Stream.h" #include "Array.h" #include // fopen #include // memcpy namespace nv { // Portable version of fopen. inline FILE * fileOpen(const char * fileName, const char * mode) { nvCheck(fileName != NULL); #if NV_CC_MSVC && _MSC_VER >= 1400 FILE * fp; if (fopen_s(&fp, fileName, mode) == 0) { return fp; } return NULL; #else return fopen(fileName, mode); #endif } /// Base stdio stream. class NVCORE_CLASS StdStream : public Stream { NV_FORBID_COPY(StdStream); public: /// Ctor. StdStream( FILE * fp, bool autoclose ) : m_fp(fp), m_autoclose(autoclose) { } /// Dtor. virtual ~StdStream() { if( m_fp != NULL && m_autoclose ) { #if NV_OS_WIN32 _fclose_nolock( m_fp ); #else fclose( m_fp ); #endif } } /** @name Stream implementation. 
*/ //@{ virtual void seek( uint pos ) { nvDebugCheck(m_fp != NULL); nvDebugCheck(pos <= size()); #if NV_OS_WIN32 _fseek_nolock(m_fp, pos, SEEK_SET); #else fseek(m_fp, pos, SEEK_SET); #endif } virtual uint tell() const { nvDebugCheck(m_fp != NULL); #if NV_OS_WIN32 return _ftell_nolock(m_fp); #else return (uint)ftell(m_fp); #endif } virtual uint size() const { nvDebugCheck(m_fp != NULL); #if NV_OS_WIN32 uint pos = _ftell_nolock(m_fp); _fseek_nolock(m_fp, 0, SEEK_END); uint end = _ftell_nolock(m_fp); _fseek_nolock(m_fp, pos, SEEK_SET); #else uint pos = (uint)ftell(m_fp); fseek(m_fp, 0, SEEK_END); uint end = (uint)ftell(m_fp); fseek(m_fp, pos, SEEK_SET); #endif return end; } virtual bool isError() const { return m_fp == NULL || ferror( m_fp ) != 0; } virtual void clearError() { nvDebugCheck(m_fp != NULL); clearerr(m_fp); } // @@ The original implementation uses feof, which only returns true when we attempt to read *past* the end of the stream. // That is, if we read the last byte of a file, then isAtEnd would still return false, even though the stream pointer is at the file end. This is not the intent and was inconsistent with the implementation of the MemoryStream, a better // implementation uses use ftell and fseek to determine our location within the file. virtual bool isAtEnd() const { if (m_fp == NULL) return true; //nvDebugCheck(m_fp != NULL); //return feof( m_fp ) != 0; #if NV_OS_WIN32 uint pos = _ftell_nolock(m_fp); _fseek_nolock(m_fp, 0, SEEK_END); uint end = _ftell_nolock(m_fp); _fseek_nolock(m_fp, pos, SEEK_SET); #else uint pos = (uint)ftell(m_fp); fseek(m_fp, 0, SEEK_END); uint end = (uint)ftell(m_fp); fseek(m_fp, pos, SEEK_SET); #endif return pos == end; } /// Always true. virtual bool isSeekable() const { return true; } //@} protected: FILE * m_fp; bool m_autoclose; }; /// Standard output stream. class NVCORE_CLASS StdOutputStream : public StdStream { NV_FORBID_COPY(StdOutputStream); public: /// Construct stream by file name. 
StdOutputStream( const char * name ) : StdStream(fileOpen(name, "wb"), /*autoclose=*/true) { } /// Construct stream by file handle. StdOutputStream( FILE * fp, bool autoclose ) : StdStream(fp, autoclose) { } /** @name Stream implementation. */ //@{ /// Write data. virtual uint serialize( void * data, uint len ) { nvDebugCheck(data != NULL); nvDebugCheck(m_fp != NULL); #if NV_OS_WIN32 return (uint)_fwrite_nolock(data, 1, len, m_fp); #elif NV_OS_LINUX return (uint)fwrite_unlocked(data, 1, len, m_fp); #elif NV_OS_DARWIN // @@ No error checking, always returns len. for (uint i = 0; i < len; i++) { putc_unlocked(((char *)data)[i], m_fp); } return len; #else return (uint)fwrite(data, 1, len, m_fp); #endif } virtual bool isLoading() const { return false; } virtual bool isSaving() const { return true; } //@} }; /// Standard input stream. class NVCORE_CLASS StdInputStream : public StdStream { NV_FORBID_COPY(StdInputStream); public: /// Construct stream by file name. StdInputStream( const char * name ) : StdStream(fileOpen(name, "rb"), /*autoclose=*/true) { } /// Construct stream by file handle. StdInputStream( FILE * fp, bool autoclose=true ) : StdStream(fp, autoclose) { } /** @name Stream implementation. */ //@{ /// Read data. virtual uint serialize( void * data, uint len ) { nvDebugCheck(data != NULL); nvDebugCheck(m_fp != NULL); #if NV_OS_WIN32 return (uint)_fread_nolock(data, 1, len, m_fp); #elif NV_OS_LINUX return (uint)fread_unlocked(data, 1, len, m_fp); #elif NV_OS_DARWIN // @@ No error checking, always returns len. for (uint i = 0; i < len; i++) { ((char *)data)[i] = getc_unlocked(m_fp); } return len; #else return (uint)fread(data, 1, len, m_fp); #endif } virtual bool isLoading() const { return true; } virtual bool isSaving() const { return false; } //@} }; /// Memory input stream. class NVCORE_CLASS MemoryInputStream : public Stream { NV_FORBID_COPY(MemoryInputStream); public: /// Ctor. 
MemoryInputStream( const uint8 * mem, uint size ) : m_mem(mem), m_ptr(mem), m_size(size) { } /** @name Stream implementation. */ //@{ /// Read data. virtual uint serialize( void * data, uint len ) { nvDebugCheck(data != NULL); nvDebugCheck(!isError()); uint left = m_size - tell(); if (len > left) len = left; memcpy( data, m_ptr, len ); m_ptr += len; return len; } virtual void seek( uint pos ) { nvDebugCheck(!isError()); m_ptr = m_mem + pos; nvDebugCheck(!isError()); } virtual uint tell() const { nvDebugCheck(m_ptr >= m_mem); return uint(m_ptr - m_mem); } virtual uint size() const { return m_size; } virtual bool isError() const { return m_mem == NULL || m_ptr > m_mem + m_size || m_ptr < m_mem; } virtual void clearError() { // Nothing to do. } virtual bool isAtEnd() const { return m_ptr == m_mem + m_size; } /// Always true. virtual bool isSeekable() const { return true; } virtual bool isLoading() const { return true; } virtual bool isSaving() const { return false; } //@} const uint8 * ptr() const { return m_ptr; } private: const uint8 * m_mem; const uint8 * m_ptr; uint m_size; }; /// Buffer output stream. class NVCORE_CLASS BufferOutputStream : public Stream { NV_FORBID_COPY(BufferOutputStream); public: BufferOutputStream(Array & buffer) : m_buffer(buffer) { } virtual uint serialize( void * data, uint len ) { nvDebugCheck(data != NULL); m_buffer.append((uint8 *)data, len); return len; } virtual void seek( uint /*pos*/ ) { /*Not implemented*/ } virtual uint tell() const { return m_buffer.size(); } virtual uint size() const { return m_buffer.size(); } virtual bool isError() const { return false; } virtual void clearError() {} virtual bool isAtEnd() const { return true; } virtual bool isSeekable() const { return false; } virtual bool isLoading() const { return false; } virtual bool isSaving() const { return true; } private: Array & m_buffer; }; /// Protected input stream. 
class NVCORE_CLASS ProtectedStream : public Stream { NV_FORBID_COPY(ProtectedStream); public: /// Ctor. ProtectedStream( Stream & s ) : m_s(&s), m_autodelete(false) { } /// Ctor. ProtectedStream( Stream * s, bool autodelete = true ) : m_s(s), m_autodelete(autodelete) { nvDebugCheck(m_s != NULL); } /// Dtor. virtual ~ProtectedStream() { if( m_autodelete ) { delete m_s; } } /** @name Stream implementation. */ //@{ /// Read data. virtual uint serialize( void * data, uint len ) { nvDebugCheck(data != NULL); len = m_s->serialize( data, len ); if( m_s->isError() ) { throw; } return len; } virtual void seek( uint pos ) { m_s->seek( pos ); if( m_s->isError() ) { throw; } } virtual uint tell() const { return m_s->tell(); } virtual uint size() const { return m_s->size(); } virtual bool isError() const { return m_s->isError(); } virtual void clearError() { m_s->clearError(); } virtual bool isAtEnd() const { return m_s->isAtEnd(); } virtual bool isSeekable() const { return m_s->isSeekable(); } virtual bool isLoading() const { return m_s->isLoading(); } virtual bool isSaving() const { return m_s->isSaving(); } //@} private: Stream * const m_s; bool const m_autodelete; }; } // nv namespace //#endif // NV_CORE_STDSTREAM_H Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/StrLib.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/StrLib.cpp (revision 23379) +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/StrLib.cpp (revision 23380) @@ -1,772 +1,772 @@ -// This code is in the public domain -- Ignacio Castaño +// This code is in the public domain -- Ignacio Castaño #include "StrLib.h" #include "Memory.h" #include "Utils.h" // swap #include // log #include // vsnprintf #include // strlen, strcmp, etc. 
#if NV_CC_MSVC #include // vsnprintf #endif using namespace nv; namespace { static char * strAlloc(uint size) { return malloc(size); } static char * strReAlloc(char * str, uint size) { return realloc(str, size); } static void strFree(const char * str) { return free(str); } /*static char * strDup( const char * str ) { nvDebugCheck( str != NULL ); uint len = uint(strlen( str ) + 1); char * dup = strAlloc( len ); memcpy( dup, str, len ); return dup; }*/ // helper function for integer to string conversion. static char * i2a( uint i, char *a, uint r ) { if( i / r > 0 ) { a = i2a( i / r, a, r ); } *a = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"[i % r]; return a + 1; } // Locale independent functions. static inline char toUpper( char c ) { return (c<'a' || c>'z') ? (c) : (c+'A'-'a'); } static inline char toLower( char c ) { return (c<'A' || c>'Z') ? (c) : (c+'a'-'A'); } static inline bool isAlpha( char c ) { return (c>='a' && c<='z') || (c>='A' && c<='Z'); } static inline bool isDigit( char c ) { return c>='0' && c<='9'; } static inline bool isAlnum( char c ) { return (c>='a' && c<='z') || (c>='A' && c<='Z') || (c>='0' && c<='9'); } } uint nv::strLen(const char * str) { nvDebugCheck(str != NULL); return U32(strlen(str)); } int nv::strDiff(const char * s1, const char * s2) { nvDebugCheck(s1 != NULL); nvDebugCheck(s2 != NULL); return strcmp(s1, s2); } int nv::strCaseDiff(const char * s1, const char * s2) { nvDebugCheck(s1 != NULL); nvDebugCheck(s1 != NULL); #if NV_CC_MSVC return _stricmp(s1, s2); #else return strcasecmp(s1, s2); #endif } bool nv::strEqual(const char * s1, const char * s2) { if (s1 == s2) return true; if (s1 == NULL || s2 == NULL) return false; return strcmp(s1, s2) == 0; } bool nv::strCaseEqual(const char * s1, const char * s2) { if (s1 == s2) return true; if (s1 == NULL || s2 == NULL) return false; return strCaseDiff(s1, s2) == 0; } bool nv::strBeginsWith(const char * str, const char * prefix) { //return strstr(str, prefix) == dst; return strncmp(str, prefix, 
strlen(prefix)) == 0; } bool nv::strEndsWith(const char * str, const char * suffix) { uint ml = strLen(str); uint sl = strLen(suffix); if (ml < sl) return false; return strncmp(str + ml - sl, suffix, sl) == 0; } // @@ Add asserts to detect overlap between dst and src? void nv::strCpy(char * dst, uint size, const char * src) { nvDebugCheck(dst != NULL); nvDebugCheck(src != NULL); #if NV_CC_MSVC && _MSC_VER >= 1400 strcpy_s(dst, size, src); #else NV_UNUSED(size); strcpy(dst, src); #endif } void nv::strCpy(char * dst, uint size, const char * src, uint len) { nvDebugCheck(dst != NULL); nvDebugCheck(src != NULL); #if NV_CC_MSVC && _MSC_VER >= 1400 strncpy_s(dst, size, src, len); #else int n = min(len+1, size); strncpy(dst, src, n); dst[n-1] = '\0'; #endif } void nv::strCat(char * dst, uint size, const char * src) { nvDebugCheck(dst != NULL); nvDebugCheck(src != NULL); #if NV_CC_MSVC && _MSC_VER >= 1400 strcat_s(dst, size, src); #else NV_UNUSED(size); strcat(dst, src); #endif } NVCORE_API const char * nv::strSkipWhiteSpace(const char * str) { nvDebugCheck(str != NULL); while (*str == ' ') str++; return str; } NVCORE_API char * nv::strSkipWhiteSpace(char * str) { nvDebugCheck(str != NULL); while (*str == ' ') str++; return str; } /** Pattern matching routine. I don't remember where did I get this. 
*/
// Matches str against pat and returns true on a full match.
// Pattern syntax handled below:
//   '*'      any run of characters (possibly empty), tried by recursing on every suffix of str
//   '?'      any single character
//   '[...]'  one character from a set; "a-b" inside the set is a range (accepted in either order)
//   NV_PATH_SEPARATOR  is skipped and the pattern character after it is compared literally
bool nv::strMatch(const char * str, const char * pat)
{
    nvDebugCheck(str != NULL);
    nvDebugCheck(pat != NULL);

    char c2;

    while (true) {
        // End of pattern: match only if the string is exhausted too.
        if (*pat==0) {
            if (*str==0) return true;
            else return false;
        }
        // String exhausted but pattern is not: only a '*' can still match.
        if ((*str==0) && (*pat!='*')) return false;
        if (*pat=='*') {
            pat++;
            // Trailing '*' matches whatever is left.
            if (*pat==0) return true;
            // Try the rest of the pattern at every remaining position of str.
            while (true) {
                if (strMatch(str, pat)) return true;
                if (*str==0) return false;
                str++;
            }
        }
        if (*pat=='?') goto match;
        if (*pat=='[') {
            pat++;
            // Scan the set until an entry matches *str; reaching ']' or the end
            // of the pattern first means no entry matched.
            while (true) {
                if ((*pat==']') || (*pat==0)) return false;
                if (*pat==*str) break;
                if (pat[1] == '-') {
                    c2 = pat[2];
                    if (c2==0) return false;
                    if ((*pat<=*str) && (c2>=*str)) break;
                    if ((*pat>=*str) && (c2<=*str)) break;
                    pat+=2;
                }
                pat++;
            }
            // Skip to the closing ']' (or step back onto the last character if
            // the set is unterminated) so that `match` advances past it.
            while (*pat!=']') {
                if (*pat==0) {
                    pat--;
                    break;
                }
                pat++;
            }
            goto match;
        }

        if (*pat == NV_PATH_SEPARATOR) {
            pat++;
            if (*pat==0) return false;
        }
        if (*pat!=*str) return false;

match:
        pat++;
        str++;
    }
}

// True when every character of str is a decimal digit.
// The loop body never runs for an empty string, so "" also returns true.
bool nv::isNumber(const char * str)
{
    while(*str != '\0') {
        if (!isDigit(*str)) return false;
        str++;
    }
    return true;
}


/** Empty string. No buffer is allocated. */
StringBuilder::StringBuilder() : m_size(0), m_str(NULL)
{
}

/** Preallocate space. size_hint must be greater than zero; the buffer starts out as "". */
StringBuilder::StringBuilder( uint size_hint ) : m_size(size_hint)
{
    nvDebugCheck(m_size > 0);
    m_str = strAlloc(m_size);
    *m_str = '\0';
}

/** Copy ctor. */
StringBuilder::StringBuilder( const StringBuilder & s ) : m_size(0), m_str(NULL)
{
    copy(s);
}

/** Copy string. A NULL argument leaves the builder empty. */
StringBuilder::StringBuilder(const char * s) : m_size(0), m_str(NULL)
{
    if (s != NULL) {
        copy(s);
    }
}

/** Copy string. Forwards to copy(s, len). */
StringBuilder::StringBuilder(const char * s, uint len) : m_size(0), m_str(NULL)
{
    copy(s, len);
}

/** Delete the string. */
StringBuilder::~StringBuilder()
{
    strFree(m_str);
}


/** Format a string safely. Variadic front end for formatList(). */
StringBuilder & StringBuilder::format( const char * fmt, ... )
{
    nvDebugCheck(fmt != NULL);
    va_list arg;
    va_start( arg, fmt );

    formatList( fmt, arg );

    va_end( arg );

    return *this;
}


/** Format a string safely.
*/ StringBuilder & StringBuilder::formatList( const char * fmt, va_list arg ) { nvDebugCheck(fmt != NULL); if (m_size == 0) { m_size = 64; m_str = strAlloc( m_size ); } va_list tmp; va_copy(tmp, arg); #if NV_CC_MSVC && _MSC_VER >= 1400 int n = vsnprintf_s(m_str, m_size, _TRUNCATE, fmt, tmp); #else int n = vsnprintf(m_str, m_size, fmt, tmp); #endif va_end(tmp); while( n < 0 || n >= int(m_size) ) { if( n > -1 ) { m_size = n + 1; } else { m_size *= 2; } m_str = strReAlloc(m_str, m_size); va_copy(tmp, arg); #if NV_CC_MSVC && _MSC_VER >= 1400 n = vsnprintf_s(m_str, m_size, _TRUNCATE, fmt, tmp); #else n = vsnprintf(m_str, m_size, fmt, tmp); #endif va_end(tmp); } nvDebugCheck(n < int(m_size)); // Make sure it's null terminated. nvDebugCheck(m_str[n] == '\0'); //str[n] = '\0'; return *this; } /** Append a string. */ StringBuilder & StringBuilder::append( const char * s ) { return append(s, U32(strlen( s ))); } /** Append a string. */ StringBuilder & StringBuilder::append(const char * s, uint len) { nvDebugCheck(s != NULL); uint offset = length(); const uint size = offset + len + 1; reserve(size); strCpy(m_str + offset, len + 1, s, len); return *this; } /** Append a formatted string. */ StringBuilder & StringBuilder::appendFormat( const char * fmt, ... ) { nvDebugCheck( fmt != NULL ); va_list arg; va_start( arg, fmt ); appendFormatList( fmt, arg ); va_end( arg ); return *this; } /** Append a formatted string. */ StringBuilder & StringBuilder::appendFormatList( const char * fmt, va_list arg ) { nvDebugCheck( fmt != NULL ); va_list tmp; va_copy(tmp, arg); if (m_size == 0) { formatList(fmt, arg); } else { StringBuilder tmp_str; tmp_str.formatList( fmt, tmp ); append( tmp_str.str() ); } va_end(tmp); return *this; } // Append n spaces. 
StringBuilder & StringBuilder::appendSpace(uint n) { if (m_str == NULL) { m_size = n + 1; m_str = strAlloc(m_size); memset(m_str, ' ', m_size); m_str[n] = '\0'; } else { const uint len = strLen(m_str); if (m_size < len + n + 1) { m_size = len + n + 1; m_str = strReAlloc(m_str, m_size); } memset(m_str + len, ' ', n); m_str[len+n] = '\0'; } return *this; } /** Convert number to string in the given base. */ StringBuilder & StringBuilder::number( int i, int base ) { nvCheck( base >= 2 ); nvCheck( base <= 36 ); // @@ This needs to be done correctly. // length = floor(log(i, base)); uint len = uint(log(float(i)) / log(float(base)) + 2); // one more if negative reserve(len); if( i < 0 ) { *m_str = '-'; *i2a(uint(-i), m_str+1, base) = 0; } else { *i2a(i, m_str, base) = 0; } return *this; } /** Convert number to string in the given base. */ StringBuilder & StringBuilder::number( uint i, int base ) { nvCheck( base >= 2 ); nvCheck( base <= 36 ); // @@ This needs to be done correctly. // length = floor(log(i, base)); uint len = uint(log(float(i)) / log(float(base)) - 0.5f + 1); reserve(len); *i2a(i, m_str, base) = 0; return *this; } /** Resize the string preserving the contents. */ StringBuilder & StringBuilder::reserve( uint size_hint ) { nvCheck(size_hint != 0); if (size_hint > m_size) { m_str = strReAlloc(m_str, size_hint); m_size = size_hint; } return *this; } /** Copy a string safely. */ StringBuilder & StringBuilder::copy(const char * s) { nvCheck( s != NULL ); const uint str_size = uint(strlen( s )) + 1; reserve(str_size); memcpy(m_str, s, str_size); return *this; } /** Copy a string safely. */ StringBuilder & StringBuilder::copy(const char * s, uint len) { nvCheck( s != NULL ); const uint str_size = len + 1; reserve(str_size); strCpy(m_str, str_size, s, len); return *this; } /** Copy an StringBuilder. 
*/ StringBuilder & StringBuilder::copy( const StringBuilder & s ) { if (s.m_str == NULL) { nvCheck( s.m_size == 0 ); reset(); } else { reserve( s.m_size ); strCpy( m_str, s.m_size, s.m_str ); } return *this; } bool StringBuilder::endsWith(const char * str) const { uint l = uint(strlen(str)); uint ml = uint(strlen(m_str)); if (ml < l) return false; return strncmp(m_str + ml - l, str, l) == 0; } bool StringBuilder::beginsWith(const char * str) const { size_t l = strlen(str); return strncmp(m_str, str, l) == 0; } // Find given char starting from the end. char * StringBuilder::reverseFind(char c) { int length = (int)strlen(m_str) - 1; while (length >= 0 && m_str[length] != c) { length--; } if (length >= 0) { return m_str + length; } else { return NULL; } } /** Reset the string. */ void StringBuilder::reset() { m_size = 0; strFree( m_str ); m_str = NULL; } /** Release the allocated string. */ char * StringBuilder::release() { char * str = m_str; m_size = 0; m_str = NULL; return str; } // Swap strings. void nv::swap(StringBuilder & a, StringBuilder & b) { swap(a.m_size, b.m_size); swap(a.m_str, b.m_str); } /// Get the file name from a path. const char * Path::fileName() const { return fileName(m_str); } /// Get the extension from a file path. const char * Path::extension() const { return extension(m_str); } /*static */void Path::translatePath(char * path, char pathSeparator/*= NV_PATH_SEPARATOR*/) { nvCheck(path != NULL); for (int i = 0;; i++) { if (path[i] == '\0') break; if (path[i] == '\\' || path[i] == '/') path[i] = pathSeparator; } } /// Toggles path separators (ie. \\ into /). 
void Path::translatePath(char pathSeparator/*=NV_PATH_SEPARATOR*/) { nvCheck(!isNull()); translatePath(m_str, pathSeparator); } void Path::appendSeparator(char pathSeparator/*=NV_PATH_SEPARATOR*/) { nvCheck(!isNull()); const uint l = length(); if (m_str[l] != '\\' && m_str[l] != '/') { char separatorString[] = { pathSeparator, '\0' }; append(separatorString); } } /** * Strip the file name from a path. * @warning path cannot end with '/' o '\\', can't it? */ void Path::stripFileName() { nvCheck( m_str != NULL ); int length = (int)strlen(m_str) - 1; while (length > 0 && m_str[length] != '/' && m_str[length] != '\\'){ length--; } if( length ) { m_str[length+1] = 0; } else { m_str[0] = 0; } } /// Strip the extension from a path name. void Path::stripExtension() { nvCheck( m_str != NULL ); int length = (int)strlen(m_str) - 1; while (length > 0 && m_str[length] != '.') { length--; if( m_str[length] == NV_PATH_SEPARATOR ) { return; // no extension } } if (length > 0) { m_str[length] = 0; } } /// Get the path separator. 
// static char Path::separator() { return NV_PATH_SEPARATOR; } // static const char * Path::fileName(const char * str) { nvCheck( str != NULL ); int length = (int)strlen(str) - 1; while (length >= 0 && str[length] != '\\' && str[length] != '/') { length--; } return &str[length+1]; } // static const char * Path::extension(const char * str) { nvCheck( str != NULL ); int length, l; l = length = (int)strlen( str ); while (length > 0 && str[length] != '.') { length--; if (str[length] == '\\' || str[length] == '/') { return &str[l]; // no extension } } if (length == 0) { return &str[l]; } return &str[length]; } /// Clone this string String String::clone() const { String str(data); return str; } void String::setString(const char * str) { if (str == NULL) { data = NULL; } else { allocString( str ); addRef(); } } void String::setString(const char * str, uint length) { nvDebugCheck(str != NULL); allocString(str, length); addRef(); } void String::setString(const StringBuilder & str) { if (str.str() == NULL) { data = NULL; } else { allocString(str.str()); addRef(); } } // Add reference count. void String::addRef() { if (data != NULL) { setRefCount(getRefCount() + 1); } } // Decrease reference count. void String::release() { if (data != NULL) { const uint16 count = getRefCount(); setRefCount(count - 1); if (count - 1 == 0) { free(data - 2); data = NULL; } } } void String::allocString(const char * str, uint len) { const char * ptr = malloc(2 + len + 1); setData( ptr ); setRefCount( 0 ); // Copy string. strCpy(const_cast(data), len+1, str, len); // Add terminating character. 
const_cast(data)[len] = '\0'; } void nv::swap(String & a, String & b) { swap(a.data, b.data); } Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/StrLib.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/StrLib.h (revision 23379) +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/StrLib.h (revision 23380) @@ -1,430 +1,430 @@ -// This code is in the public domain -- Ignacio Castaño +// This code is in the public domain -- Ignacio Castaño #pragma once #ifndef NV_CORE_STRING_H #define NV_CORE_STRING_H #include "Debug.h" #include "Hash.h" // hash //#include // strlen, etc. #if NV_OS_WIN32 #define NV_PATH_SEPARATOR '\\' #else #define NV_PATH_SEPARATOR '/' #endif namespace nv { NVCORE_API uint strHash(const char * str, uint h) NV_PURE; /// String hash based on Bernstein's hash. inline uint strHash(const char * data, uint h = 5381) { uint i = 0; while(data[i] != 0) { h = (33 * h) ^ uint(data[i]); i++; } return h; } template <> struct Hash { uint operator()(const char * str) const { return strHash(str); } }; NVCORE_API uint strLen(const char * str) NV_PURE; // Asserts on NULL strings. NVCORE_API int strDiff(const char * s1, const char * s2) NV_PURE; // Asserts on NULL strings. NVCORE_API int strCaseDiff(const char * s1, const char * s2) NV_PURE; // Asserts on NULL strings. NVCORE_API bool strEqual(const char * s1, const char * s2) NV_PURE; // Accepts NULL strings. NVCORE_API bool strCaseEqual(const char * s1, const char * s2) NV_PURE; // Accepts NULL strings. 
template <> struct Equal { bool operator()(const char * a, const char * b) const { return strEqual(a, b); } }; NVCORE_API bool strBeginsWith(const char * dst, const char * prefix) NV_PURE; NVCORE_API bool strEndsWith(const char * dst, const char * suffix) NV_PURE; NVCORE_API void strCpy(char * dst, uint size, const char * src); NVCORE_API void strCpy(char * dst, uint size, const char * src, uint len); NVCORE_API void strCat(char * dst, uint size, const char * src); NVCORE_API const char * strSkipWhiteSpace(const char * str); NVCORE_API char * strSkipWhiteSpace(char * str); NVCORE_API bool strMatch(const char * str, const char * pat) NV_PURE; NVCORE_API bool isNumber(const char * str) NV_PURE; /* @@ Implement these two functions and modify StringBuilder to use them? NVCORE_API void strFormat(const char * dst, const char * fmt, ...); NVCORE_API void strFormatList(const char * dst, const char * fmt, va_list arg); template void strFormatSafe(char (&buffer)[count], const char *fmt, ...) __attribute__((format (printf, 2, 3))); template void strFormatSafe(char (&buffer)[count], const char *fmt, ...) { va_list args; va_start(args, fmt); strFormatList(buffer, count, fmt, args); va_end(args); } template void strFormatListSafe(char (&buffer)[count], const char *fmt, va_list arg) { va_list tmp; va_copy(tmp, args); strFormatList(buffer, count, fmt, tmp); va_end(tmp); }*/ template void strCpySafe(char (&buffer)[count], const char *src) { strCpy(buffer, count, src); } template void strCatSafe(char (&buffer)[count], const char * src) { strCat(buffer, count, src); } /// String builder. class NVCORE_CLASS StringBuilder { public: StringBuilder(); explicit StringBuilder( uint size_hint ); StringBuilder(const char * str); StringBuilder(const char * str, uint len); StringBuilder(const StringBuilder & other); ~StringBuilder(); StringBuilder & format( const char * format, ... 
) __attribute__((format (printf, 2, 3))); StringBuilder & formatList( const char * format, va_list arg ); StringBuilder & append(const char * str); StringBuilder & append(const char * str, uint len); StringBuilder & appendFormat(const char * format, ...) __attribute__((format (printf, 2, 3))); StringBuilder & appendFormatList(const char * format, va_list arg); StringBuilder & appendSpace(uint n); StringBuilder & number( int i, int base = 10 ); StringBuilder & number( uint i, int base = 10 ); StringBuilder & reserve(uint size_hint); StringBuilder & copy(const char * str); StringBuilder & copy(const char * str, uint len); StringBuilder & copy(const StringBuilder & str); StringBuilder & toLower(); StringBuilder & toUpper(); bool endsWith(const char * str) const; bool beginsWith(const char * str) const; char * reverseFind(char c); void reset(); bool isNull() const { return m_size == 0; } // const char * accessors //operator const char * () const { return m_str; } //operator char * () { return m_str; } const char * str() const { return m_str; } char * str() { return m_str; } char * release(); /// Implement value semantics. StringBuilder & operator=( const StringBuilder & s ) { return copy(s); } /// Implement value semantics. StringBuilder & operator=( const char * s ) { return copy(s); } /// Equal operator. bool operator==( const StringBuilder & s ) const { return strMatch(s.m_str, m_str); } /// Return the exact length. uint length() const { return isNull() ? 0 : strLen(m_str); } /// Return the size of the string container. uint capacity() const { return m_size; } /// Return the hash of the string. uint hash() const { return isNull() ? 0 : strHash(m_str); } // Swap strings. friend void swap(StringBuilder & a, StringBuilder & b); protected: /// Size of the string container. uint m_size; /// String. char * m_str; }; /// Path string. @@ This should be called PathBuilder. 
class NVCORE_CLASS Path : public StringBuilder { public: Path() : StringBuilder() {} explicit Path(int size_hint) : StringBuilder(size_hint) {} Path(const char * str) : StringBuilder(str) {} Path(const Path & path) : StringBuilder(path) {} const char * fileName() const; const char * extension() const; void translatePath(char pathSeparator = NV_PATH_SEPARATOR); void appendSeparator(char pathSeparator = NV_PATH_SEPARATOR); void stripFileName(); void stripExtension(); // statics static char separator(); static const char * fileName(const char *); static const char * extension(const char *); static void translatePath(char * path, char pathSeparator = NV_PATH_SEPARATOR); }; /// String class. class NVCORE_CLASS String { public: /// Constructs a null string. @sa isNull() String() { data = NULL; } /// Constructs a shared copy of str. String(const String & str) { data = str.data; if (data != NULL) addRef(); } /// Constructs a shared string from a standard string. String(const char * str) { setString(str); } /// Constructs a shared string from a standard string. String(const char * str, int length) { setString(str, length); } /// Constructs a shared string from a StringBuilder. String(const StringBuilder & str) { setString(str); } /// Dtor. ~String() { release(); } String clone() const; /// Release the current string and allocate a new one. const String & operator=( const char * str ) { release(); setString( str ); return *this; } /// Release the current string and allocate a new one. const String & operator=( const StringBuilder & str ) { release(); setString( str ); return *this; } /// Implement value semantics. String & operator=( const String & str ) { if (str.data != data) { release(); data = str.data; addRef(); } return *this; } /// Equal operator. bool operator==( const String & str ) const { return strMatch(str.data, data); } /// Equal operator. bool operator==( const char * str ) const { return strMatch(str, data); } /// Not equal operator. 
bool operator!=( const String & str ) const { return !strMatch(str.data, data); } /// Not equal operator. bool operator!=( const char * str ) const { return !strMatch(str, data); } /// Returns true if this string is the null string. bool isNull() const { return data == NULL; } /// Return the exact length. uint length() const { nvDebugCheck(data != NULL); return strLen(data); } /// Return the hash of the string. uint hash() const { nvDebugCheck(data != NULL); return strHash(data); } /// const char * cast operator. operator const char * () const { return data; } /// Get string pointer. const char * str() const { return data; } private: // Add reference count. void addRef(); // Decrease reference count. void release(); uint16 getRefCount() const { nvDebugCheck(data != NULL); return *reinterpret_cast(data - 2); } void setRefCount(uint16 count) { nvDebugCheck(data != NULL); nvCheck(count < 0xFFFF); *reinterpret_cast(const_cast(data - 2)) = uint16(count); } void setData(const char * str) { data = str + 2; } void allocString(const char * str) { allocString(str, strLen(str)); } void allocString(const char * str, uint length); void setString(const char * str); void setString(const char * str, uint length); void setString(const StringBuilder & str); // Swap strings. friend void swap(String & a, String & b); private: const char * data; }; template <> struct Hash { uint operator()(const String & str) const { return str.hash(); } }; // Like AutoPtr, but for const char strings. class AutoString { NV_FORBID_COPY(AutoString); NV_FORBID_HEAPALLOC(); public: // Ctor. AutoString(const char * p = NULL) : m_ptr(p) { } #if NV_CC_CPP11 // Move ctor. AutoString(AutoString && ap) : m_ptr(ap.m_ptr) { ap.m_ptr = NULL; } #endif // Dtor. Deletes owned pointer. ~AutoString() { delete [] m_ptr; m_ptr = NULL; } // Delete owned pointer and assign new one. void operator=(const char * p) { if (p != m_ptr) { delete [] m_ptr; m_ptr = p; } } // Get pointer. 
const char * ptr() const { return m_ptr; } operator const char *() const { return m_ptr; } // Relinquish ownership of the underlying pointer and returns that pointer. const char * release() { const char * tmp = m_ptr; m_ptr = NULL; return tmp; } // comparison operators. friend bool operator == (const AutoString & ap, const char * const p) { return (ap.ptr() == p); } friend bool operator != (const AutoString & ap, const char * const p) { return (ap.ptr() != p); } friend bool operator == (const char * const p, const AutoString & ap) { return (ap.ptr() == p); } friend bool operator != (const char * const p, const AutoString & ap) { return (ap.ptr() != p); } private: const char * m_ptr; }; } // nv namespace #endif // NV_CORE_STRING_H Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/Stream.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/Stream.h (revision 23379) +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/Stream.h (revision 23380) @@ -1,164 +1,164 @@ -// This code is in the public domain -- Ignacio Castaño +// This code is in the public domain -- Ignacio Castaño #pragma once #ifndef NV_CORE_STREAM_H #define NV_CORE_STREAM_H #include "nvcore.h" #include "Debug.h" namespace nv { /// Base stream class. class NVCORE_CLASS Stream { public: enum ByteOrder { LittleEndian = false, BigEndian = true, }; /// Get the byte order of the system. static ByteOrder getSystemByteOrder() { #if NV_LITTLE_ENDIAN return LittleEndian; #else return BigEndian; #endif } /// Ctor. Stream() : m_byteOrder(LittleEndian) { } /// Virtual destructor. virtual ~Stream() {} /// Set byte order. void setByteOrder(ByteOrder bo) { m_byteOrder = bo; } /// Get byte order. ByteOrder byteOrder() const { return m_byteOrder; } /// Serialize the given data. virtual uint serialize( void * data, uint len ) = 0; /// Move to the given position in the archive. virtual void seek( uint pos ) = 0; /// Return the current position in the archive. 
virtual uint tell() const = 0; /// Return the current size of the archive. virtual uint size() const = 0; /// Determine if there has been any error. virtual bool isError() const = 0; /// Clear errors. virtual void clearError() = 0; /// Return true if the stream is at the end. virtual bool isAtEnd() const = 0; /// Return true if the stream is seekable. virtual bool isSeekable() const = 0; /// Return true if this is an input stream. virtual bool isLoading() const = 0; /// Return true if this is an output stream. virtual bool isSaving() const = 0; void advance(uint offset) { seek(tell() + offset); } // friends friend Stream & operator<<( Stream & s, bool & c ) { #if NV_OS_DARWIN && !NV_CC_CPP11 nvStaticCheck(sizeof(bool) == 4); uint8 b = c ? 1 : 0; s.serialize( &b, 1 ); c = (b == 1); #else nvStaticCheck(sizeof(bool) == 1); s.serialize( &c, 1 ); #endif return s; } friend Stream & operator<<( Stream & s, char & c ) { nvStaticCheck(sizeof(char) == 1); s.serialize( &c, 1 ); return s; } friend Stream & operator<<( Stream & s, uint8 & c ) { nvStaticCheck(sizeof(uint8) == 1); s.serialize( &c, 1 ); return s; } friend Stream & operator<<( Stream & s, int8 & c ) { nvStaticCheck(sizeof(int8) == 1); s.serialize( &c, 1 ); return s; } friend Stream & operator<<( Stream & s, uint16 & c ) { nvStaticCheck(sizeof(uint16) == 2); return s.byteOrderSerialize( &c, 2 ); } friend Stream & operator<<( Stream & s, int16 & c ) { nvStaticCheck(sizeof(int16) == 2); return s.byteOrderSerialize( &c, 2 ); } friend Stream & operator<<( Stream & s, uint32 & c ) { nvStaticCheck(sizeof(uint32) == 4); return s.byteOrderSerialize( &c, 4 ); } friend Stream & operator<<( Stream & s, int32 & c ) { nvStaticCheck(sizeof(int32) == 4); return s.byteOrderSerialize( &c, 4 ); } friend Stream & operator<<( Stream & s, uint64 & c ) { nvStaticCheck(sizeof(uint64) == 8); return s.byteOrderSerialize( &c, 8 ); } friend Stream & operator<<( Stream & s, int64 & c ) { nvStaticCheck(sizeof(int64) == 8); return 
s.byteOrderSerialize( &c, 8 ); } friend Stream & operator<<( Stream & s, float & c ) { nvStaticCheck(sizeof(float) == 4); return s.byteOrderSerialize( &c, 4 ); } friend Stream & operator<<( Stream & s, double & c ) { nvStaticCheck(sizeof(double) == 8); return s.byteOrderSerialize( &c, 8 ); } protected: /// Serialize in the stream byte order. Stream & byteOrderSerialize( void * v, uint len ) { if( m_byteOrder == getSystemByteOrder() ) { serialize( v, len ); } else { for( uint i = len; i > 0; i-- ) { serialize( (uint8 *)v + i - 1, 1 ); } } return *this; } private: ByteOrder m_byteOrder; }; } // nv namespace #endif // NV_CORE_STREAM_H Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/TextWriter.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/TextWriter.cpp (revision 23379) +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/TextWriter.cpp (revision 23380) @@ -1,45 +1,45 @@ -// This code is in the public domain -- Ignacio Castaño +// This code is in the public domain -- Ignacio Castaño #include "TextWriter.h" using namespace nv; /// Constructor TextWriter::TextWriter(Stream * s) : s(s), str(1024) { nvCheck(s != NULL); nvCheck(s->isSaving()); } void TextWriter::writeString(const char * str) { nvDebugCheck(s != NULL); s->serialize(const_cast(str), strLen(str)); } void TextWriter::writeString(const char * str, uint len) { nvDebugCheck(s != NULL); s->serialize(const_cast(str), len); } void TextWriter::format(const char * format, ...) 
{ va_list arg; va_start(arg,format); str.formatList(format, arg); writeString(str.str(), str.length()); va_end(arg); } void TextWriter::formatList(const char * format, va_list arg) { va_list tmp; va_copy(tmp, arg); str.formatList(format, arg); writeString(str.str(), str.length()); va_end(tmp); } Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/TextWriter.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/TextWriter.h (revision 23379) +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/TextWriter.h (revision 23380) @@ -1,62 +1,62 @@ -// This code is in the public domain -- Ignacio Castaño +// This code is in the public domain -- Ignacio Castaño #pragma once #ifndef NVCORE_TEXTWRITER_H #define NVCORE_TEXTWRITER_H #include "nvcore.h" #include "Stream.h" #include "StrLib.h" namespace nv { /// Text writer. class NVCORE_CLASS TextWriter { public: TextWriter(Stream * s); void writeString(const char * str); void writeString(const char * str, uint len); void format(const char * format, ...) __attribute__((format (printf, 2, 3))); void formatList(const char * format, va_list arg); private: Stream * s; // Temporary string. 
StringBuilder str; }; inline TextWriter & operator<<( TextWriter & tw, int i) { tw.format("%d", i); return tw; } inline TextWriter & operator<<( TextWriter & tw, uint i) { tw.format("%u", i); return tw; } inline TextWriter & operator<<( TextWriter & tw, float f) { tw.format("%f", f); return tw; } inline TextWriter & operator<<( TextWriter & tw, const char * str) { tw.writeString(str); return tw; } } // nv namespace #endif // NVCORE_TEXTWRITER_H Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/Utils.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/Utils.h (revision 23379) +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/Utils.h (revision 23380) @@ -1,282 +1,282 @@ -// This code is in the public domain -- Ignacio Castaño +// This code is in the public domain -- Ignacio Castaño #pragma once #ifndef NV_CORE_UTILS_H #define NV_CORE_UTILS_H #include "Debug.h" // nvDebugCheck #include // for placement new // Just in case. Grrr. #undef min #undef max #define NV_INT8_MIN (-128) #define NV_INT8_MAX 127 #define NV_UINT8_MAX 255 #define NV_INT16_MIN (-32767-1) #define NV_INT16_MAX 32767 #define NV_UINT16_MAX 0xffff #define NV_INT32_MIN (-2147483647-1) #define NV_INT32_MAX 2147483647 #define NV_UINT32_MAX 0xffffffff #define NV_INT64_MAX POSH_I64(9223372036854775807) #define NV_INT64_MIN (-POSH_I64(9223372036854775807)-1) #define NV_UINT64_MAX POSH_U64(0xffffffffffffffff) #define NV_HALF_MAX 65504.0F #define NV_FLOAT_MAX 3.402823466e+38F #define NV_INTEGER_TO_FLOAT_MAX 16777217 // Largest integer such that it and all smaller integers can be stored in a 32bit float. namespace nv { // Less error prone than casting. From CB: // http://cbloomrants.blogspot.com/2011/06/06-17-11-c-casting-is-devil.html // These intentionally look like casts. 
// uint32 casts: template inline uint32 U32(T x) { return x; } template <> inline uint32 U32(uint64 x) { nvDebugCheck(x <= NV_UINT32_MAX); return (uint32)x; } template <> inline uint32 U32(int64 x) { nvDebugCheck(x >= 0 && x <= NV_UINT32_MAX); return (uint32)x; } //template <> inline uint32 U32(uint32 x) { return x; } template <> inline uint32 U32(int32 x) { nvDebugCheck(x >= 0); return (uint32)x; } //template <> inline uint32 U32(uint16 x) { return x; } template <> inline uint32 U32(int16 x) { nvDebugCheck(x >= 0); return (uint32)x; } //template <> inline uint32 U32(uint8 x) { return x; } template <> inline uint32 U32(int8 x) { nvDebugCheck(x >= 0); return (uint32)x; } // int32 casts: template inline int32 I32(T x) { return x; } template <> inline int32 I32(uint64 x) { nvDebugCheck(x <= NV_INT32_MAX); return (int32)x; } template <> inline int32 I32(int64 x) { nvDebugCheck(x >= NV_INT32_MIN && x <= NV_UINT32_MAX); return (int32)x; } template <> inline int32 I32(uint32 x) { nvDebugCheck(x <= NV_INT32_MAX); return (int32)x; } //template <> inline int32 I32(int32 x) { return x; } //template <> inline int32 I32(uint16 x) { return x; } //template <> inline int32 I32(int16 x) { return x; } //template <> inline int32 I32(uint8 x) { return x; } //template <> inline int32 I32(int8 x) { return x; } // uint16 casts: template inline uint16 U16(T x) { return x; } template <> inline uint16 U16(uint64 x) { nvDebugCheck(x <= NV_UINT16_MAX); return (uint16)x; } template <> inline uint16 U16(int64 x) { nvDebugCheck(x >= 0 && x <= NV_UINT16_MAX); return (uint16)x; } template <> inline uint16 U16(uint32 x) { nvDebugCheck(x <= NV_UINT16_MAX); return (uint16)x; } template <> inline uint16 U16(int32 x) { nvDebugCheck(x >= 0 && x <= NV_UINT16_MAX); return (uint16)x; } //template <> inline uint16 U16(uint16 x) { return x; } template <> inline uint16 U16(int16 x) { nvDebugCheck(x >= 0); return (uint16)x; } //template <> inline uint16 U16(uint8 x) { return x; } template <> inline uint16 
U16(int8 x) { nvDebugCheck(x >= 0); return (uint16)x; } // int16 casts: template inline int16 I16(T x) { return x; } template <> inline int16 I16(uint64 x) { nvDebugCheck(x <= NV_INT16_MAX); return (int16)x; } template <> inline int16 I16(int64 x) { nvDebugCheck(x >= NV_INT16_MIN && x <= NV_UINT16_MAX); return (int16)x; } template <> inline int16 I16(uint32 x) { nvDebugCheck(x <= NV_INT16_MAX); return (int16)x; } template <> inline int16 I16(int32 x) { nvDebugCheck(x >= NV_INT16_MIN && x <= NV_UINT16_MAX); return (int16)x; } template <> inline int16 I16(uint16 x) { nvDebugCheck(x <= NV_INT16_MAX); return (int16)x; } //template <> inline int16 I16(int16 x) { return x; } //template <> inline int16 I16(uint8 x) { return x; } //template <> inline int16 I16(int8 x) { return x; } // uint8 casts: template inline uint8 U8(T x) { return x; } template <> inline uint8 U8(uint64 x) { nvDebugCheck(x <= NV_UINT8_MAX); return (uint8)x; } template <> inline uint8 U8(int64 x) { nvDebugCheck(x >= 0 && x <= NV_UINT8_MAX); return (uint8)x; } template <> inline uint8 U8(uint32 x) { nvDebugCheck(x <= NV_UINT8_MAX); return (uint8)x; } template <> inline uint8 U8(int32 x) { nvDebugCheck(x >= 0 && x <= NV_UINT8_MAX); return (uint8)x; } template <> inline uint8 U8(uint16 x) { nvDebugCheck(x <= NV_UINT8_MAX); return (uint8)x; } template <> inline uint8 U8(int16 x) { nvDebugCheck(x >= 0 && x <= NV_UINT8_MAX); return (uint8)x; } //template <> inline uint8 U8(uint8 x) { return x; } template <> inline uint8 U8(int8 x) { nvDebugCheck(x >= 0); return (uint8)x; } //template <> inline uint8 U8(int8 x) { nvDebugCheck(x >= 0.0f && x <= 255.0f); return (uint8)x; } // int8 casts: template inline int8 I8(T x) { return x; } template <> inline int8 I8(uint64 x) { nvDebugCheck(x <= NV_INT8_MAX); return (int8)x; } template <> inline int8 I8(int64 x) { nvDebugCheck(x >= NV_INT8_MIN && x <= NV_UINT8_MAX); return (int8)x; } template <> inline int8 I8(uint32 x) { nvDebugCheck(x <= NV_INT8_MAX); return (int8)x; } 
template <> inline int8 I8(int32 x) { nvDebugCheck(x >= NV_INT8_MIN && x <= NV_UINT8_MAX); return (int8)x; } template <> inline int8 I8(uint16 x) { nvDebugCheck(x <= NV_INT8_MAX); return (int8)x; } template <> inline int8 I8(int16 x) { nvDebugCheck(x >= NV_INT8_MIN && x <= NV_UINT8_MAX); return (int8)x; } template <> inline int8 I8(uint8 x) { nvDebugCheck(x <= NV_INT8_MAX); return (int8)x; } //template <> inline int8 I8(int8 x) { return x; } // float casts: template inline float F32(T x) { return x; } template <> inline float F32(uint64 x) { nvDebugCheck(x <= NV_INTEGER_TO_FLOAT_MAX); return (float)x; } template <> inline float F32(int64 x) { nvDebugCheck(x >= -NV_INTEGER_TO_FLOAT_MAX && x <= NV_INTEGER_TO_FLOAT_MAX); return (float)x; } template <> inline float F32(uint32 x) { nvDebugCheck(x <= NV_INTEGER_TO_FLOAT_MAX); return (float)x; } template <> inline float F32(int32 x) { nvDebugCheck(x >= -NV_INTEGER_TO_FLOAT_MAX && x <= NV_INTEGER_TO_FLOAT_MAX); return (float)x; } // The compiler should not complain about these conversions: //template <> inline float F32(uint16 x) { nvDebugCheck(return (float)x; } //template <> inline float F32(int16 x) { nvDebugCheck(return (float)x; } //template <> inline float F32(uint8 x) { nvDebugCheck(return (float)x; } //template <> inline float F32(int8 x) { nvDebugCheck(return (float)x; } /// Swap two values. template inline void swap(T & a, T & b) { T temp(a); a = b; b = temp; } /// Return the maximum of the two arguments. For floating point values, it returns the second value if the first is NaN. template //inline const T & max(const T & a, const T & b) inline T max(const T & a, const T & b) { return (b < a) ? a : b; } /// Return the maximum of the four arguments. template //inline const T & max4(const T & a, const T & b, const T & c) inline T max4(const T & a, const T & b, const T & c, const T & d) { return max(max(a, b), max(c, d)); } /// Return the maximum of the three arguments. 
template //inline const T & max3(const T & a, const T & b, const T & c) inline T max3(const T & a, const T & b, const T & c) { return max(a, max(b, c)); } /// Return the minimum of two values. template //inline const T & min(const T & a, const T & b) inline T min(const T & a, const T & b) { return (a < b) ? a : b; } /// Return the maximum of the three arguments. template //inline const T & min3(const T & a, const T & b, const T & c) inline T min3(const T & a, const T & b, const T & c) { return min(a, min(b, c)); } /// Clamp between two values. template //inline const T & clamp(const T & x, const T & a, const T & b) inline T clamp(const T & x, const T & a, const T & b) { return min(max(x, a), b); } /** Return the next power of two. * @see http://graphics.stanford.edu/~seander/bithacks.html * @warning Behaviour for 0 is undefined. * @note isPowerOfTwo(x) == true -> nextPowerOfTwo(x) == x * @note nextPowerOfTwo(x) = 2 << log2(x-1) */ inline uint nextPowerOfTwo( uint x ) { nvDebugCheck( x != 0 ); #if 1 // On modern CPUs this is supposed to be as fast as using the bsr instruction. x--; x |= x >> 1; x |= x >> 2; x |= x >> 4; x |= x >> 8; x |= x >> 16; return x+1; #else uint p = 1; while( x > p ) { p += p; } return p; #endif } /// Return true if @a n is a power of two. inline bool isPowerOfTwo( uint n ) { return (n & (n-1)) == 0; } // @@ Move this to utils? /// Delete all the elements of a container. template void deleteAll(T & container) { for (typename T::PseudoIndex i = container.start(); !container.isDone(i); container.advance(i)) { delete container[i]; } } // @@ Specialize these methods for numeric, pointer, and pod types. 
template void construct_range(T * restrict ptr, uint new_size, uint old_size) { for (uint i = old_size; i < new_size; i++) { new(ptr+i) T; // placement new } } template void construct_range(T * restrict ptr, uint new_size, uint old_size, const T & elem) { for (uint i = old_size; i < new_size; i++) { new(ptr+i) T(elem); // placement new } } template void construct_range(T * restrict ptr, uint new_size, uint old_size, const T * src) { for (uint i = old_size; i < new_size; i++) { new(ptr+i) T(src[i]); // placement new } } template void destroy_range(T * restrict ptr, uint new_size, uint old_size) { for (uint i = new_size; i < old_size; i++) { (ptr+i)->~T(); // Explicit call to the destructor } } template void fill(T * restrict dst, uint count, const T & value) { for (uint i = 0; i < count; i++) { dst[i] = value; } } template void copy_range(T * restrict dst, const T * restrict src, uint count) { for (uint i = 0; i < count; i++) { dst[i] = src[i]; } } template bool find(const T & element, const T * restrict ptr, uint begin, uint end, uint * index) { for (uint i = begin; i < end; i++) { if (ptr[i] == element) { if (index != NULL) *index = i; return true; } } return false; } } // nv namespace #endif // NV_CORE_UTILS_H Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/nvcore.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvcore/nvcore.h (revision 23379) +++ ps/trunk/libraries/source/nvtt/src/src/nvcore/nvcore.h (revision 23380) @@ -1,307 +1,307 @@ -// This code is in the public domain -- Ignacio Castaño +// This code is in the public domain -- Ignacio Castaño #pragma once #ifndef NV_CORE_H #define NV_CORE_H // Function linkage #if NVCORE_SHARED #ifdef NVCORE_EXPORTS #define NVCORE_API DLL_EXPORT #define NVCORE_CLASS DLL_EXPORT_CLASS #else #define NVCORE_API DLL_IMPORT #define NVCORE_CLASS DLL_IMPORT #endif #else // NVCORE_SHARED #define NVCORE_API #define NVCORE_CLASS #endif // NVCORE_SHARED // Platform 
definitions #include // OS: // NV_OS_WIN32 // NV_OS_WIN64 // NV_OS_MINGW // NV_OS_CYGWIN // NV_OS_LINUX // NV_OS_UNIX // NV_OS_DARWIN // NV_OS_XBOX // NV_OS_ORBIS // NV_OS_IOS #define NV_OS_STRING POSH_OS_STRING #if defined POSH_OS_LINUX # define NV_OS_LINUX 1 # define NV_OS_UNIX 1 #elif defined POSH_OS_ORBIS # define NV_OS_ORBIS 1 #elif defined POSH_OS_FREEBSD # define NV_OS_FREEBSD 1 # define NV_OS_UNIX 1 #elif defined POSH_OS_NETBSD # define NV_OS_NETBSD 1 # define NV_OS_UNIX 1 #elif defined POSH_OS_OPENBSD # define NV_OS_OPENBSD 1 # define NV_OS_UNIX 1 #elif defined POSH_OS_CYGWIN32 # define NV_OS_CYGWIN 1 #elif defined POSH_OS_MINGW # define NV_OS_MINGW 1 # define NV_OS_WIN32 1 #elif defined POSH_OS_OSX # define NV_OS_DARWIN 1 # define NV_OS_UNIX 1 #elif defined POSH_OS_IOS # define NV_OS_DARWIN 1 //ACS should we keep this on IOS? # define NV_OS_UNIX 1 # define NV_OS_IOS 1 #elif defined POSH_OS_UNIX # define NV_OS_UNIX 1 #elif defined POSH_OS_WIN64 # define NV_OS_WIN32 1 # define NV_OS_WIN64 1 #elif defined POSH_OS_WIN32 # define NV_OS_WIN32 1 #elif defined POSH_OS_XBOX # define NV_OS_XBOX 1 #else # error "Unsupported OS" #endif // Threading: // some platforms don't implement __thread or similar for thread-local-storage #if NV_OS_UNIX || NV_OS_ORBIS || NV_OS_IOS //ACStodoIOS darwin instead of ios? 
# define NV_OS_USE_PTHREAD 1 # if NV_OS_DARWIN || NV_OS_IOS # define NV_OS_HAS_TLS_QUALIFIER 0 # else # define NV_OS_HAS_TLS_QUALIFIER 1 # endif #else # define NV_OS_USE_PTHREAD 0 # define NV_OS_HAS_TLS_QUALIFIER 1 #endif // CPUs: // NV_CPU_X86 // NV_CPU_X86_64 // NV_CPU_PPC // NV_CPU_ARM // NV_CPU_AARCH64 #define NV_CPU_STRING POSH_CPU_STRING #if defined POSH_CPU_X86_64 //# define NV_CPU_X86 1 # define NV_CPU_X86_64 1 #elif defined POSH_CPU_X86 # define NV_CPU_X86 1 #elif defined POSH_CPU_PPC # define NV_CPU_PPC 1 #elif defined POSH_CPU_STRONGARM # define NV_CPU_ARM 1 #elif defined POSH_CPU_AARCH64 # define NV_CPU_AARCH64 1 #else # error "Unsupported CPU" #endif // Compiler: // NV_CC_GNUC // NV_CC_MSVC // NV_CC_CLANG #if defined POSH_COMPILER_CLANG # define NV_CC_CLANG 1 # define NV_CC_GNUC 1 // Clang is compatible with GCC. # define NV_CC_STRING "clang" #elif defined POSH_COMPILER_GCC # define NV_CC_GNUC 1 # define NV_CC_STRING "gcc" #elif defined POSH_COMPILER_MSVC # define NV_CC_MSVC 1 # define NV_CC_STRING "msvc" #else # error "Unsupported compiler" #endif #if NV_CC_MSVC #define NV_CC_CPP11 (__cplusplus > 199711L || _MSC_VER >= 1800) // Visual Studio 2013 has all the features we use, but doesn't advertise full C++11 support yet. #else // @@ IC: This works in CLANG, about GCC? // @@ ES: Doesn't work in gcc. These 3 features are available in GCC >= 4.4. 
#ifdef __clang__ #define NV_CC_CPP11 (__has_feature(cxx_deleted_functions) && __has_feature(cxx_rvalue_references) && __has_feature(cxx_static_assert)) #elif defined __GNUC__ #define NV_CC_CPP11 ( __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)) #endif #endif // Endiannes: #define NV_LITTLE_ENDIAN POSH_LITTLE_ENDIAN #define NV_BIG_ENDIAN POSH_BIG_ENDIAN #define NV_ENDIAN_STRING POSH_ENDIAN_STRING // Define the right printf prefix for size_t arguments: #if POSH_64BIT_POINTER # define NV_SIZET_PRINTF_PREFIX POSH_I64_PRINTF_PREFIX #else # define NV_SIZET_PRINTF_PREFIX #endif // cmake config #include "nvconfig.h" // Type definitions: typedef posh_u8_t uint8; typedef posh_i8_t int8; typedef posh_u16_t uint16; typedef posh_i16_t int16; typedef posh_u32_t uint32; typedef posh_i32_t int32; typedef posh_u64_t uint64; typedef posh_i64_t int64; // Aliases typedef uint32 uint; // Version string: #define NV_VERSION_STRING \ NV_OS_STRING "/" NV_CC_STRING "/" NV_CPU_STRING"/" \ NV_ENDIAN_STRING"-endian - " __DATE__ "-" __TIME__ // Disable copy constructor and assignment operator. #if NV_CC_CPP11 #define NV_FORBID_COPY(C) \ C( const C & ) = delete; \ C &operator=( const C & ) = delete #else #define NV_FORBID_COPY(C) \ private: \ C( const C & ); \ C &operator=( const C & ) #endif // Disable dynamic allocation on the heap. // See Prohibiting Heap-Based Objects in More Effective C++. #define NV_FORBID_HEAPALLOC() \ private: \ void *operator new(size_t size); \ void *operator new[](size_t size) //static void *operator new(size_t size); \ //static void *operator new[](size_t size); // String concatenation macros. 
#define NV_STRING_JOIN2(arg1, arg2) NV_DO_STRING_JOIN2(arg1, arg2) #define NV_DO_STRING_JOIN2(arg1, arg2) arg1 ## arg2 #define NV_STRING_JOIN3(arg1, arg2, arg3) NV_DO_STRING_JOIN3(arg1, arg2, arg3) #define NV_DO_STRING_JOIN3(arg1, arg2, arg3) arg1 ## arg2 ## arg3 #define NV_STRING2(x) #x #define NV_STRING(x) NV_STRING2(x) #if NV_CC_MSVC #define NV_MULTI_LINE_MACRO_BEGIN do { #define NV_MULTI_LINE_MACRO_END \ __pragma(warning(push)) \ __pragma(warning(disable:4127)) \ } while(false) \ __pragma(warning(pop)) #else #define NV_MULTI_LINE_MACRO_BEGIN do { #define NV_MULTI_LINE_MACRO_END } while(false) #endif #if NV_CC_CPP11 #define nvStaticCheck(x) static_assert((x), "Static assert "#x" failed") #else #define nvStaticCheck(x) typedef char NV_STRING_JOIN2(__static_assert_,__LINE__)[(x)] #endif #define NV_COMPILER_CHECK(x) nvStaticCheck(x) // I like this name best. // Make sure type definitions are fine. NV_COMPILER_CHECK(sizeof(int8) == 1); NV_COMPILER_CHECK(sizeof(uint8) == 1); NV_COMPILER_CHECK(sizeof(int16) == 2); NV_COMPILER_CHECK(sizeof(uint16) == 2); NV_COMPILER_CHECK(sizeof(int32) == 4); NV_COMPILER_CHECK(sizeof(uint32) == 4); NV_COMPILER_CHECK(sizeof(int32) == 4); NV_COMPILER_CHECK(sizeof(uint32) == 4); #define NV_ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0])) #if 0 // Disabled in The Witness. #if NV_CC_MSVC #define NV_MESSAGE(x) message(__FILE__ "(" NV_STRING(__LINE__) ") : " x) #else #define NV_MESSAGE(x) message(x) #endif #else #define NV_MESSAGE(x) #endif // Startup initialization macro. #define NV_AT_STARTUP(some_code) \ namespace { \ static struct NV_STRING_JOIN2(AtStartup_, __LINE__) { \ NV_STRING_JOIN2(AtStartup_, __LINE__)() { some_code; } \ } \ NV_STRING_JOIN3(AtStartup_, __LINE__, Instance); \ } // Indicate the compiler that the parameter is not used to suppress compier warnings. #define NV_UNUSED(a) ((a)=(a)) // Null index. @@ Move this somewhere else... it's only used by nvmesh. 
//const unsigned int NIL = unsigned int(~0); //#define NIL uint(~0) // Null pointer. #ifndef NULL #define NULL 0 #endif // Platform includes #if NV_CC_MSVC # if NV_OS_WIN32 # include "DefsVcWin32.h" # elif NV_OS_XBOX # include "DefsVcXBox.h" # else # error "MSVC: Platform not supported" # endif #elif NV_CC_GNUC # if NV_OS_LINUX # include "DefsGnucLinux.h" # elif NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_NETBSD || NV_OS_OPENBSD # include "DefsGnucDarwin.h" # elif NV_OS_MINGW # include "DefsGnucWin32.h" # elif NV_OS_CYGWIN # error "GCC: Cygwin not supported" # else # error "GCC: Platform not supported" # endif #endif #endif // NV_CORE_H Index: ps/trunk/libraries/source/nvtt/src/src/nvimage/KtxFile.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvimage/KtxFile.cpp (revision 23379) +++ ps/trunk/libraries/source/nvtt/src/src/nvimage/KtxFile.cpp (revision 23380) @@ -1,83 +1,83 @@ -// This code is in the public domain -- Ignacio Castaño +// This code is in the public domain -- Ignacio Castaño #include "KtxFile.h" using namespace nv; static const uint8 fileIdentifier[12] = { 0xAB, 0x4B, 0x54, 0x58, 0x20, 0x31, 0x31, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A }; KtxHeader::KtxHeader() { memcpy(identifier, fileIdentifier, 12); endianness = 0x04030201; glType = 0; glTypeSize = 1; glFormat = 0; glInternalFormat = KTX_RGBA; glBaseInternalFormat = KTX_RGBA; pixelWidth = 0; pixelHeight = 0; pixelDepth = 0; numberOfArrayElements = 0; numberOfFaces = 1; numberOfMipmapLevels = 0; bytesOfKeyValueData = 0; } Stream & operator<< (Stream & s, DDSHeader & header) { s.serialize(header.identifier, 12); s << header.endiannes << header.glType << header.glTypeSize << header.glFormat << header.glInternalFormat << header.glBaseInternalFormat; s << header.pixelWidth << header.pixelHeight << header.pixelDepth; s << header.numberOfArrayElements << header.numberOfFaces << header.numberOfMipmapLevels; s << header.bytesOfKeyValueData; return s; } 
KtxFile::KtxFile() { } KtxFile::~KtxFile() { } void KtxFile::addKeyValue(const char * key, const char * value) { keyArray.append(key); valueArray.append(value); bytesOfKeyValueData += strlen(key) + 1 + strlen(value) + 1; } Stream & operator<< (Stream & s, KtxFile & file) { s << header; if (s.isSaving()) { int keyValueCount = keyArray.count(); for (int i = 0; i < keyValueCount; i++) { const String & key = keyArray[i]; const String & value = valueArray[i]; uint keySize = key.length() + 1; uint valueSize = value.length() + 1; uint keyValueSize = keySize + valueSize; s << keyValueSize; s.serialize(key.str(), keySize); s.serialize(value.str(), valueSize); } } else { // @@ Read key value pairs. } return s; } Index: ps/trunk/libraries/source/nvtt/src/src/nvimage/KtxFile.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvimage/KtxFile.h (revision 23379) +++ ps/trunk/libraries/source/nvtt/src/src/nvimage/KtxFile.h (revision 23380) @@ -1,102 +1,102 @@ -// This code is in the public domain -- Ignacio Castaño +// This code is in the public domain -- Ignacio Castaño #pragma once #ifndef NV_IMAGE_KTXFILE_H #define NV_IMAGE_KTXFILE_H #include "nvimage.h" #include "nvcore/StrLib.h" // KTX File format specification: // http://www.khronos.org/opengles/sdk/tools/KTX/file_format_spec/#key namespace nv { class Stream; // GL types (Table 3.2) const uint KTX_UNSIGNED_BYTE; const uint KTX_UNSIGNED_SHORT_5_6_5; // ... // GL formats (Table 3.3) // ... // GL internal formats (Table 3.12, 3.13) // ... // GL base internal format. (Table 3.11) const uint KTX_RGB; const uint KTX_RGBA; const uint KTX_ALPHA; // ... 
struct KtxHeader { uint8 identifier[12]; uint32 endianness; uint32 glType; uint32 glTypeSize; uint32 glFormat; uint32 glInternalFormat; uint32 glBaseInternalFormat; uint32 pixelWidth; uint32 pixelHeight; uint32 pixelDepth; uint32 numberOfArrayElements; uint32 numberOfFaces; uint32 numberOfMipmapLevels; uint32 bytesOfKeyValueData; KtxHeader(); }; NVIMAGE_API Stream & operator<< (Stream & s, DDSHeader & header); struct KtxFile { KtxFile(); ~KtxFile(); void addKeyValue(const char * key, const char * value); private: KtxHeader header; Array keyArray; Array valueArray; }; NVIMAGE_API Stream & operator<< (Stream & s, KtxFile & file); /* for each keyValuePair that fits in bytesOfKeyValueData UInt32 keyAndValueByteSize Byte keyAndValue[keyAndValueByteSize] Byte valuePadding[3 - ((keyAndValueByteSize + 3) % 4)] end for each mipmap_level in numberOfMipmapLevels* UInt32 imageSize; for each array_element in numberOfArrayElements* for each face in numberOfFaces for each z_slice in pixelDepth* for each row or row_of_blocks in pixelHeight* for each pixel or block_of_pixels in pixelWidth Byte data[format-specific-number-of-bytes]** end end end Byte cubePadding[0-3] end end Byte mipPadding[3 - ((imageSize + 3) % 4)] end */ } // nv namespace #endif // NV_IMAGE_KTXFILE_H Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/Half.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvmath/Half.cpp (revision 23379) +++ ps/trunk/libraries/source/nvtt/src/src/nvmath/Half.cpp (revision 23380) @@ -1,787 +1,787 @@ // Branch-free implementation of half-precision (16 bit) floating point // Copyright 2006 Mike Acton // // Permission is hereby granted, free of charge, to any person obtaining a // copy of this software and associated documentation files (the "Software"), // to deal in the Software without restriction, including without limitation // the rights to use, copy, modify, merge, publish, distribute, sublicense, // and/or sell 
copies of the Software, and to permit persons to whom the // Software is furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included // in all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE // // Half-precision floating point format // ------------------------------------ // // | Field | Last | First | Note // |----------|------|-------|---------- // | Sign | 15 | 15 | // | Exponent | 14 | 10 | Bias = 15 // | Mantissa | 9 | 0 | // // Compiling // --------- // // Preferred compile flags for GCC: // -O3 -fstrict-aliasing -std=c99 -pedantic -Wall -Wstrict-aliasing // // This file is a C99 source file, intended to be compiled with a C99 // compliant compiler. However, for the moment it remains compatible // with C++98. Therefore if you are using a compiler that poorly implements // C standards (e.g. MSVC), it may be compiled as C++. This is not // guaranteed for future versions. // // Features // -------- // // * QNaN + <x> = QNaN // * <x> + +INF = +INF // * <x> - -INF = -INF // * INF - INF = SNaN // * Denormalized values // * Difference of ZEROs is always +ZERO // * Sum round with guard + round + sticky bit (grs) // * And of course... 
no branching // // Precision of Sum // ---------------- // // (SUM) uint16 z = half_add( x, y ); // (DIFFERENCE) uint16 z = half_add( x, -y ); // // Will have exactly (0 ulps difference) the same result as: // (For 32 bit IEEE 784 floating point and same rounding mode) // // union FLOAT_32 // { // float f32; // uint32 u32; // }; // // union FLOAT_32 fx = { .u32 = half_to_float( x ) }; // union FLOAT_32 fy = { .u32 = half_to_float( y ) }; // union FLOAT_32 fz = { .f32 = fx.f32 + fy.f32 }; // uint16 z = float_to_half( fz ); // #include "Half.h" #include // Load immediate static inline uint32 _uint32_li( uint32 a ) { return (a); } // Decrement static inline uint32 _uint32_dec( uint32 a ) { return (a - 1); } // Increment static inline uint32 _uint32_inc( uint32 a ) { return (a + 1); } // Complement static inline uint32 _uint32_not( uint32 a ) { return (~a); } // Negate static inline uint32 _uint32_neg( uint32 a ) { #pragma warning(disable : 4146) // unary minus operator applied to unsigned type, result still unsigned return (-a); #pragma warning(default : 4146) } // Extend sign static inline uint32 _uint32_ext( uint32 a ) { return (((int32)a)>>31); } // And static inline uint32 _uint32_and( uint32 a, uint32 b ) { return (a & b); } // And with Complement static inline uint32 _uint32_andc( uint32 a, uint32 b ) { return (a & ~b); } // Or static inline uint32 _uint32_or( uint32 a, uint32 b ) { return (a | b); } // Shift Right Logical static inline uint32 _uint32_srl( uint32 a, int sa ) { return (a >> sa); } // Shift Left Logical static inline uint32 _uint32_sll( uint32 a, int sa ) { return (a << sa); } // Add static inline uint32 _uint32_add( uint32 a, uint32 b ) { return (a + b); } // Subtract static inline uint32 _uint32_sub( uint32 a, uint32 b ) { return (a - b); } // Select on Sign bit static inline uint32 _uint32_sels( uint32 test, uint32 a, uint32 b ) { const uint32 mask = _uint32_ext( test ); const uint32 sel_a = _uint32_and( a, mask ); const uint32 sel_b = 
_uint32_andc( b, mask ); const uint32 result = _uint32_or( sel_a, sel_b ); return (result); } // Load Immediate static inline uint16 _uint16_li( uint16 a ) { return (a); } // Extend sign static inline uint16 _uint16_ext( uint16 a ) { return (((int16)a)>>15); } // Negate static inline uint16 _uint16_neg( uint16 a ) { return (-a); } // Complement static inline uint16 _uint16_not( uint16 a ) { return (~a); } // Decrement static inline uint16 _uint16_dec( uint16 a ) { return (a - 1); } // Shift Left Logical static inline uint16 _uint16_sll( uint16 a, int sa ) { return (a << sa); } // Shift Right Logical static inline uint16 _uint16_srl( uint16 a, int sa ) { return (a >> sa); } // Add static inline uint16 _uint16_add( uint16 a, uint16 b ) { return (a + b); } // Subtract static inline uint16 _uint16_sub( uint16 a, uint16 b ) { return (a - b); } // And static inline uint16 _uint16_and( uint16 a, uint16 b ) { return (a & b); } // Or static inline uint16 _uint16_or( uint16 a, uint16 b ) { return (a | b); } // Exclusive Or static inline uint16 _uint16_xor( uint16 a, uint16 b ) { return (a ^ b); } // And with Complement static inline uint16 _uint16_andc( uint16 a, uint16 b ) { return (a & ~b); } // And then Shift Right Logical static inline uint16 _uint16_andsrl( uint16 a, uint16 b, int sa ) { return ((a & b) >> sa); } // Shift Right Logical then Mask static inline uint16 _uint16_srlm( uint16 a, int sa, uint16 mask ) { return ((a >> sa) & mask); } // Add then Mask static inline uint16 _uint16_addm( uint16 a, uint16 b, uint16 mask ) { return ((a + b) & mask); } // Select on Sign bit static inline uint16 _uint16_sels( uint16 test, uint16 a, uint16 b ) { const uint16 mask = _uint16_ext( test ); const uint16 sel_a = _uint16_and( a, mask ); const uint16 sel_b = _uint16_andc( b, mask ); const uint16 result = _uint16_or( sel_a, sel_b ); return (result); } #if NV_OS_XBOX #include #elif NV_CC_MSVC #include #pragma intrinsic(_BitScanReverse) uint32 _uint32_nlz( uint32 x ) { unsigned 
long index; _BitScanReverse(&index, x); return 31 - index; } #endif // Count Leading Zeros static inline uint32 _uint32_cntlz( uint32 x ) { #if NV_CC_GCC /* On PowerPC, this will map to insn: cntlzw */ /* On Pentium, this will map to insn: clz */ uint32 is_x_nez_msb = _uint32_neg( x ); uint32 nlz = __builtin_clz( x ); uint32 result = _uint32_sels( is_x_nez_msb, nlz, 0x00000020 ); return (result); #elif NV_OS_XBOX // Xbox PPC has this as an intrinsic. return _CountLeadingZeros(x); #elif NV_CC_MSVC uint32 is_x_nez_msb = _uint32_neg( x ); uint32 nlz = _uint32_nlz( x ); uint32 result = _uint32_sels( is_x_nez_msb, nlz, 0x00000020 ); return (result); #else const uint32 x0 = _uint32_srl( x, 1 ); const uint32 x1 = _uint32_or( x, x0 ); const uint32 x2 = _uint32_srl( x1, 2 ); const uint32 x3 = _uint32_or( x1, x2 ); const uint32 x4 = _uint32_srl( x3, 4 ); const uint32 x5 = _uint32_or( x3, x4 ); const uint32 x6 = _uint32_srl( x5, 8 ); const uint32 x7 = _uint32_or( x5, x6 ); const uint32 x8 = _uint32_srl( x7, 16 ); const uint32 x9 = _uint32_or( x7, x8 ); const uint32 xA = _uint32_not( x9 ); const uint32 xB = _uint32_srl( xA, 1 ); const uint32 xC = _uint32_and( xB, 0x55555555 ); const uint32 xD = _uint32_sub( xA, xC ); const uint32 xE = _uint32_and( xD, 0x33333333 ); const uint32 xF = _uint32_srl( xD, 2 ); const uint32 x10 = _uint32_and( xF, 0x33333333 ); const uint32 x11 = _uint32_add( xE, x10 ); const uint32 x12 = _uint32_srl( x11, 4 ); const uint32 x13 = _uint32_add( x11, x12 ); const uint32 x14 = _uint32_and( x13, 0x0f0f0f0f ); const uint32 x15 = _uint32_srl( x14, 8 ); const uint32 x16 = _uint32_add( x14, x15 ); const uint32 x17 = _uint32_srl( x16, 16 ); const uint32 x18 = _uint32_add( x16, x17 ); const uint32 x19 = _uint32_and( x18, 0x0000003f ); return ( x19 ); #endif } // Count Leading Zeros static inline uint16 _uint16_cntlz( uint16 x ) { #ifdef __GNUC__ /* On PowerPC, this will map to insn: cntlzw */ /* On Pentium, this will map to insn: clz */ uint16 nlz32 = 
(uint16)_uint32_cntlz( (uint32)x ); uint32 nlz = _uint32_sub( nlz32, 16 ); return (nlz); #elif _NV_OS_XBOX_ uint16 nlz32 = (uint16)_CountLeadingZeros( (uint32)x ); return _uint32_sub( nlz32, 16); #else const uint16 x0 = _uint16_srl( x, 1 ); const uint16 x1 = _uint16_or( x, x0 ); const uint16 x2 = _uint16_srl( x1, 2 ); const uint16 x3 = _uint16_or( x1, x2 ); const uint16 x4 = _uint16_srl( x3, 4 ); const uint16 x5 = _uint16_or( x3, x4 ); const uint16 x6 = _uint16_srl( x5, 8 ); const uint16 x7 = _uint16_or( x5, x6 ); const uint16 x8 = _uint16_not( x7 ); const uint16 x9 = _uint16_srlm( x8, 1, 0x5555 ); const uint16 xA = _uint16_sub( x8, x9 ); const uint16 xB = _uint16_and( xA, 0x3333 ); const uint16 xC = _uint16_srlm( xA, 2, 0x3333 ); const uint16 xD = _uint16_add( xB, xC ); const uint16 xE = _uint16_srl( xD, 4 ); const uint16 xF = _uint16_addm( xD, xE, 0x0f0f ); const uint16 x10 = _uint16_srl( xF, 8 ); const uint16 x11 = _uint16_addm( xF, x10, 0x001f ); return ( x11 ); #endif } uint16 nv::half_from_float( uint32 f ) { const uint32 one = _uint32_li( 0x00000001 ); const uint32 f_s_mask = _uint32_li( 0x80000000 ); const uint32 f_e_mask = _uint32_li( 0x7f800000 ); const uint32 f_m_mask = _uint32_li( 0x007fffff ); const uint32 f_m_hidden_bit = _uint32_li( 0x00800000 ); const uint32 f_m_round_bit = _uint32_li( 0x00001000 ); const uint32 f_snan_mask = _uint32_li( 0x7fc00000 ); const uint32 f_e_pos = _uint32_li( 0x00000017 ); const uint32 h_e_pos = _uint32_li( 0x0000000a ); const uint32 h_e_mask = _uint32_li( 0x00007c00 ); const uint32 h_snan_mask = _uint32_li( 0x00007e00 ); const uint32 h_e_mask_value = _uint32_li( 0x0000001f ); const uint32 f_h_s_pos_offset = _uint32_li( 0x00000010 ); const uint32 f_h_bias_offset = _uint32_li( 0x00000070 ); const uint32 f_h_m_pos_offset = _uint32_li( 0x0000000d ); const uint32 h_nan_min = _uint32_li( 0x00007c01 ); const uint32 f_h_e_biased_flag = _uint32_li( 0x0000008f ); const uint32 f_s = _uint32_and( f, f_s_mask ); const uint32 f_e = 
_uint32_and( f, f_e_mask ); const uint16 h_s = _uint32_srl( f_s, f_h_s_pos_offset ); const uint32 f_m = _uint32_and( f, f_m_mask ); const uint16 f_e_amount = _uint32_srl( f_e, f_e_pos ); const uint32 f_e_half_bias = _uint32_sub( f_e_amount, f_h_bias_offset ); const uint32 f_snan = _uint32_and( f, f_snan_mask ); const uint32 f_m_round_mask = _uint32_and( f_m, f_m_round_bit ); const uint32 f_m_round_offset = _uint32_sll( f_m_round_mask, one ); const uint32 f_m_rounded = _uint32_add( f_m, f_m_round_offset ); const uint32 f_m_denorm_sa = _uint32_sub( one, f_e_half_bias ); const uint32 f_m_with_hidden = _uint32_or( f_m_rounded, f_m_hidden_bit ); const uint32 f_m_denorm = _uint32_srl( f_m_with_hidden, f_m_denorm_sa ); const uint32 h_m_denorm = _uint32_srl( f_m_denorm, f_h_m_pos_offset ); const uint32 f_m_rounded_overflow = _uint32_and( f_m_rounded, f_m_hidden_bit ); const uint32 m_nan = _uint32_srl( f_m, f_h_m_pos_offset ); const uint32 h_em_nan = _uint32_or( h_e_mask, m_nan ); const uint32 h_e_norm_overflow_offset = _uint32_inc( f_e_half_bias ); const uint32 h_e_norm_overflow = _uint32_sll( h_e_norm_overflow_offset, h_e_pos ); const uint32 h_e_norm = _uint32_sll( f_e_half_bias, h_e_pos ); const uint32 h_m_norm = _uint32_srl( f_m_rounded, f_h_m_pos_offset ); const uint32 h_em_norm = _uint32_or( h_e_norm, h_m_norm ); const uint32 is_h_ndenorm_msb = _uint32_sub( f_h_bias_offset, f_e_amount ); const uint32 is_f_e_flagged_msb = _uint32_sub( f_h_e_biased_flag, f_e_half_bias ); const uint32 is_h_denorm_msb = _uint32_not( is_h_ndenorm_msb ); const uint32 is_f_m_eqz_msb = _uint32_dec( f_m ); const uint32 is_h_nan_eqz_msb = _uint32_dec( m_nan ); const uint32 is_f_inf_msb = _uint32_and( is_f_e_flagged_msb, is_f_m_eqz_msb ); const uint32 is_f_nan_underflow_msb = _uint32_and( is_f_e_flagged_msb, is_h_nan_eqz_msb ); const uint32 is_e_overflow_msb = _uint32_sub( h_e_mask_value, f_e_half_bias ); const uint32 is_h_inf_msb = _uint32_or( is_e_overflow_msb, is_f_inf_msb ); const uint32 
is_f_nsnan_msb = _uint32_sub( f_snan, f_snan_mask ); const uint32 is_m_norm_overflow_msb = _uint32_neg( f_m_rounded_overflow ); const uint32 is_f_snan_msb = _uint32_not( is_f_nsnan_msb ); const uint32 h_em_overflow_result = _uint32_sels( is_m_norm_overflow_msb, h_e_norm_overflow, h_em_norm ); const uint32 h_em_nan_result = _uint32_sels( is_f_e_flagged_msb, h_em_nan, h_em_overflow_result ); const uint32 h_em_nan_underflow_result = _uint32_sels( is_f_nan_underflow_msb, h_nan_min, h_em_nan_result ); const uint32 h_em_inf_result = _uint32_sels( is_h_inf_msb, h_e_mask, h_em_nan_underflow_result ); const uint32 h_em_denorm_result = _uint32_sels( is_h_denorm_msb, h_m_denorm, h_em_inf_result ); const uint32 h_em_snan_result = _uint32_sels( is_f_snan_msb, h_snan_mask, h_em_denorm_result ); const uint32 h_result = _uint32_or( h_s, h_em_snan_result ); return (uint16)(h_result); } uint32 nv::half_to_float( uint16 h ) { const uint32 h_e_mask = _uint32_li( 0x00007c00 ); const uint32 h_m_mask = _uint32_li( 0x000003ff ); const uint32 h_s_mask = _uint32_li( 0x00008000 ); const uint32 h_f_s_pos_offset = _uint32_li( 0x00000010 ); const uint32 h_f_e_pos_offset = _uint32_li( 0x0000000d ); const uint32 h_f_bias_offset = _uint32_li( 0x0001c000 ); const uint32 f_e_mask = _uint32_li( 0x7f800000 ); const uint32 f_m_mask = _uint32_li( 0x007fffff ); const uint32 h_f_e_denorm_bias = _uint32_li( 0x0000007e ); const uint32 h_f_m_denorm_sa_bias = _uint32_li( 0x00000008 ); const uint32 f_e_pos = _uint32_li( 0x00000017 ); const uint32 h_e_mask_minus_one = _uint32_li( 0x00007bff ); const uint32 h_e = _uint32_and( h, h_e_mask ); const uint32 h_m = _uint32_and( h, h_m_mask ); const uint32 h_s = _uint32_and( h, h_s_mask ); const uint32 h_e_f_bias = _uint32_add( h_e, h_f_bias_offset ); const uint32 h_m_nlz = _uint32_cntlz( h_m ); const uint32 f_s = _uint32_sll( h_s, h_f_s_pos_offset ); const uint32 f_e = _uint32_sll( h_e_f_bias, h_f_e_pos_offset ); const uint32 f_m = _uint32_sll( h_m, h_f_e_pos_offset 
); const uint32 f_em = _uint32_or( f_e, f_m ); const uint32 h_f_m_sa = _uint32_sub( h_m_nlz, h_f_m_denorm_sa_bias ); const uint32 f_e_denorm_unpacked = _uint32_sub( h_f_e_denorm_bias, h_f_m_sa ); const uint32 h_f_m = _uint32_sll( h_m, h_f_m_sa ); const uint32 f_m_denorm = _uint32_and( h_f_m, f_m_mask ); const uint32 f_e_denorm = _uint32_sll( f_e_denorm_unpacked, f_e_pos ); const uint32 f_em_denorm = _uint32_or( f_e_denorm, f_m_denorm ); const uint32 f_em_nan = _uint32_or( f_e_mask, f_m ); const uint32 is_e_eqz_msb = _uint32_dec( h_e ); const uint32 is_m_nez_msb = _uint32_neg( h_m ); const uint32 is_e_flagged_msb = _uint32_sub( h_e_mask_minus_one, h_e ); const uint32 is_zero_msb = _uint32_andc( is_e_eqz_msb, is_m_nez_msb ); const uint32 is_inf_msb = _uint32_andc( is_e_flagged_msb, is_m_nez_msb ); const uint32 is_denorm_msb = _uint32_and( is_m_nez_msb, is_e_eqz_msb ); const uint32 is_nan_msb = _uint32_and( is_e_flagged_msb, is_m_nez_msb ); const uint32 is_zero = _uint32_ext( is_zero_msb ); const uint32 f_zero_result = _uint32_andc( f_em, is_zero ); const uint32 f_denorm_result = _uint32_sels( is_denorm_msb, f_em_denorm, f_zero_result ); const uint32 f_inf_result = _uint32_sels( is_inf_msb, f_e_mask, f_denorm_result ); const uint32 f_nan_result = _uint32_sels( is_nan_msb, f_em_nan, f_inf_result ); const uint32 f_result = _uint32_or( f_s, f_nan_result ); return (f_result); } #if !NV_OS_IOS && (defined(__i386__) || defined(__x86_64__)) #if NV_CC_GNUC #if defined(__i386__) || defined(__x86_64__) #include #endif #endif #include "nvcore/Memory.h" // NV_ALIGN_16 static __m128 half_to_float4_SSE2(__m128i h) { #define SSE_CONST4(name, val) static const NV_ALIGN_16 uint name[4] = { (val), (val), (val), (val) } #define CONST(name) *(const __m128i *)&name SSE_CONST4(mask_nosign, 0x7fff); SSE_CONST4(mask_justsign, 0x8000); SSE_CONST4(mask_shifted_exp, 0x7c00 << 13); SSE_CONST4(expadjust_normal, (127 - 15) << 23); SSE_CONST4(expadjust_infnan, (128 - 16) << 23); 
SSE_CONST4(expadjust_denorm, 1 << 23); SSE_CONST4(magic_denorm, 113 << 23); __m128i mnosign = CONST(mask_nosign); __m128i expmant = _mm_and_si128(mnosign, h); __m128i justsign = _mm_and_si128(h, CONST(mask_justsign)); __m128i mshiftexp = CONST(mask_shifted_exp); __m128i eadjust = CONST(expadjust_normal); __m128i shifted = _mm_slli_epi32(expmant, 13); __m128i adjusted = _mm_add_epi32(eadjust, shifted); __m128i justexp = _mm_and_si128(shifted, mshiftexp); __m128i zero = _mm_setzero_si128(); __m128i b_isinfnan = _mm_cmpeq_epi32(mshiftexp, justexp); __m128i b_isdenorm = _mm_cmpeq_epi32(zero, justexp); __m128i adj_infnan = _mm_and_si128(b_isinfnan, CONST(expadjust_infnan)); __m128i adjusted2 = _mm_add_epi32(adjusted, adj_infnan); __m128i adj_den = CONST(expadjust_denorm); __m128i den1 = _mm_add_epi32(adj_den, adjusted2); __m128 den2 = _mm_sub_ps(_mm_castsi128_ps(den1), *(const __m128 *)&magic_denorm); __m128 adjusted3 = _mm_and_ps(den2, _mm_castsi128_ps(b_isdenorm)); __m128 adjusted4 = _mm_andnot_ps(_mm_castsi128_ps(b_isdenorm), _mm_castsi128_ps(adjusted2)); __m128 adjusted5 = _mm_or_ps(adjusted3, adjusted4); __m128i sign = _mm_slli_epi32(justsign, 16); __m128 final = _mm_or_ps(adjusted5, _mm_castsi128_ps(sign)); // ~21 SSE2 ops. return final; #undef SSE_CONST4 #undef CONST } void nv::half_to_float_array_SSE2(const uint16 * vin, float * vout, int count) { nvDebugCheck((intptr_t(vin) & 15) == 0); nvDebugCheck((intptr_t(vout) & 15) == 0); nvDebugCheck((count & 7) == 0); __m128i zero = _mm_setzero_si128(); for (int i = 0; i < count; i += 8) { __m128i in = _mm_loadu_si128((const __m128i *)(vin + i)); __m128i a = _mm_unpacklo_epi16(in, zero); __m128i b = _mm_unpackhi_epi16(in, zero); __m128 outa = half_to_float4_SSE2(a); _mm_storeu_ps((float *)(vout + i), outa); __m128 outb = half_to_float4_SSE2(b); _mm_storeu_ps((float *)(vout + i + 4), outb); } } #endif // @@ These tables could be smaller. 
namespace nv { uint32 mantissa_table[2048] = { 0xDEADBEEF }; uint32 exponent_table[64]; uint32 offset_table[64]; } void nv::half_init_tables() { // Init mantissa table. mantissa_table[0] = 0; // denormals for (int i = 1; i < 1024; i++) { uint m = i << 13; uint e = 0; while ((m & 0x00800000) == 0) { e -= 0x00800000; m <<= 1; } m &= ~0x00800000; e += 0x38800000; mantissa_table[i] = m | e; } // normals for (int i = 1024; i < 2048; i++) { mantissa_table[i] = (i - 1024) << 13; } // Init exponent table. exponent_table[0] = 0; for (int i = 1; i < 31; i++) { exponent_table[i] = 0x38000000 + (i << 23); } exponent_table[31] = 0x7f800000; exponent_table[32] = 0x80000000; for (int i = 33; i < 63; i++) { exponent_table[i] = 0xb8000000 + ((i - 32) << 23); } exponent_table[63] = 0xff800000; // Init offset table. offset_table[0] = 0; for (int i = 1; i < 32; i++) { offset_table[i] = 1024; } offset_table[32] = 0; for (int i = 33; i < 64; i++) { offset_table[i] = 1024; } } // Fast half to float conversion based on: // http://www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf uint32 nv::fast_half_to_float(uint16 h) { // Initialize table if necessary. if (mantissa_table[0] != 0) half_init_tables(); uint exp = h >> 10; return mantissa_table[offset_table[exp] + (h & 0x3ff)] + exponent_table[exp]; } #if 0 // Inaccurate conversion suggested at the ffmpeg mailing list: // http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/2009-July/068949.html uint32 nv::fast_half_to_float(uint16 v) { if (v & 0x8000) return 0; uint exp = v >> 10; if (!exp) return (v>>9)&1; if (exp >= 15) return 0xffff; v <<= 6; return (v+(1<<16)) >> (15-exp); } #endif #if 0 // Some more from a gamedev thread: // http://www.devmaster.net/forums/showthread.php?t=10924 // I believe it does not handle specials either. // Mike Acton's code should be fairly easy to vectorize and that would handle all cases too, the table method might still be faster, though. 
static __declspec(align(16)) unsigned half_sign[4] = {0x00008000, 0x00008000, 0x00008000, 0x00008000}; static __declspec(align(16)) unsigned half_exponent[4] = {0x00007C00, 0x00007C00, 0x00007C00, 0x00007C00}; static __declspec(align(16)) unsigned half_mantissa[4] = {0x000003FF, 0x000003FF, 0x000003FF, 0x000003FF}; static __declspec(align(16)) unsigned half_bias_offset[4] = {0x0001C000, 0x0001C000, 0x0001C000, 0x0001C000}; __asm { movaps xmm1, xmm0 // Input in xmm0 movaps xmm2, xmm0 andps xmm0, half_sign andps xmm1, half_exponent andps xmm2, half_mantissa paddd xmm1, half_bias_offset pslld xmm0, 16 pslld xmm1, 13 pslld xmm2, 13 orps xmm1, xmm2 orps xmm0, xmm1 // Result in xmm0 } #endif #if 0 // These version computes the tables at compile time: // http://gamedev.stackexchange.com/questions/17326/conversion-of-a-number-from-single-precision-floating-point-representation-to-a /* This method is faster than the OpenEXR implementation (very often * used, eg. in Ogre), with the additional benefit of rounding, inspired - * by James Tursa’s half-precision code. */ + * by James Tursa's half-precision code. */ static inline uint16_t float_to_half_branch(uint32_t x) { uint16_t bits = (x >> 16) & 0x8000; /* Get the sign */ uint16_t m = (x >> 12) & 0x07ff; /* Keep one extra bit for rounding */ unsigned int e = (x >> 23) & 0xff; /* Using int is faster here */ /* If zero, or denormal, or exponent underflows too much for a denormal * half, return signed zero. */ if (e < 103) return bits; /* If NaN, return NaN. If Inf or exponent overflow, return Inf. */ if (e > 142) { bits |= 0x7c00u; /* If exponent was 0xff and one mantissa bit was set, it means NaN, * not Inf, so make sure we set one mantissa bit too. */ bits |= e == 255 && (x & 0x007fffffu); return bits; } /* If exponent underflows but not too much, return a denormal */ if (e < 113) { m |= 0x0800u; /* Extra rounding may overflow and set mantissa to 0 and exponent * to 1, which is OK. 
*/ bits |= (m >> (114 - e)) + ((m >> (113 - e)) & 1); return bits; } bits |= ((e - 112) << 10) | (m >> 1); /* Extra rounding. An overflow will set mantissa to 0 and increment * the exponent, which is OK. */ bits += m & 1; return bits; } /* These macros implement a finite iterator useful to build lookup * tables. For instance, S64(0) will call S1(x) for all values of x * between 0 and 63. * Due to the exponential behaviour of the calls, the stress on the * compiler may be important. */ #define S4(x) S1((x)), S1((x)+1), S1((x)+2), S1((x)+3) #define S16(x) S4((x)), S4((x)+4), S4((x)+8), S4((x)+12) #define S64(x) S16((x)), S16((x)+16), S16((x)+32), S16((x)+48) #define S256(x) S64((x)), S64((x)+64), S64((x)+128), S64((x)+192) #define S1024(x) S256((x)), S256((x)+256), S256((x)+512), S256((x)+768) -/* Lookup table-based algorithm from “Fast Half Float Conversions” +/* Lookup table-based algorithm from "Fast Half Float Conversions" * by Jeroen van der Zijp, November 2008. No rounding is performed, * and some NaN values may be incorrectly converted to Inf. */ static inline uint16_t float_to_half_nobranch(uint32_t x) { static uint16_t const basetable[512] = { #define S1(i) (((i) < 103) ? 0x0000 : \ ((i) < 113) ? 0x0400 >> (113 - (i)) : \ ((i) < 143) ? ((i) - 112) << 10 : 0x7c00) S256(0), #undef S1 #define S1(i) (0x8000 | (((i) < 103) ? 0x0000 : \ ((i) < 113) ? 0x0400 >> (113 - (i)) : \ ((i) < 143) ? ((i) - 112) << 10 : 0x7c00)) S256(0), #undef S1 }; static uint8_t const shifttable[512] = { #define S1(i) (((i) < 103) ? 24 : \ ((i) < 113) ? 126 - (i) : \ ((i) < 143 || (i) == 255) ? 
13 : 24) S256(0), S256(0), #undef S1 }; uint16_t bits = basetable[(x >> 23) & 0x1ff]; bits |= (x & 0x007fffff) >> shifttable[(x >> 23) & 0x1ff]; return bits; } #endif Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/Matrix.inl =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvmath/Matrix.inl (revision 23379) +++ ps/trunk/libraries/source/nvtt/src/src/nvmath/Matrix.inl (revision 23380) @@ -1,1274 +1,1274 @@ // This code is in the public domain -- castanyo@yahoo.es #pragma once #ifndef NV_MATH_MATRIX_INL #define NV_MATH_MATRIX_INL #include "Matrix.h" namespace nv { inline Matrix3::Matrix3() {} inline Matrix3::Matrix3(float f) { for(int i = 0; i < 9; i++) { m_data[i] = f; } } inline Matrix3::Matrix3(identity_t) { for(int i = 0; i < 3; i++) { for(int j = 0; j < 3; j++) { m_data[3*j+i] = (i == j) ? 1.0f : 0.0f; } } } inline Matrix3::Matrix3(const Matrix3 & m) { for(int i = 0; i < 9; i++) { m_data[i] = m.m_data[i]; } } inline Matrix3::Matrix3(Vector3::Arg v0, Vector3::Arg v1, Vector3::Arg v2) { m_data[0] = v0.x; m_data[1] = v0.y; m_data[2] = v0.z; m_data[3] = v1.x; m_data[4] = v1.y; m_data[5] = v1.z; m_data[6] = v2.x; m_data[7] = v2.y; m_data[8] = v2.z; } inline float Matrix3::data(uint idx) const { nvDebugCheck(idx < 9); return m_data[idx]; } inline float & Matrix3::data(uint idx) { nvDebugCheck(idx < 9); return m_data[idx]; } inline float Matrix3::get(uint row, uint col) const { nvDebugCheck(row < 3 && col < 3); return m_data[col * 3 + row]; } inline float Matrix3::operator()(uint row, uint col) const { nvDebugCheck(row < 3 && col < 3); return m_data[col * 3 + row]; } inline float & Matrix3::operator()(uint row, uint col) { nvDebugCheck(row < 3 && col < 3); return m_data[col * 3 + row]; } inline Vector3 Matrix3::row(uint i) const { nvDebugCheck(i < 3); return Vector3(get(i, 0), get(i, 1), get(i, 2)); } inline Vector3 Matrix3::column(uint i) const { nvDebugCheck(i < 3); return Vector3(get(0, i), get(1, 
i), get(2, i)); } inline void Matrix3::operator*=(float s) { for(int i = 0; i < 9; i++) { m_data[i] *= s; } } inline void Matrix3::operator/=(float s) { float is = 1.0f /s; for(int i = 0; i < 9; i++) { m_data[i] *= is; } } inline void Matrix3::operator+=(const Matrix3 & m) { for(int i = 0; i < 9; i++) { m_data[i] += m.m_data[i]; } } inline void Matrix3::operator-=(const Matrix3 & m) { for(int i = 0; i < 9; i++) { m_data[i] -= m.m_data[i]; } } inline Matrix3 operator+(const Matrix3 & a, const Matrix3 & b) { Matrix3 m = a; m += b; return m; } inline Matrix3 operator-(const Matrix3 & a, const Matrix3 & b) { Matrix3 m = a; m -= b; return m; } inline Matrix3 operator*(const Matrix3 & a, float s) { Matrix3 m = a; m *= s; return m; } inline Matrix3 operator*(float s, const Matrix3 & a) { Matrix3 m = a; m *= s; return m; } inline Matrix3 operator/(const Matrix3 & a, float s) { Matrix3 m = a; m /= s; return m; } inline Matrix3 mul(const Matrix3 & a, const Matrix3 & b) { Matrix3 m; for(int i = 0; i < 3; i++) { const float ai0 = a(i,0), ai1 = a(i,1), ai2 = a(i,2); m(i, 0) = ai0 * b(0,0) + ai1 * b(1,0) + ai2 * b(2,0); m(i, 1) = ai0 * b(0,1) + ai1 * b(1,1) + ai2 * b(2,1); m(i, 2) = ai0 * b(0,2) + ai1 * b(1,2) + ai2 * b(2,2); } return m; } inline Matrix3 operator*(const Matrix3 & a, const Matrix3 & b) { return mul(a, b); } // Transform the given 3d vector with the given matrix. 
inline Vector3 transform(const Matrix3 & m, const Vector3 & p) { return Vector3( p.x * m(0,0) + p.y * m(0,1) + p.z * m(0,2), p.x * m(1,0) + p.y * m(1,1) + p.z * m(1,2), p.x * m(2,0) + p.y * m(2,1) + p.z * m(2,2)); } inline void Matrix3::scale(float s) { for (int i = 0; i < 9; i++) { m_data[i] *= s; } } inline void Matrix3::scale(Vector3::Arg s) { m_data[0] *= s.x; m_data[1] *= s.x; m_data[2] *= s.x; m_data[3] *= s.y; m_data[4] *= s.y; m_data[5] *= s.y; m_data[6] *= s.z; m_data[7] *= s.z; m_data[8] *= s.z; } inline float Matrix3::determinant() const { return get(0,0) * get(1,1) * get(2,2) + get(0,1) * get(1,2) * get(2,0) + get(0,2) * get(1,0) * get(2,1) - get(0,2) * get(1,1) * get(2,0) - get(0,1) * get(1,0) * get(2,2) - get(0,0) * get(1,2) * get(2,1); } // Inverse using Cramer's rule. inline Matrix3 inverseCramer(const Matrix3 & m) { const float det = m.determinant(); if (equal(det, 0.0f, 0.0f)) { return Matrix3(0); } Matrix3 r; r.data(0) = - m.data(5) * m.data(7) + m.data(4) * m.data(8); r.data(1) = + m.data(5) * m.data(6) - m.data(3) * m.data(8); r.data(2) = - m.data(4) * m.data(6) + m.data(3) * m.data(7); r.data(3) = + m.data(2) * m.data(7) - m.data(1) * m.data(8); r.data(4) = - m.data(2) * m.data(6) + m.data(0) * m.data(8); r.data(5) = + m.data(1) * m.data(6) - m.data(0) * m.data(7); r.data(6) = - m.data(2) * m.data(4) + m.data(1) * m.data(5); r.data(7) = + m.data(2) * m.data(3) - m.data(0) * m.data(5); r.data(8) = - m.data(1) * m.data(3) + m.data(0) * m.data(4); r.scale(1.0f / det); return r; } inline Matrix::Matrix() { } inline Matrix::Matrix(float f) { for(int i = 0; i < 16; i++) { m_data[i] = 0.0f; } } inline Matrix::Matrix(identity_t) { for(int i = 0; i < 4; i++) { for(int j = 0; j < 4; j++) { m_data[4*j+i] = (i == j) ? 
1.0f : 0.0f; } } } inline Matrix::Matrix(const Matrix & m) { for(int i = 0; i < 16; i++) { m_data[i] = m.m_data[i]; } } inline Matrix::Matrix(const Matrix3 & m) { for(int i = 0; i < 3; i++) { for(int j = 0; j < 3; j++) { operator()(i, j) = m.get(i, j); } } for(int i = 0; i < 4; i++) { operator()(3, i) = 0; operator()(i, 3) = 0; } } inline Matrix::Matrix(Vector4::Arg v0, Vector4::Arg v1, Vector4::Arg v2, Vector4::Arg v3) { m_data[ 0] = v0.x; m_data[ 1] = v0.y; m_data[ 2] = v0.z; m_data[ 3] = v0.w; m_data[ 4] = v1.x; m_data[ 5] = v1.y; m_data[ 6] = v1.z; m_data[ 7] = v1.w; m_data[ 8] = v2.x; m_data[ 9] = v2.y; m_data[10] = v2.z; m_data[11] = v2.w; m_data[12] = v3.x; m_data[13] = v3.y; m_data[14] = v3.z; m_data[15] = v3.w; } /*inline Matrix::Matrix(const float m[]) { for(int i = 0; i < 16; i++) { m_data[i] = m[i]; } }*/ // Accessors inline float Matrix::data(uint idx) const { nvDebugCheck(idx < 16); return m_data[idx]; } inline float & Matrix::data(uint idx) { nvDebugCheck(idx < 16); return m_data[idx]; } inline float Matrix::get(uint row, uint col) const { nvDebugCheck(row < 4 && col < 4); return m_data[col * 4 + row]; } inline float Matrix::operator()(uint row, uint col) const { nvDebugCheck(row < 4 && col < 4); return m_data[col * 4 + row]; } inline float & Matrix::operator()(uint row, uint col) { nvDebugCheck(row < 4 && col < 4); return m_data[col * 4 + row]; } inline const float * Matrix::ptr() const { return m_data; } inline Vector4 Matrix::row(uint i) const { nvDebugCheck(i < 4); return Vector4(get(i, 0), get(i, 1), get(i, 2), get(i, 3)); } inline Vector4 Matrix::column(uint i) const { nvDebugCheck(i < 4); return Vector4(get(0, i), get(1, i), get(2, i), get(3, i)); } inline void Matrix::zero() { m_data[0] = 0; m_data[1] = 0; m_data[2] = 0; m_data[3] = 0; m_data[4] = 0; m_data[5] = 0; m_data[6] = 0; m_data[7] = 0; m_data[8] = 0; m_data[9] = 0; m_data[10] = 0; m_data[11] = 0; m_data[12] = 0; m_data[13] = 0; m_data[14] = 0; m_data[15] = 0; } inline void 
Matrix::identity() { m_data[0] = 1; m_data[1] = 0; m_data[2] = 0; m_data[3] = 0; m_data[4] = 0; m_data[5] = 1; m_data[6] = 0; m_data[7] = 0; m_data[8] = 0; m_data[9] = 0; m_data[10] = 1; m_data[11] = 0; m_data[12] = 0; m_data[13] = 0; m_data[14] = 0; m_data[15] = 1; } // Apply scale. inline void Matrix::scale(float s) { m_data[0] *= s; m_data[1] *= s; m_data[2] *= s; m_data[3] *= s; m_data[4] *= s; m_data[5] *= s; m_data[6] *= s; m_data[7] *= s; m_data[8] *= s; m_data[9] *= s; m_data[10] *= s; m_data[11] *= s; m_data[12] *= s; m_data[13] *= s; m_data[14] *= s; m_data[15] *= s; } // Apply scale. inline void Matrix::scale(Vector3::Arg s) { m_data[0] *= s.x; m_data[1] *= s.x; m_data[2] *= s.x; m_data[3] *= s.x; m_data[4] *= s.y; m_data[5] *= s.y; m_data[6] *= s.y; m_data[7] *= s.y; m_data[8] *= s.z; m_data[9] *= s.z; m_data[10] *= s.z; m_data[11] *= s.z; } // Apply translation. inline void Matrix::translate(Vector3::Arg t) { m_data[12] = m_data[0] * t.x + m_data[4] * t.y + m_data[8] * t.z + m_data[12]; m_data[13] = m_data[1] * t.x + m_data[5] * t.y + m_data[9] * t.z + m_data[13]; m_data[14] = m_data[2] * t.x + m_data[6] * t.y + m_data[10] * t.z + m_data[14]; m_data[15] = m_data[3] * t.x + m_data[7] * t.y + m_data[11] * t.z + m_data[15]; } Matrix rotation(float theta, float v0, float v1, float v2); // Apply rotation. inline void Matrix::rotate(float theta, float v0, float v1, float v2) { Matrix R(rotation(theta, v0, v1, v2)); apply(R); } // Apply transform. inline void Matrix::apply(Matrix::Arg m) { nvDebugCheck(this != &m); for(int i = 0; i < 4; i++) { const float ai0 = get(i,0), ai1 = get(i,1), ai2 = get(i,2), ai3 = get(i,3); m_data[0 + i] = ai0 * m(0,0) + ai1 * m(1,0) + ai2 * m(2,0) + ai3 * m(3,0); m_data[4 + i] = ai0 * m(0,1) + ai1 * m(1,1) + ai2 * m(2,1) + ai3 * m(3,1); m_data[8 + i] = ai0 * m(0,2) + ai1 * m(1,2) + ai2 * m(2,2) + ai3 * m(3,2); m_data[12+ i] = ai0 * m(0,3) + ai1 * m(1,3) + ai2 * m(2,3) + ai3 * m(3,3); } } // Get scale matrix. 
inline Matrix scale(Vector3::Arg s)
{
    Matrix m(identity);
    m(0,0) = s.x;
    m(1,1) = s.y;
    m(2,2) = s.z;
    return m;
}

// Get scale matrix.
inline Matrix scale(float s)
{
    Matrix m(identity);
    m(0,0) = m(1,1) = m(2,2) = s;
    return m;
}

// Get translation matrix.
inline Matrix translation(Vector3::Arg t)
{
    Matrix m(identity);
    m(0,3) = t.x;
    m(1,3) = t.y;
    m(2,3) = t.z;
    return m;
}

// Get rotation matrix.
// theta is passed straight to cosf/sinf, so it is in radians. (v0, v1, v2) is
// the rotation axis; it is normalized here unless it is exactly a unit
// coordinate axis, in which case a shortcut is taken.
// All four branches use the same sign convention: -sin below the diagonal for
// the X and Z axes, as in the original axis-aligned cases.
inline Matrix rotation(float theta, float v0, float v1, float v2)
{
    float cost = cosf(theta);
    float sint = sinf(theta);

    Matrix m(identity);

    if( 1 == v0 && 0 == v1 && 0 == v2 ) {
        m(1,1) = cost; m(2,1) = -sint;
        m(1,2) = sint; m(2,2) = cost;
    }
    else if( 0 == v0 && 1 == v1 && 0 == v2 ) {
        // The -sin term belongs in row 0 (it used to be written to m(1,2)).
        m(0,0) = cost; m(2,0) = sint;
        m(0,2) = -sint; m(2,2) = cost;
    }
    else if( 0 == v0 && 0 == v1 && 1 == v2 ) {
        m(0,0) = cost; m(1,0) = -sint;
        m(0,1) = sint; m(1,1) = cost;
    }
    else {
        // Normalize the axis first; the squared components used on the
        // diagonal must be those of the unit vector.
        float iscale = 1.0f / sqrtf(v0 * v0 + v1 * v1 + v2 * v2);
        v0 *= iscale;
        v1 *= iscale;
        v2 *= iscale;

        float a2 = v0 * v0;
        float b2 = v1 * v1;
        float c2 = v2 * v2;

        float mcos = 1.0f - cost;
        float abm = v0 * v1 * mcos;
        float acm = v0 * v2 * mcos;
        float bcm = v1 * v2 * mcos;
        float as = v0 * sint;
        float bs = v1 * sint;
        float cs = v2 * sint;

        // Fill the upper-left 3x3 block only; the last row and column keep
        // their identity values. (Two of these terms used to be written to
        // m(3,0) and m(3,1), leaving m(0,1) and m(0,2) at zero.)
        m(0,0) = a2 * mcos + cost;
        m(1,0) = abm - cs;
        m(2,0) = acm + bs;

        m(0,1) = abm + cs;
        m(1,1) = b2 * mcos + cost;
        m(2,1) = bcm - as;

        m(0,2) = acm - bs;
        m(1,2) = bcm + as;
        m(2,2) = c2 * mcos + cost;
    }
    return m;
}

//Matrix rotation(float yaw, float pitch, float roll);
//Matrix skew(float angle, Vector3::Arg v1, Vector3::Arg v2);

// Get frustum matrix.
inline Matrix frustum(float xmin, float xmax, float ymin, float ymax, float zNear, float zFar)
{
    // Shared factors of the projection terms.
    const float twoNear   = 2.0f * zNear;
    const float invWidth  = 1.0f / (xmax - xmin);
    const float invHeight = 1.0f / (ymax - ymin);
    const float invDepth  = 1.0f / (zFar - zNear);

    // Start from an all-zero matrix and fill in the non-zero entries.
    Matrix proj(0.0f);

    proj(0,0) = twoNear * invWidth;
    proj(1,1) = twoNear * invHeight;
    proj(0,2) = (xmax + xmin) * invWidth;
    proj(1,2) = (ymax + ymin) * invHeight;
    proj(2,2) = -(zFar + zNear) * invDepth;
    proj(3,2) = -1.0f;
    proj(2,3) = -(zFar * twoNear) * invDepth;

    return proj;
}

// Get inverse frustum matrix.
// Takes the same six parameters as frustum() above.
inline Matrix frustumInverse(float xmin, float xmax, float ymin, float ymax, float zNear, float zFar)
{
    const float invTwoNear    = 1.0f / (2.0f * zNear);
    const float invTwoNearFar = 1.0f / (2.0f * zNear * zFar);

    Matrix inv(0.0f);

    inv(0,0) = (xmax - xmin) * invTwoNear;
    inv(0,3) = (xmax + xmin) * invTwoNear;
    inv(1,1) = (ymax - ymin) * invTwoNear;
    inv(1,3) = (ymax + ymin) * invTwoNear;
    inv(2,3) = -1;
    inv(3,2) = -(zFar - zNear) * invTwoNearFar;
    inv(3,3) = (zFar + zNear) * invTwoNearFar;

    return inv;
}

// Get infinite frustum matrix.
// Same as the six-argument frustum() except that no far plane is supplied.
inline Matrix frustum(float xmin, float xmax, float ymin, float ymax, float zNear)
{
    const float twoNear   = 2.0f * zNear;
    const float invWidth  = 1.0f / (xmax - xmin);
    const float invHeight = 1.0f / (ymax - ymin);
    const float nudge     = 1.0; // 0.999;

    Matrix proj(0.0f);

    proj(0,0) = twoNear * invWidth;
    proj(1,1) = twoNear * invHeight;
    proj(0,2) = (xmax + xmin) * invWidth;
    proj(1,2) = (ymax + ymin) * invHeight;
    proj(2,2) = -1.0f * nudge;
    proj(3,2) = -1.0f;
    proj(2,3) = -twoNear * nudge;

    return proj;
}

// Get perspective matrix.
// The horizontal half-extent is zNear * tan(fovy / 2); the vertical one is
// that value divided by aspect. The result is forwarded to frustum().
inline Matrix perspective(float fovy, float aspect, float zNear, float zFar)
{
    const float right = zNear * tan(fovy / 2);
    const float left  = -right;

    const float top    = right / aspect;
    const float bottom = -top;

    return frustum(left, right, bottom, top, zNear, zFar);
}

// Get inverse perspective matrix.
// Derives the frustum extents the same way as perspective() (zNear * tan(fovy / 2)
// horizontally, divided by aspect vertically) and forwards them to frustumInverse().
inline Matrix perspectiveInverse(float fovy, float aspect, float zNear, float zFar)
{
    float xmax = zNear * tan(fovy / 2);
    float xmin = -xmax;

    float ymax = xmax / aspect;
    float ymin = -ymax;

    return frustumInverse(xmin, xmax, ymin, ymax, zNear, zFar);
}

// Get infinite perspective matrix.
// Forwards to the five-argument frustum() overload, which takes no far plane.
inline Matrix perspective(float fovy, float aspect, float zNear)
{
    float x = zNear * tan(fovy / 2);
    float y = x / aspect;
    return frustum( -x, x, -y, y, zNear );
}

// Get matrix determinant.
// Fully expanded 4x4 determinant: 24 signed products of four elements each,
// written against the flat m_data array.
inline float Matrix::determinant() const
{
    return
        m_data[3] * m_data[6] * m_data[ 9] * m_data[12] - m_data[2] * m_data[7] * m_data[ 9] * m_data[12] -
        m_data[3] * m_data[5] * m_data[10] * m_data[12] + m_data[1] * m_data[7] * m_data[10] * m_data[12] +
        m_data[2] * m_data[5] * m_data[11] * m_data[12] - m_data[1] * m_data[6] * m_data[11] * m_data[12] -
        m_data[3] * m_data[6] * m_data[ 8] * m_data[13] + m_data[2] * m_data[7] * m_data[ 8] * m_data[13] +
        m_data[3] * m_data[4] * m_data[10] * m_data[13] - m_data[0] * m_data[7] * m_data[10] * m_data[13] -
        m_data[2] * m_data[4] * m_data[11] * m_data[13] + m_data[0] * m_data[6] * m_data[11] * m_data[13] +
        m_data[3] * m_data[5] * m_data[ 8] * m_data[14] - m_data[1] * m_data[7] * m_data[ 8] * m_data[14] -
        m_data[3] * m_data[4] * m_data[ 9] * m_data[14] + m_data[0] * m_data[7] * m_data[ 9] * m_data[14] +
        m_data[1] * m_data[4] * m_data[11] * m_data[14] - m_data[0] * m_data[5] * m_data[11] * m_data[14] -
        m_data[2] * m_data[5] * m_data[ 8] * m_data[15] + m_data[1] * m_data[6] * m_data[ 8] * m_data[15] +
        m_data[2] * m_data[4] * m_data[ 9] * m_data[15] - m_data[0] * m_data[6] * m_data[ 9] * m_data[15] -
        m_data[1] * m_data[4] * m_data[10] * m_data[15] + m_data[0] * m_data[5] * m_data[10] * m_data[15];
}

// Return the transpose of m: element (i, j) of the result is element (j, i) of m.
inline Matrix transpose(Matrix::Arg m)
{
    Matrix r;
    for (int i = 0; i < 4; i++)
    {
        for (int j = 0; j < 4; j++)
        {
            r(i, j) = m(j, i);
        }
    }
    return r;
}

// Inverse using Cramer's rule.
// Each of the 16 result elements is a signed sum of six triple products of
// m's elements; the whole result is then scaled by 1 / determinant.
// NOTE(review): unlike the Matrix3 overload there is no check for a zero
// determinant, so a singular input divides by zero - confirm that callers
// only pass invertible matrices.
inline Matrix inverseCramer(Matrix::Arg m)
{
    Matrix r;
    r.data( 0) = m.data(6)*m.data(11)*m.data(13) - m.data(7)*m.data(10)*m.data(13) + m.data(7)*m.data(9)*m.data(14) - m.data(5)*m.data(11)*m.data(14) - m.data(6)*m.data(9)*m.data(15) + m.data(5)*m.data(10)*m.data(15);
    r.data( 1) = m.data(3)*m.data(10)*m.data(13) - m.data(2)*m.data(11)*m.data(13) - m.data(3)*m.data(9)*m.data(14) + m.data(1)*m.data(11)*m.data(14) + m.data(2)*m.data(9)*m.data(15) - m.data(1)*m.data(10)*m.data(15);
    r.data( 2) = m.data(2)*m.data( 7)*m.data(13) - m.data(3)*m.data( 6)*m.data(13) + m.data(3)*m.data(5)*m.data(14) - m.data(1)*m.data( 7)*m.data(14) - m.data(2)*m.data(5)*m.data(15) + m.data(1)*m.data( 6)*m.data(15);
    r.data( 3) = m.data(3)*m.data( 6)*m.data( 9) - m.data(2)*m.data( 7)*m.data( 9) - m.data(3)*m.data(5)*m.data(10) + m.data(1)*m.data( 7)*m.data(10) + m.data(2)*m.data(5)*m.data(11) - m.data(1)*m.data( 6)*m.data(11);
    r.data( 4) = m.data(7)*m.data(10)*m.data(12) - m.data(6)*m.data(11)*m.data(12) - m.data(7)*m.data(8)*m.data(14) + m.data(4)*m.data(11)*m.data(14) + m.data(6)*m.data(8)*m.data(15) - m.data(4)*m.data(10)*m.data(15);
    r.data( 5) = m.data(2)*m.data(11)*m.data(12) - m.data(3)*m.data(10)*m.data(12) + m.data(3)*m.data(8)*m.data(14) - m.data(0)*m.data(11)*m.data(14) - m.data(2)*m.data(8)*m.data(15) + m.data(0)*m.data(10)*m.data(15);
    r.data( 6) = m.data(3)*m.data( 6)*m.data(12) - m.data(2)*m.data( 7)*m.data(12) - m.data(3)*m.data(4)*m.data(14) + m.data(0)*m.data( 7)*m.data(14) + m.data(2)*m.data(4)*m.data(15) - m.data(0)*m.data( 6)*m.data(15);
    r.data( 7) = m.data(2)*m.data( 7)*m.data( 8) - m.data(3)*m.data( 6)*m.data( 8) + m.data(3)*m.data(4)*m.data(10) - m.data(0)*m.data( 7)*m.data(10) - m.data(2)*m.data(4)*m.data(11) + m.data(0)*m.data( 6)*m.data(11);
    r.data( 8) = m.data(5)*m.data(11)*m.data(12) - m.data(7)*m.data( 9)*m.data(12) + m.data(7)*m.data(8)*m.data(13) - m.data(4)*m.data(11)*m.data(13) - m.data(5)*m.data(8)*m.data(15) + m.data(4)*m.data( 9)*m.data(15);
    r.data( 9) = m.data(3)*m.data( 9)*m.data(12) - m.data(1)*m.data(11)*m.data(12) - m.data(3)*m.data(8)*m.data(13) + m.data(0)*m.data(11)*m.data(13) + m.data(1)*m.data(8)*m.data(15) - m.data(0)*m.data( 9)*m.data(15);
    r.data(10) = m.data(1)*m.data( 7)*m.data(12) - m.data(3)*m.data( 5)*m.data(12) + m.data(3)*m.data(4)*m.data(13) - m.data(0)*m.data( 7)*m.data(13) - m.data(1)*m.data(4)*m.data(15) + m.data(0)*m.data( 5)*m.data(15);
    r.data(11) = m.data(3)*m.data( 5)*m.data( 8) - m.data(1)*m.data( 7)*m.data( 8) - m.data(3)*m.data(4)*m.data( 9) + m.data(0)*m.data( 7)*m.data( 9) + m.data(1)*m.data(4)*m.data(11) - m.data(0)*m.data( 5)*m.data(11);
    r.data(12) = m.data(6)*m.data( 9)*m.data(12) - m.data(5)*m.data(10)*m.data(12) - m.data(6)*m.data(8)*m.data(13) + m.data(4)*m.data(10)*m.data(13) + m.data(5)*m.data(8)*m.data(14) - m.data(4)*m.data( 9)*m.data(14);
    r.data(13) = m.data(1)*m.data(10)*m.data(12) - m.data(2)*m.data( 9)*m.data(12) + m.data(2)*m.data(8)*m.data(13) - m.data(0)*m.data(10)*m.data(13) - m.data(1)*m.data(8)*m.data(14) + m.data(0)*m.data( 9)*m.data(14);
    r.data(14) = m.data(2)*m.data( 5)*m.data(12) - m.data(1)*m.data( 6)*m.data(12) - m.data(2)*m.data(4)*m.data(13) + m.data(0)*m.data( 6)*m.data(13) + m.data(1)*m.data(4)*m.data(14) - m.data(0)*m.data( 5)*m.data(14);
    r.data(15) = m.data(1)*m.data( 6)*m.data( 8) - m.data(2)*m.data( 5)*m.data( 8) + m.data(2)*m.data(4)*m.data( 9) - m.data(0)*m.data( 6)*m.data( 9) - m.data(1)*m.data(4)*m.data(10) + m.data(0)*m.data( 5)*m.data(10);
    // Divide the whole result by the determinant.
    r.scale(1.0f / m.determinant());
    return r;
}

// Builds a matrix from the transposed upper-left 3x3 block of m and then
// applies a translation by the negated elements 12-14 of m.
// NOTE(review): presumably only valid when m is a rigid (rotation + translation)
// transform, as the name suggests - nothing here checks that.
inline Matrix isometryInverse(Matrix::Arg m)
{
    Matrix r(identity);

    // transposed 3x3 upper left matrix
    for (int i = 0; i < 3; i++)
    {
        for (int j = 0; j < 3; j++)
        {
            r(i, j) = m(j, i);
        }
    }

    // translate by the negative offsets
    r.translate(-Vector3(m.data(12), m.data(13), m.data(14)));

    return r;
}

// Transform the given 3d point with the given matrix.
inline Vector3 transformPoint(Matrix::Arg m, Vector3::Arg p) { return Vector3( p.x * m(0,0) + p.y * m(0,1) + p.z * m(0,2) + m(0,3), p.x * m(1,0) + p.y * m(1,1) + p.z * m(1,2) + m(1,3), p.x * m(2,0) + p.y * m(2,1) + p.z * m(2,2) + m(2,3)); } // Transform the given 3d vector with the given matrix. inline Vector3 transformVector(Matrix::Arg m, Vector3::Arg p) { return Vector3( p.x * m(0,0) + p.y * m(0,1) + p.z * m(0,2), p.x * m(1,0) + p.y * m(1,1) + p.z * m(1,2), p.x * m(2,0) + p.y * m(2,1) + p.z * m(2,2)); } // Transform the given 4d vector with the given matrix. inline Vector4 transform(Matrix::Arg m, Vector4::Arg p) { return Vector4( p.x * m(0,0) + p.y * m(0,1) + p.z * m(0,2) + p.w * m(0,3), p.x * m(1,0) + p.y * m(1,1) + p.z * m(1,2) + p.w * m(1,3), p.x * m(2,0) + p.y * m(2,1) + p.z * m(2,2) + p.w * m(2,3), p.x * m(3,0) + p.y * m(3,1) + p.z * m(3,2) + p.w * m(3,3)); } inline Matrix mul(Matrix::Arg a, Matrix::Arg b) { // @@ Is this the right order? mul(a, b) = b * a Matrix m = a; m.apply(b); return m; } inline void Matrix::operator+=(const Matrix & m) { for(int i = 0; i < 16; i++) { m_data[i] += m.m_data[i]; } } inline void Matrix::operator-=(const Matrix & m) { for(int i = 0; i < 16; i++) { m_data[i] -= m.m_data[i]; } } inline Matrix operator+(const Matrix & a, const Matrix & b) { Matrix m = a; m += b; return m; } inline Matrix operator-(const Matrix & a, const Matrix & b) { Matrix m = a; m -= b; return m; } } // nv namespace #if 0 // old code. /** @name Special matrices. */ //@{ /** Generate a translation matrix. */ void TranslationMatrix(const Vec3 & v) { data[0] = 1; data[1] = 0; data[2] = 0; data[3] = 0; data[4] = 0; data[5] = 1; data[6] = 0; data[7] = 0; data[8] = 0; data[9] = 0; data[10] = 1; data[11] = 0; data[12] = v.x; data[13] = v.y; data[14] = v.z; data[15] = 1; } /** Rotate theta degrees around v. 
*/ void RotationMatrix( float theta, float v0, float v1, float v2 ) { float cost = cos(theta); float sint = sin(theta); if( 1 == v0 && 0 == v1 && 0 == v2 ) { data[0] = 1.0f; data[1] = 0.0f; data[2] = 0.0f; data[3] = 0.0f; data[4] = 0.0f; data[5] = cost; data[6] = -sint;data[7] = 0.0f; data[8] = 0.0f; data[9] = sint; data[10] = cost;data[11] = 0.0f; data[12] = 0.0f;data[13] = 0.0f;data[14] = 0.0f;data[15] = 1.0f; } else if( 0 == v0 && 1 == v1 && 0 == v2 ) { data[0] = cost; data[1] = 0.0f; data[2] = sint; data[3] = 0.0f; data[4] = 0.0f; data[5] = 1.0f; data[6] = 0.0f; data[7] = 0.0f; data[8] = -sint;data[9] = 0.0f;data[10] = cost; data[11] = 0.0f; data[12] = 0.0f;data[13] = 0.0f;data[14] = 0.0f;data[15] = 1.0f; } else if( 0 == v0 && 0 == v1 && 1 == v2 ) { data[0] = cost; data[1] = -sint;data[2] = 0.0f; data[3] = 0.0f; data[4] = sint; data[5] = cost; data[6] = 0.0f; data[7] = 0.0f; data[8] = 0.0f; data[9] = 0.0f; data[10] = 1.0f;data[11] = 0.0f; data[12] = 0.0f;data[13] = 0.0f;data[14] = 0.0f;data[15] = 1.0f; } else { //we need scale a,b,c to unit length. float a2, b2, c2; a2 = v0 * v0; b2 = v1 * v1; c2 = v2 * v2; float iscale = 1.0f / sqrtf(a2 + b2 + c2); v0 *= iscale; v1 *= iscale; v2 *= iscale; float abm, acm, bcm; float mcos, asin, bsin, csin; mcos = 1.0f - cost; abm = v0 * v1 * mcos; acm = v0 * v2 * mcos; bcm = v1 * v2 * mcos; asin = v0 * sint; bsin = v1 * sint; csin = v2 * sint; data[0] = a2 * mcos + cost; data[1] = abm - csin; data[2] = acm + bsin; data[3] = abm + csin; data[4] = 0.0f; data[5] = b2 * mcos + cost; data[6] = bcm - asin; data[7] = acm - bsin; data[8] = 0.0f; data[9] = bcm + asin; data[10] = c2 * mcos + cost; data[11] = 0.0f; data[12] = 0.0f; data[13] = 0.0f; data[14] = 0.0f; data[15] = 1.0f; } } /* void SkewMatrix(float angle, const Vec3 & v1, const Vec3 & v2) { v1.Normalize(); v2.Normalize(); Vec3 v3; v3.Cross(v1, v2); v3.Normalize(); // Get skew factor. 
float costheta = Vec3DotProduct(v1, v2); float sintheta = Real.Sqrt(1 - costheta * costheta); float skew = tan(Trig.DegreesToRadians(angle) + acos(sintheta)) * sintheta - costheta; // Build orthonormal matrix. v1 = FXVector3.Cross(v3, v2); v1.Normalize(); Matrix R = Matrix::Identity; -R[0, 0] = v3.X; // Not sure this is in the correct order... +R[0, 0] = v3.X; // Not sure this is in the correct order... R[1, 0] = v3.Y; R[2, 0] = v3.Z; R[0, 1] = v1.X; R[1, 1] = v1.Y; R[2, 1] = v1.Z; R[0, 2] = v2.X; R[1, 2] = v2.Y; R[2, 2] = v2.Z; // Build skew matrix. Matrix S = Matrix::Identity; S[2, 1] = -skew; // Return skew transform. return R * S * R.Transpose; // Not sure this is in the correct order... } */ /** * Generate rotation matrix for the euler angles. This is the same as computing * 3 rotation matrices and multiplying them together in our custom order. * * @todo Have to recompute this code for our new convention. **/ void RotationMatrix( float yaw, float pitch, float roll ) { float sy = sin(yaw+ToRadian(90)); float cy = cos(yaw+ToRadian(90)); float sp = sin(pitch-ToRadian(90)); float cp = cos(pitch-ToRadian(90)); float sr = sin(roll); float cr = cos(roll); data[0] = cr*cy + sr*sp*sy; data[1] = cp*sy; data[2] = -sr*cy + cr*sp*sy; data[3] = 0; data[4] = -cr*sy + sr*sp*cy; data[5] = cp*cy; data[6] = sr*sy + cr*sp*cy; data[7] = 0; data[8] = sr*cp; data[9] = -sp; data[10] = cr*cp; data[11] = 0; data[12] = 0; data[13] = 0; data[14] = 0; data[15] = 1; } /** Create a frustum matrix with the far plane at the infinity. 
*/ void Frustum( float xmin, float xmax, float ymin, float ymax, float zNear, float zFar ) { float one_deltax, one_deltay, one_deltaz, doubleznear; doubleznear = 2.0f * zNear; one_deltax = 1.0f / (xmax - xmin); one_deltay = 1.0f / (ymax - ymin); one_deltaz = 1.0f / (zFar - zNear); data[0] = (float)(doubleznear * one_deltax); data[1] = 0.0f; data[2] = 0.0f; data[3] = 0.0f; data[4] = 0.0f; data[5] = (float)(doubleznear * one_deltay); data[6] = 0.f; data[7] = 0.f; data[8] = (float)((xmax + xmin) * one_deltax); data[9] = (float)((ymax + ymin) * one_deltay); data[10] = (float)(-(zFar + zNear) * one_deltaz); data[11] = -1.f; data[12] = 0.f; data[13] = 0.f; data[14] = (float)(-(zFar * doubleznear) * one_deltaz); data[15] = 0.f; } /** Create a frustum matrix with the far plane at the infinity. */ void FrustumInf( float xmin, float xmax, float ymin, float ymax, float zNear ) { float one_deltax, one_deltay, doubleznear, nudge; doubleznear = 2.0f * zNear; one_deltax = 1.0f / (xmax - xmin); one_deltay = 1.0f / (ymax - ymin); nudge = 1.0; // 0.999; data[0] = doubleznear * one_deltax; data[1] = 0.0f; data[2] = 0.0f; data[3] = 0.0f; data[4] = 0.0f; data[5] = doubleznear * one_deltay; data[6] = 0.f; data[7] = 0.f; data[8] = (xmax + xmin) * one_deltax; data[9] = (ymax + ymin) * one_deltay; data[10] = -1.0f * nudge; data[11] = -1.0f; data[12] = 0.f; data[13] = 0.f; data[14] = -doubleznear * nudge; data[15] = 0.f; } /** Create an inverse frustum matrix with the far plane at the infinity. */ void FrustumInfInv( float left, float right, float bottom, float top, float zNear ) { // this matrix is wrong (not tested floatly) I think it should be transposed. 
data[0] = (right - left) / (2 * zNear); data[1] = 0; data[2] = 0; data[3] = (right + left) / (2 * zNear); data[4] = 0; data[5] = (top - bottom) / (2 * zNear); data[6] = 0; data[7] = (top + bottom) / (2 * zNear); data[8] = 0; data[9] = 0; data[10] = 0; data[11] = -1; data[12] = 0; data[13] = 0; data[14] = -1 / (2 * zNear); data[15] = 1 / (2 * zNear); } /** Create an homogeneous projection matrix. */ void Perspective( float fov, float aspect, float zNear, float zFar ) { float xmin, xmax, ymin, ymax; xmax = zNear * tan( fov/2 ); xmin = -xmax; ymax = xmax / aspect; ymin = -ymax; Frustum(xmin, xmax, ymin, ymax, zNear, zFar); } /** Create a projection matrix with the far plane at the infinity. */ void PerspectiveInf( float fov, float aspect, float zNear ) { float x = zNear * tan( fov/2 ); float y = x / aspect; FrustumInf( -x, x, -y, y, zNear ); } /** Create an inverse projection matrix with far plane at the infinity. */ void PerspectiveInfInv( float fov, float aspect, float zNear ) { float x = zNear * tan( fov/2 ); float y = x / aspect; FrustumInfInv( -x, x, -y, y, zNear ); } /** Build bone matrix from quatertion and offset. */ void BoneMatrix(const Quat & q, const Vec3 & offset) { float x2, y2, z2, xx, xy, xz, yy, yz, zz, wx, wy, wz; // calculate coefficients x2 = q.x + q.x; y2 = q.y + q.y; z2 = q.z + q.z; xx = q.x * x2; xy = q.x * y2; xz = q.x * z2; yy = q.y * y2; yz = q.y * z2; zz = q.z * z2; wx = q.w * x2; wy = q.w * y2; wz = q.w * z2; data[0] = 1.0f - (yy + zz); data[1] = xy - wz; data[2] = xz + wy; data[3] = 0.0f; data[4] = xy + wz; data[5] = 1.0f - (xx + zz); data[6] = yz - wx; data[7] = 0.0f; data[8] = xz - wy; data[9] = yz + wx; data[10] = 1.0f - (xx + yy); data[11] = 0.0f; data[12] = offset.x; data[13] = offset.y; data[14] = offset.z; data[15] = 1.0f; } //@} /** @name Transformations: */ //@{ /** Apply a general scale. 
*/ void Scale( float x, float y, float z ) { data[0] *= x; data[4] *= y; data[8] *= z; data[1] *= x; data[5] *= y; data[9] *= z; data[2] *= x; data[6] *= y; data[10] *= z; data[3] *= x; data[7] *= y; data[11] *= z; } /** Apply a rotation of theta degrees around the axis v*/ void Rotate( float theta, const Vec3 & v ) { Matrix b; b.RotationMatrix( theta, v[0], v[1], v[2] ); Multiply4x3( b ); } /** Apply a rotation of theta degrees around the axis v*/ void Rotate( float theta, float v0, float v1, float v2 ) { Matrix b; b.RotationMatrix( theta, v0, v1, v2 ); Multiply4x3( b ); } /** * Translate the matrix by t. This is the same as multiplying by a * translation matrix with the given offset. * this = T * this */ void Translate( const Vec3 &t ) { data[12] = data[0] * t.x + data[4] * t.y + data[8] * t.z + data[12]; data[13] = data[1] * t.x + data[5] * t.y + data[9] * t.z + data[13]; data[14] = data[2] * t.x + data[6] * t.y + data[10] * t.z + data[14]; data[15] = data[3] * t.x + data[7] * t.y + data[11] * t.z + data[15]; } /** * Translate the matrix by x, y, z. This is the same as multiplying by a * translation matrix with the given offsets. */ void Translate( float x, float y, float z ) { data[12] = data[0] * x + data[4] * y + data[8] * z + data[12]; data[13] = data[1] * x + data[5] * y + data[9] * z + data[13]; data[14] = data[2] * x + data[6] * y + data[10] * z + data[14]; data[15] = data[3] * x + data[7] * y + data[11] * z + data[15]; } /** Compute the transposed matrix. */ void Transpose() { piSwap(data[1], data[4]); piSwap(data[2], data[8]); piSwap(data[6], data[9]); piSwap(data[3], data[12]); piSwap(data[7], data[13]); piSwap(data[11], data[14]); } /** Compute the inverse of a rigid-body/isometry/orthonormal matrix. 
*/ void IsometryInverse() { // transposed 3x3 upper left matrix piSwap(data[1], data[4]); piSwap(data[2], data[8]); piSwap(data[6], data[9]); // translate by the negative offsets Vec3 v(-data[12], -data[13], -data[14]); data[12] = data[13] = data[14] = 0; Translate(v); } /** Compute the inverse of the affine portion of this matrix. */ void AffineInverse() { data[12] = data[13] = data[14] = 0; Transpose(); } //@} /** @name Matrix operations: */ //@{ /** Return the determinant of this matrix. */ float Determinant() const { return data[0] * data[5] * data[10] * data[15] + data[1] * data[6] * data[11] * data[12] + data[2] * data[7] * data[ 8] * data[13] + data[3] * data[4] * data[ 9] * data[14] - data[3] * data[6] * data[ 9] * data[12] - data[2] * data[5] * data[ 8] * data[15] - data[1] * data[4] * data[11] * data[14] - data[0] * data[7] * data[10] * data[12]; } /** Standard matrix product: this *= B. */ void Multiply4x4( const Matrix & restrict B ) { Multiply4x4(*this, B); } /** Standard matrix product: this = A * B. 
this != B*/ void Multiply4x4( const Matrix & A, const Matrix & restrict B ) { piDebugCheck(this != &B); for(int i = 0; i < 4; i++) { const float ai0 = A(i,0), ai1 = A(i,1), ai2 = A(i,2), ai3 = A(i,3); GetElem(i,0) = ai0 * B(0,0) + ai1 * B(1,0) + ai2 * B(2,0) + ai3 * B(3,0); GetElem(i,1) = ai0 * B(0,1) + ai1 * B(1,1) + ai2 * B(2,1) + ai3 * B(3,1); GetElem(i,2) = ai0 * B(0,2) + ai1 * B(1,2) + ai2 * B(2,2) + ai3 * B(3,2); GetElem(i,3) = ai0 * B(0,3) + ai1 * B(1,3) + ai2 * B(2,3) + ai3 * B(3,3); } /* Unrolled but does not allow this == A data[0] = A.data[0] * B.data[0] + A.data[4] * B.data[1] + A.data[8] * B.data[2] + A.data[12] * B.data[3]; data[1] = A.data[1] * B.data[0] + A.data[5] * B.data[1] + A.data[9] * B.data[2] + A.data[13] * B.data[3]; data[2] = A.data[2] * B.data[0] + A.data[6] * B.data[1] + A.data[10] * B.data[2] + A.data[14] * B.data[3]; data[3] = A.data[3] * B.data[0] + A.data[7] * B.data[1] + A.data[11] * B.data[2] + A.data[15] * B.data[3]; data[4] = A.data[0] * B.data[4] + A.data[4] * B.data[5] + A.data[8] * B.data[6] + A.data[12] * B.data[7]; data[5] = A.data[1] * B.data[4] + A.data[5] * B.data[5] + A.data[9] * B.data[6] + A.data[13] * B.data[7]; data[6] = A.data[2] * B.data[4] + A.data[6] * B.data[5] + A.data[10] * B.data[6] + A.data[14] * B.data[7]; data[7] = A.data[3] * B.data[4] + A.data[7] * B.data[5] + A.data[11] * B.data[6] + A.data[15] * B.data[7]; data[8] = A.data[0] * B.data[8] + A.data[4] * B.data[9] + A.data[8] * B.data[10] + A.data[12] * B.data[11]; data[9] = A.data[1] * B.data[8] + A.data[5] * B.data[9] + A.data[9] * B.data[10] + A.data[13] * B.data[11]; data[10]= A.data[2] * B.data[8] + A.data[6] * B.data[9] + A.data[10] * B.data[10] + A.data[14] * B.data[11]; data[11]= A.data[3] * B.data[8] + A.data[7] * B.data[9] + A.data[11] * B.data[10] + A.data[15] * B.data[11]; data[12]= A.data[0] * B.data[12] + A.data[4] * B.data[13] + A.data[8] * B.data[14] + A.data[12] * B.data[15]; data[13]= A.data[1] * B.data[12] + A.data[5] * B.data[13] + 
A.data[9] * B.data[14] + A.data[13] * B.data[15]; data[14]= A.data[2] * B.data[12] + A.data[6] * B.data[13] + A.data[10] * B.data[14] + A.data[14] * B.data[15]; data[15]= A.data[3] * B.data[12] + A.data[7] * B.data[13] + A.data[11] * B.data[14] + A.data[15] * B.data[15]; */ } /** Standard matrix product: this *= B. */ void Multiply4x3( const Matrix & restrict B ) { Multiply4x3(*this, B); } /** Standard product of matrices, where the last row is [0 0 0 1]. */ void Multiply4x3( const Matrix & A, const Matrix & restrict B ) { piDebugCheck(this != &B); for(int i = 0; i < 3; i++) { const float ai0 = A(i,0), ai1 = A(i,1), ai2 = A(i,2), ai3 = A(i,3); GetElem(i,0) = ai0 * B(0,0) + ai1 * B(1,0) + ai2 * B(2,0) + ai3 * B(3,0); GetElem(i,1) = ai0 * B(0,1) + ai1 * B(1,1) + ai2 * B(2,1) + ai3 * B(3,1); GetElem(i,2) = ai0 * B(0,2) + ai1 * B(1,2) + ai2 * B(2,2) + ai3 * B(3,2); GetElem(i,3) = ai0 * B(0,3) + ai1 * B(1,3) + ai2 * B(2,3) + ai3 * B(3,3); } data[3] = 0.0f; data[7] = 0.0f; data[11] = 0.0f; data[15] = 1.0f; /* Unrolled but does not allow this == A data[0] = a.data[0] * b.data[0] + a.data[4] * b.data[1] + a.data[8] * b.data[2] + a.data[12] * b.data[3]; data[1] = a.data[1] * b.data[0] + a.data[5] * b.data[1] + a.data[9] * b.data[2] + a.data[13] * b.data[3]; data[2] = a.data[2] * b.data[0] + a.data[6] * b.data[1] + a.data[10] * b.data[2] + a.data[14] * b.data[3]; data[3] = 0.0f; data[4] = a.data[0] * b.data[4] + a.data[4] * b.data[5] + a.data[8] * b.data[6] + a.data[12] * b.data[7]; data[5] = a.data[1] * b.data[4] + a.data[5] * b.data[5] + a.data[9] * b.data[6] + a.data[13] * b.data[7]; data[6] = a.data[2] * b.data[4] + a.data[6] * b.data[5] + a.data[10] * b.data[6] + a.data[14] * b.data[7]; data[7] = 0.0f; data[8] = a.data[0] * b.data[8] + a.data[4] * b.data[9] + a.data[8] * b.data[10] + a.data[12] * b.data[11]; data[9] = a.data[1] * b.data[8] + a.data[5] * b.data[9] + a.data[9] * b.data[10] + a.data[13] * b.data[11]; data[10]= a.data[2] * b.data[8] + a.data[6] * b.data[9] 
+ a.data[10] * b.data[10] + a.data[14] * b.data[11]; data[11]= 0.0f; data[12]= a.data[0] * b.data[12] + a.data[4] * b.data[13] + a.data[8] * b.data[14] + a.data[12] * b.data[15]; data[13]= a.data[1] * b.data[12] + a.data[5] * b.data[13] + a.data[9] * b.data[14] + a.data[13] * b.data[15]; data[14]= a.data[2] * b.data[12] + a.data[6] * b.data[13] + a.data[10] * b.data[14] + a.data[14] * b.data[15]; data[15]= 1.0f; */ } //@} /** @name Vector operations: */ //@{ /** Transform 3d vector (w=0). */ void TransformVec3(const Vec3 & restrict orig, Vec3 * restrict dest) const { piDebugCheck(&orig != dest); dest->x = orig.x * data[0] + orig.y * data[4] + orig.z * data[8]; dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9]; dest->z = orig.x * data[2] + orig.y * data[6] + orig.z * data[10]; } /** Transform 3d vector by the transpose (w=0). */ void TransformVec3T(const Vec3 & restrict orig, Vec3 * restrict dest) const { piDebugCheck(&orig != dest); dest->x = orig.x * data[0] + orig.y * data[1] + orig.z * data[2]; dest->y = orig.x * data[4] + orig.y * data[5] + orig.z * data[6]; dest->z = orig.x * data[8] + orig.y * data[9] + orig.z * data[10]; } /** Transform a 3d homogeneous vector, where the fourth coordinate is assumed to be 1. */ void TransformPoint(const Vec3 & restrict orig, Vec3 * restrict dest) const { piDebugCheck(&orig != dest); dest->x = orig.x * data[0] + orig.y * data[4] + orig.z * data[8] + data[12]; dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9] + data[13]; dest->z = orig.x * data[2] + orig.y * data[6] + orig.z * data[10] + data[14]; } /** Transform a point, normalize it, and return w. 
*/ float TransformPointAndNormalize(const Vec3 & restrict orig, Vec3 * restrict dest) const { piDebugCheck(&orig != dest); float w; dest->x = orig.x * data[0] + orig.y * data[4] + orig.z * data[8] + data[12]; dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9] + data[13]; dest->z = orig.x * data[2] + orig.y * data[6] + orig.z * data[10] + data[14]; w = 1 / (orig.x * data[3] + orig.y * data[7] + orig.z * data[11] + data[15]); *dest *= w; return w; } /** Transform a point and return w. */ float TransformPointReturnW(const Vec3 & restrict orig, Vec3 * restrict dest) const { piDebugCheck(&orig != dest); dest->x = orig.x * data[0] + orig.y * data[4] + orig.z * data[8] + data[12]; dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9] + data[13]; dest->z = orig.x * data[2] + orig.y * data[6] + orig.z * data[10] + data[14]; return orig.x * data[3] + orig.y * data[7] + orig.z * data[11] + data[15]; } /** Transform a normalized 3d point by a 4d matrix and return the resulting 4d vector. */ void TransformVec4(const Vec3 & orig, Vec4 * dest) const { dest->x = orig.x * data[0] + orig.y * data[4] + orig.z * data[8] + data[12]; dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9] + data[13]; dest->z = orig.x * data[2] + orig.y * data[6] + orig.z * data[10] + data[14]; dest->w = orig.x * data[3] + orig.y * data[7] + orig.z * data[11] + data[15]; } //@} /** @name Matrix analysis. */ //@{ /** Get the ZYZ euler angles from the matrix. Assumes the matrix is orthonormal. 
*/ void GetEulerAnglesZYZ(float * s, float * t, float * r) const { if( GetElem(2,2) < 1.0f ) { if( GetElem(2,2) > -1.0f ) { // cs*ct*cr-ss*sr -ss*ct*cr-cs*sr st*cr // cs*ct*sr+ss*cr -ss*ct*sr+cs*cr st*sr // -cs*st ss*st ct *s = atan2(GetElem(1,2), -GetElem(0,2)); *t = acos(GetElem(2,2)); *r = atan2(GetElem(2,1), GetElem(2,0)); } else { // -c(s-r) s(s-r) 0 // s(s-r) c(s-r) 0 // 0 0 -1 *s = atan2(GetElem(0, 1), -GetElem(0, 0)); // = s-r *t = PI; *r = 0; } } else { // c(s+r) -s(s+r) 0 // s(s+r) c(s+r) 0 // 0 0 1 *s = atan2(GetElem(0, 1), GetElem(0, 0)); // = s+r *t = 0; *r = 0; } } //@} MATHLIB_API friend PiStream & operator<< ( PiStream & s, Matrix & m ); /** Print to debug output. */ void Print() const { piDebug( "[ %5.2f %5.2f %5.2f %5.2f ]\n", data[0], data[4], data[8], data[12] ); piDebug( "[ %5.2f %5.2f %5.2f %5.2f ]\n", data[1], data[5], data[9], data[13] ); piDebug( "[ %5.2f %5.2f %5.2f %5.2f ]\n", data[2], data[6], data[10], data[14] ); piDebug( "[ %5.2f %5.2f %5.2f %5.2f ]\n", data[3], data[7], data[11], data[15] ); } public: float data[16]; }; #endif #endif // NV_MATH_MATRIX_INL Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/PackedFloat.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvmath/PackedFloat.cpp (revision 23379) +++ ps/trunk/libraries/source/nvtt/src/src/nvmath/PackedFloat.cpp (revision 23380) @@ -1,61 +1,61 @@ -// This code is in the public domain -- Ignacio Castaño +// This code is in the public domain -- Ignacio Castaño #include "PackedFloat.h" #include "Vector.inl" #include "ftoi.h" using namespace nv; Vector3 nv::rgb9e5_to_vector3(FloatRGB9E5 v) { } FloatRGB9E5 nv::vector3_to_rgb9e5(const Vector3 & v) { } float nv::float11_to_float32(uint v) { } float nv::float10_to_float32(uint v) { } Vector3 nv::r11g11b10_to_vector3(FloatR11G11B10 v) { } FloatR11G11B10 nv::vector3_to_r11g11b10(const Vector3 & v) { } // These are based on: // 
http://www.graphics.cornell.edu/~bjw/rgbe/rgbe.c // While this may not be the best way to encode/decode RGBE8, I'm not making any changes to maintain compatibility. FloatRGBE8 nv::vector3_to_rgbe8(const Vector3 & v) { float m = max3(v.x, v.y, v.z); FloatRGBE8 rgbe; if (m < 1e-32) { rgbe.v = 0; } else { int e; float scale = frexpf(m, &e) * 256.0f / m; rgbe.r = U8(ftoi_round(v.x * scale)); rgbe.g = U8(ftoi_round(v.y * scale)); rgbe.b = U8(ftoi_round(v.z * scale)); rgbe.e = U8(e + 128); } return rgbe; } Vector3 nv::rgbe8_to_vector3(FloatRGBE8 v) { if (v.e != 0) { float scale = ldexpf(1.0f, v.e-(int)(128+8)); // +8 to divide by 256. @@ Shouldn't we divide by 255 instead? return scale * Vector3(float(v.r), float(v.g), float(v.b)); } return Vector3(0); } Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/Plane.h =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvmath/Plane.h (revision 23379) +++ ps/trunk/libraries/source/nvtt/src/src/nvmath/Plane.h (revision 23380) @@ -1,45 +1,45 @@ -// This code is in the public domain -- Ignacio Castaño +// This code is in the public domain -- Ignacio Castaño #pragma once #ifndef NV_MATH_PLANE_H #define NV_MATH_PLANE_H #include "nvmath.h" #include "Vector.h" #if NV_USE_ALTIVEC #undef vector #endif namespace nv { class Matrix; class NVMATH_CLASS Plane { public: Plane(); Plane(float x, float y, float z, float w); Plane(const Vector4 & v); Plane(const Vector3 & v, float d); Plane(const Vector3 & normal, const Vector3 & point); Plane(const Vector3 & v0, const Vector3 & v1, const Vector3 & v2); const Plane & operator=(const Plane & v); Vector3 vector() const; float offset() const; void operator*=(float s); Vector4 v; }; Plane transformPlane(const Matrix &, const Plane &); Vector3 planeIntersection(const Plane & a, const Plane & b, const Plane & c); } // nv namespace #endif // NV_MATH_PLANE_H Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/Plane.inl 
=================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvmath/Plane.inl (revision 23379) +++ ps/trunk/libraries/source/nvtt/src/src/nvmath/Plane.inl (revision 23380) @@ -1,49 +1,49 @@ -// This code is in the public domain -- Ignacio Castaño +// This code is in the public domain -- Ignacio Castaño #pragma once #ifndef NV_MATH_PLANE_INL #define NV_MATH_PLANE_INL #include "Plane.h" #include "Vector.inl" namespace nv { inline Plane::Plane() {} inline Plane::Plane(float x, float y, float z, float w) : v(x, y, z, w) {} inline Plane::Plane(const Vector4 & v) : v(v) {} inline Plane::Plane(const Vector3 & v, float d) : v(v, d) {} inline Plane::Plane(const Vector3 & normal, const Vector3 & point) : v(normal, -dot(normal, point)) {} inline Plane::Plane(const Vector3 & v0, const Vector3 & v1, const Vector3 & v2) { Vector3 n = cross(v1-v0, v2-v0); float d = -dot(n, v0); v = Vector4(n, d); } inline const Plane & Plane::operator=(const Plane & p) { v = p.v; return *this; } inline Vector3 Plane::vector() const { return v.xyz(); } inline float Plane::offset() const { return v.w; } // Normalize plane. inline Plane normalize(const Plane & plane, float epsilon = NV_EPSILON) { const float len = length(plane.vector()); const float inv = isZero(len, epsilon) ? 0 : 1.0f / len; return Plane(plane.v * inv); } // Get the signed distance from the given point to this plane. 
inline float distance(const Plane & plane, const Vector3 & point) { return dot(plane.vector(), point) + plane.offset(); } inline void Plane::operator*=(float s) { v *= s; } } // nv namespace #endif // NV_MATH_PLANE_H Index: ps/trunk/libraries/source/nvtt/src/src/nvthread/ParallelFor.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvthread/ParallelFor.cpp (revision 23379) +++ ps/trunk/libraries/source/nvtt/src/src/nvthread/ParallelFor.cpp (revision 23380) @@ -1,61 +1,61 @@ -// This code is in the public domain -- Ignacio Castaño +// This code is in the public domain -- Ignacio Castaño #include "ParallelFor.h" #include "Thread.h" #include "Atomic.h" #include "ThreadPool.h" #include "nvcore/Utils.h" // toI32 using namespace nv; #define ENABLE_PARALLEL_FOR 1 static void worker(void * arg, int tid) { ParallelFor * owner = (ParallelFor *)arg; while(true) { uint new_idx = atomicFetchAndAdd(&owner->idx, owner->step); if (new_idx >= owner->count) { break; } const uint count = min(owner->count, new_idx + owner->step); for (uint i = new_idx; i < count; i++) { owner->task(owner->context, /*tid, */i); } } } ParallelFor::ParallelFor(ForTask * task, void * context) : task(task), context(context) { #if ENABLE_PARALLEL_FOR pool = ThreadPool::acquire(); #endif } ParallelFor::~ParallelFor() { #if ENABLE_PARALLEL_FOR ThreadPool::release(pool); #endif } void ParallelFor::run(uint count, uint step/*= 1*/) { #if ENABLE_PARALLEL_FOR storeRelease(&this->count, count); storeRelease(&this->step, step); // Init atomic counter to zero. storeRelease(&idx, 0); // Start threads. 
pool->run(worker, this); nvDebugCheck(idx >= count); #else for (int i = 0; i < toI32(count); i++) { task(context, i); } #endif } Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/CubeSurface.cpp =================================================================== --- ps/trunk/libraries/source/nvtt/src/src/nvtt/CubeSurface.cpp (revision 23379) +++ ps/trunk/libraries/source/nvtt/src/src/nvtt/CubeSurface.cpp (revision 23380) @@ -1,1042 +1,1042 @@ // Copyright (c) 2009-2011 Ignacio Castano // // Permission is hereby granted, free of charge, to any person // obtaining a copy of this software and associated documentation // files (the "Software"), to deal in the Software without // restriction, including without limitation the rights to use, // copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the // Software is furnished to do so, subject to the following // conditions: // // The above copyright notice and this permission notice shall be // included in all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR // OTHER DEALINGS IN THE SOFTWARE. #include "CubeSurface.h" #include "Surface.h" #include "nvimage/DirectDrawSurface.h" #include "nvmath/Vector.inl" #include "nvcore/Array.inl" #include "nvcore/StrLib.h" using namespace nv; using namespace nvtt; // Solid angle of an axis aligned quad from (0,0,1) to (x,y,1) // See: http://www.fizzmoll11.com/thesis/ for a derivation of this formula. 
static float areaElement(float x, float y) { return atan2(x*y, sqrtf(x*x + y*y + 1)); } // Solid angle of a hemicube texel. static float solidAngleTerm(uint x, uint y, float inverseEdgeLength) { // Transform x,y to [-1, 1] range, offset by 0.5 to point to texel center. float u = (float(x) + 0.5f) * (2 * inverseEdgeLength) - 1.0f; float v = (float(y) + 0.5f) * (2 * inverseEdgeLength) - 1.0f; nvDebugCheck(u >= -1.0f && u <= 1.0f); nvDebugCheck(v >= -1.0f && v <= 1.0f); #if 1 // Exact solid angle: float x0 = u - inverseEdgeLength; float y0 = v - inverseEdgeLength; float x1 = u + inverseEdgeLength; float y1 = v + inverseEdgeLength; float solidAngle = areaElement(x0, y0) - areaElement(x0, y1) - areaElement(x1, y0) + areaElement(x1, y1); nvDebugCheck(solidAngle > 0.0f); return solidAngle; #else // This formula is equivalent, but not as precise. float pixel_area = nv::square(2.0f * inverseEdgeLength); float dist_square = 1.0f + nv::square(u) + nv::square(v); float cos_theta = 1.0f / sqrt(dist_square); float cos_theta_d2 = cos_theta / dist_square; // Funny this is just 1/dist^3 or cos(tetha)^3 return pixel_area * cos_theta_d2; #endif } static Vector3 texelDirection(uint face, uint x, uint y, int edgeLength, EdgeFixup fixupMethod) { float u, v; if (fixupMethod == EdgeFixup_Stretch) { // Transform x,y to [-1, 1] range, match up edges exactly. u = float(x) * 2.0f / (edgeLength - 1) - 1.0f; v = float(y) * 2.0f / (edgeLength - 1) - 1.0f; } else { // Transform x,y to [-1, 1] range, offset by 0.5 to point to texel center. u = (float(x) + 0.5f) * (2.0f / edgeLength) - 1.0f; v = (float(y) + 0.5f) * (2.0f / edgeLength) - 1.0f; } if (fixupMethod == EdgeFixup_Warp) { // Warp texel centers in the proximity of the edges. 
float a = powf(float(edgeLength), 2.0f) / powf(float(edgeLength - 1), 3.0f); u = a * powf(u, 3) + u; v = a * powf(v, 3) + v; } nvDebugCheck(u >= -1.0f && u <= 1.0f); nvDebugCheck(v >= -1.0f && v <= 1.0f); Vector3 n; if (face == 0) { n.x = 1; n.y = -v; n.z = -u; } if (face == 1) { n.x = -1; n.y = -v; n.z = u; } if (face == 2) { n.x = u; n.y = 1; n.z = v; } if (face == 3) { n.x = u; n.y = -1; n.z = -v; } if (face == 4) { n.x = u; n.y = -v; n.z = 1; } if (face == 5) { n.x = -u; n.y = -v; n.z = -1; } return normalizeFast(n); } TexelTable::TexelTable(uint edgeLength) : size(edgeLength) { uint hsize = size/2; // Allocate a small solid angle table that takes into account cube map symmetry. solidAngleArray.resize(hsize * hsize); for (uint y = 0; y < hsize; y++) { for (uint x = 0; x < hsize; x++) { solidAngleArray[y * hsize + x] = solidAngleTerm(hsize+x, hsize+y, 1.0f/edgeLength); } } directionArray.resize(size*size*6); for (uint f = 0; f < 6; f++) { for (uint y = 0; y < size; y++) { for (uint x = 0; x < size; x++) { directionArray[(f * size + y) * size + x] = texelDirection(f, x, y, edgeLength, EdgeFixup_None); } } } } const Vector3 & TexelTable::direction(uint f, uint x, uint y) const { nvDebugCheck(f < 6 && x < size && y < size); return directionArray[(f * size + y) * size + x]; } float TexelTable::solidAngle(uint f, uint x, uint y) const { uint hsize = size/2; if (x >= hsize) x -= hsize; else if (x < hsize) x = hsize - x - 1; if (y >= hsize) y -= hsize; else if (y < hsize) y = hsize - y - 1; return solidAngleArray[y * hsize + x]; } static const Vector3 faceNormals[6] = { Vector3(1, 0, 0), Vector3(-1, 0, 0), Vector3(0, 1, 0), Vector3(0, -1, 0), Vector3(0, 0, 1), Vector3(0, 0, -1), }; static const Vector3 faceU[6] = { Vector3(0, 0, -1), Vector3(0, 0, 1), Vector3(1, 0, 0), Vector3(1, 0, 0), Vector3(1, 0, 0), Vector3(-1, 0, 0), }; static const Vector3 faceV[6] = { Vector3(0, -1, 0), Vector3(0, -1, 0), Vector3(0, 0, 1), Vector3(0, 0, -1), Vector3(0, -1, 0), Vector3(0, -1, 
0), }; static Vector2 toPolar(Vector3::Arg v) { Vector2 p; p.x = atan2(v.x, v.y); // theta p.y = acosf(v.z); // phi return p; } static Vector2 toPlane(float theta, float phi) { float x = sin(phi) * cos(theta); float y = sin(phi) * sin(theta); float z = cos(phi); Vector2 p; p.x = x / fabs(z); p.y = y / fabs(z); //p.x = tan(phi) * cos(theta); //p.y = tan(phi) * sin(theta); return p; } static Vector2 toPlane(Vector3::Arg v) { Vector2 p; p.x = v.x / fabs(v.z); p.y = v.y / fabs(v.z); return p; } CubeSurface::CubeSurface() : m(new CubeSurface::Private()) { m->addRef(); } CubeSurface::CubeSurface(const CubeSurface & cube) : m(cube.m) { if (m != NULL) m->addRef(); } CubeSurface::~CubeSurface() { if (m != NULL) m->release(); m = NULL; } void CubeSurface::operator=(const CubeSurface & cube) { if (cube.m != NULL) cube.m->addRef(); if (m != NULL) m->release(); m = cube.m; } void CubeSurface::detach() { if (m->refCount() > 1) { m->release(); m = new CubeSurface::Private(*m); m->addRef(); nvDebugCheck(m->refCount() == 1); } } bool CubeSurface::isNull() const { return m->edgeLength == 0; } int CubeSurface::edgeLength() const { return m->edgeLength; } int CubeSurface::countMipmaps() const { return nv::countMipmaps(m->edgeLength); } Surface & CubeSurface::face(int f) { nvDebugCheck(f >= 0 && f < 6); return m->face[f]; } const Surface & CubeSurface::face(int f) const { nvDebugCheck(f >= 0 && f < 6); return m->face[f]; } bool CubeSurface::load(const char * fileName, int mipmap) { if (strEqual(Path::extension(fileName), ".dds")) { nv::DirectDrawSurface dds(fileName); if (!dds.isValid()/* || !dds.isSupported()*/) { return false; } if (!dds.isTextureCube()) { return false; } // Make sure it's a valid cube. 
if (dds.header.width != dds.header.height) return false; //if ((dds.header.caps.caps2 & DDSCAPS2_CUBEMAP_ALL_FACES) != DDSCAPS2_CUBEMAP_ALL_FACES) return false; if (mipmap < 0) { mipmap = dds.mipmapCount() - 1 - mipmap; } if (mipmap < 0 || mipmap > I32(dds.mipmapCount())) return false; nvtt::InputFormat inputFormat = nvtt::InputFormat_RGBA_16F; if (dds.header.hasDX10Header()) { if (dds.header.header10.dxgiFormat == DXGI_FORMAT_R16G16B16A16_FLOAT) inputFormat = nvtt::InputFormat_RGBA_16F; else if (dds.header.header10.dxgiFormat == DXGI_FORMAT_R32G32B32A32_FLOAT) inputFormat = nvtt::InputFormat_RGBA_32F; else if (dds.header.header10.dxgiFormat == DXGI_FORMAT_R32_FLOAT) inputFormat = nvtt::InputFormat_R_32F; else return false; } else { if ((dds.header.pf.flags & DDPF_FOURCC) != 0) { if (dds.header.pf.fourcc == D3DFMT_A16B16G16R16F) inputFormat = nvtt::InputFormat_RGBA_16F; else if (dds.header.pf.fourcc == D3DFMT_A32B32G32R32F) inputFormat = nvtt::InputFormat_RGBA_32F; else if (dds.header.pf.fourcc == D3DFMT_R32F) inputFormat = nvtt::InputFormat_R_32F; else return false; } else { if (dds.header.pf.bitcount == 32 /*&& ...*/) inputFormat = nvtt::InputFormat_BGRA_8UB; else return false; // @@ Do pixel format conversions! 
} } uint edgeLength = dds.surfaceWidth(mipmap); uint size = dds.surfaceSize(mipmap); void * data = malloc(size); for (int f = 0; f < 6; f++) { dds.readSurface(f, mipmap, data, size); m->face[f].setImage(inputFormat, edgeLength, edgeLength, 1, data); } m->edgeLength = edgeLength; free(data); return true; } return false; } bool CubeSurface::save(const char * fileName) const { // @@ TODO return false; } struct ivec2 { uint x; uint y; }; // posx negx posy negy posz negz static const ivec2 foldOffsetVerticalCross[6] = { {2, 1}, {0, 1}, {1, 0}, {1, 2}, {1, 1}, {1, 3} }; static const ivec2 foldOffsetHorizontalCross[6] = { {2, 1}, {0, 1}, {1, 0}, {1, 2}, {1, 1}, {3, 1} }; static const ivec2 foldOffsetColumn[6] = { {0, 0}, {0, 1}, {0, 2}, {0, 3}, {0, 4}, {0, 5} }; static const ivec2 foldOffsetRow[6] = { {0, 0}, {1, 0}, {2, 0}, {3, 0}, {4, 0}, {5, 0} }; void CubeSurface::fold(const Surface & tex, CubeLayout layout) { ivec2 const* offsets = 0; uint edgeLength; switch(layout) { case CubeLayout_LatitudeLongitude: case CubeLayout_VerticalCross: edgeLength = tex.height() / 4; offsets = foldOffsetVerticalCross; break; case CubeLayout_HorizontalCross: edgeLength = tex.width() / 4; offsets = foldOffsetHorizontalCross; break; case CubeLayout_Column: edgeLength = tex.width(); offsets = foldOffsetColumn; break; case CubeLayout_Row: edgeLength = tex.height(); offsets = foldOffsetRow; break; } m->edgeLength = edgeLength; for(uint f = 0; f < 6; f++) { uint x = offsets[f].x * edgeLength; uint y = offsets[f].y * edgeLength; m->face[f] = tex.createSubImage(x, x + edgeLength - 1, y, y + edgeLength - 1, 0, 0); } if(layout == CubeLayout_VerticalCross || layout == CubeLayout_LatitudeLongitude) { // Back face needs to be rotated 180 degrees m->face[5].flipX(); m->face[5].flipY(); } } Surface CubeSurface::unfold(CubeLayout layout) const { ivec2 const* offsets = 0; uint edgeLength = m->edgeLength; uint width; uint height; switch(layout) { case CubeLayout_LatitudeLongitude: case 
CubeLayout_VerticalCross: offsets = foldOffsetVerticalCross; width = 3 * edgeLength; height = 4 * edgeLength; // Back face needs to be rotated 180 degrees m->face[5].flipX(); m->face[5].flipY(); break; case CubeLayout_HorizontalCross: offsets = foldOffsetHorizontalCross; width = 4 * edgeLength; height = 3 * edgeLength; break; case CubeLayout_Column: offsets = foldOffsetColumn; width = edgeLength; height = 6 * edgeLength; break; case CubeLayout_Row: offsets = foldOffsetRow; width = 6 * edgeLength; height = edgeLength; break; } Surface surface; surface.setImage(width, height, 1); for(uint f = 0; f < 6; f++) { uint x = offsets[f].x * edgeLength; uint y = offsets[f].y * edgeLength; surface.copy(m->face[f], 0, 0, 0, edgeLength, edgeLength, 1, x, y, 0); } if(layout == CubeLayout_VerticalCross || layout == CubeLayout_LatitudeLongitude) { // Undo back face rotation m->face[5].flipY(); m->face[5].flipX(); } return surface; } float CubeSurface::average(int channel) const { const uint edgeLength = m->edgeLength; m->allocateTexelTable(); float total = 0.0f; float sum = 0.0f; for (int f = 0; f < 6; f++) { float * c = m->face[f].m->image->channel(channel); for (uint y = 0; y < edgeLength; y++) { for (uint x = 0; x < edgeLength; x++) { float solidAngle = m->texelTable->solidAngle(f, x, y); total += solidAngle; sum += c[y * edgeLength + x] * solidAngle; } } } return sum / total; } void CubeSurface::range(int channel, float * minimum_ptr, float * maximum_ptr) const { const uint edgeLength = m->edgeLength; m->allocateTexelTable(); float minimum = NV_FLOAT_MAX; float maximum = 0.0f; for (int f = 0; f < 6; f++) { float * c = m->face[f].m->image->channel(channel); for (uint y = 0; y < edgeLength; y++) { for (uint x = 0; x < edgeLength; x++) { minimum = nv::min(minimum, c[y * edgeLength + x]); maximum = nv::max(maximum, c[y * edgeLength + x]); } } } *minimum_ptr = minimum; *maximum_ptr = maximum; } void CubeSurface::clamp(int channel, float low/*= 0.0f*/, float high/*= 1.0f*/) { for 
(int f = 0; f < 6; f++) { m->face[f].clamp(channel, low, high); } } #include "nvmath/SphericalHarmonic.h" CubeSurface CubeSurface::irradianceFilter(int size, EdgeFixup fixupMethod) const { m->allocateTexelTable(); // Transform this cube to spherical harmonic basis Sh2 sh; // For each texel of the input cube. const uint edgeLength = m->edgeLength; for (uint f = 0; f < 6; f++) { for (uint y = 0; y < edgeLength; y++) { for (uint x = 0; x < edgeLength; x++) { Vector3 dir = m->texelTable->direction(f, x, y); float solidAngle = m->texelTable->solidAngle(f, x, y); Sh2 shDir; shDir.eval(dir); sh.addScaled(sh, solidAngle); } } } // Evaluate spherical harmonic for each output texel. CubeSurface output; output.m->allocate(size); // @@ TODO return CubeSurface(); } // Convolve filter against this cube. Vector3 CubeSurface::Private::applyAngularFilter(const Vector3 & filterDir, float coneAngle, float * filterTable, int tableSize) { const float cosineConeAngle = cos(coneAngle); nvDebugCheck(cosineConeAngle >= 0); Vector3 color(0); float sum = 0; // Things I have tried to speed this up: // - Compute accurate bounds assuming cone axis aligned to plane, result was too small elsewhere. // - Compute ellipse that results in the cone/plane intersection and compute its bounds. Sometimes intersection is a parabolla, hard to handle that case. // - Compute the 6 axis aligned planes that bound the cone, clip faces against planes. Resulting plane equations are way too complex. // What AMD CubeMapGen does: // - Compute conservative bounds on the primary face, wrap around the adjacent faces. // For each texel of the input cube. for (uint f = 0; f < 6; f++) { // Test face cone agains filter cone. float cosineFaceAngle = dot(filterDir, faceNormals[f]); float faceAngle = acosf(cosineFaceAngle); if (faceAngle > coneAngle + atanf(sqrtf(2))) { // Skip face. 
continue; } const int L = I32(edgeLength-1); int x0 = 0, x1 = L; int y0 = 0, y1 = L; #if 0 float u0 = -1; float u1 = 1; float v0 = -1; float v1 = 1; // @@ Compute uvs. // Expand uv coordinates from [-1,1] to [0, edgeLength) u0 = (u0 + 1) * edgeLength * 0.5f - 0.5f; v0 = (v0 + 1) * edgeLength * 0.5f - 0.5f; u1 = (u1 + 1) * edgeLength * 0.5f - 0.5f; v1 = (v1 + 1) * edgeLength * 0.5f - 0.5f; nvDebugCheck(u0 >= -0.5f && u0 <= edgeLength - 0.5f); nvDebugCheck(v0 >= -0.5f && v0 <= edgeLength - 0.5f); nvDebugCheck(u1 >= -0.5f && u1 <= edgeLength - 0.5f); nvDebugCheck(v1 >= -0.5f && v1 <= edgeLength - 0.5f); x0 = clamp(ifloor(u0), 0, L); y0 = clamp(ifloor(v0), 0, L); x1 = clamp(iceil(u1), 0, L); y1 = clamp(iceil(v1), 0, L); #endif nvDebugCheck(x1 >= x0); nvDebugCheck(y1 >= y0); if (x1 == x0 || y1 == y0) { // Skip this face. continue; } const Surface & inputFace = face[f]; const FloatImage * inputImage = inputFace.m->image; for (int y = y0; y <= y1; y++) { bool inside = false; for (int x = x0; x <= x1; x++) { Vector3 dir = texelTable->direction(f, x, y); float cosineAngle = dot(dir, filterDir); if (cosineAngle > cosineConeAngle) { float solidAngle = texelTable->solidAngle(f, x, y); //float scale = powf(saturate(cosineAngle), cosinePower); int idx = int(saturate(cosineAngle) * (tableSize - 1)); float scale = filterTable[idx]; // @@ Do bilinear interpolation? float contribution = solidAngle * scale; sum += contribution; color.x += contribution * inputImage->pixel(0, x, y, 0); color.y += contribution * inputImage->pixel(1, x, y, 0); color.z += contribution * inputImage->pixel(2, x, y, 0); inside = true; } else if (inside) { // Filter scale is monotonic, if we have been inside once and we just exit, then we can skip the rest of the row. // We could do the same thing for the columns and skip entire rows. 
break; } } } } color *= (1.0f / sum); return color; } // We want to find the alpha such that: // cos(alpha)^cosinePower = epsilon // That's: acos(epsilon^(1/cosinePower)) // We can cull texels in two different ways: // - culling faces that do not touch the cone. // - computing one rectangle per face, find intersection between cone and face. // - // Other speedups: // - parallelize. Done. // - use ISPC? // Convolve filter against this cube. Vector3 CubeSurface::Private::applyCosinePowerFilter(const Vector3 & filterDir, float coneAngle, float cosinePower) { const float cosineConeAngle = cos(coneAngle); nvDebugCheck(cosineConeAngle >= 0); Vector3 color(0); float sum = 0; // Things I have tried to speed this up: // - Compute accurate bounds assuming cone axis aligned to plane, result was too small elsewhere. // - Compute ellipse that results in the cone/plane intersection and compute its bounds. Sometimes intersection is a parabolla, hard to handle that case. // - Compute the 6 axis aligned planes that bound the cone, clip faces against planes. Resulting plane equations are way too complex. // What AMD CubeMapGen does: // - Compute conservative bounds on the primary face, wrap around the adjacent faces. // For each texel of the input cube. for (uint f = 0; f < 6; f++) { // Test face cone agains filter cone. float cosineFaceAngle = dot(filterDir, faceNormals[f]); float faceAngle = acosf(cosineFaceAngle); if (faceAngle > coneAngle + atanf(sqrtf(2))) { // Skip face. continue; } const int L = I32(edgeLength-1); int x0 = 0, x1 = L; int y0 = 0, y1 = L; #if 0 float u0 = -1; float u1 = 1; float v0 = -1; float v1 = 1; // @@ Compute uvs. 
// Expand uv coordinates from [-1,1] to [0, edgeLength) u0 = (u0 + 1) * edgeLength * 0.5f - 0.5f; v0 = (v0 + 1) * edgeLength * 0.5f - 0.5f; u1 = (u1 + 1) * edgeLength * 0.5f - 0.5f; v1 = (v1 + 1) * edgeLength * 0.5f - 0.5f; nvDebugCheck(u0 >= -0.5f && u0 <= edgeLength - 0.5f); nvDebugCheck(v0 >= -0.5f && v0 <= edgeLength - 0.5f); nvDebugCheck(u1 >= -0.5f && u1 <= edgeLength - 0.5f); nvDebugCheck(v1 >= -0.5f && v1 <= edgeLength - 0.5f); x0 = clamp(ifloor(u0), 0, L); y0 = clamp(ifloor(v0), 0, L); x1 = clamp(iceil(u1), 0, L); y1 = clamp(iceil(v1), 0, L); #endif nvDebugCheck(x1 >= x0); nvDebugCheck(y1 >= y0); if (x1 == x0 || y1 == y0) { // Skip this face. continue; } const Surface & inputFace = face[f]; const FloatImage * inputImage = inputFace.m->image; for (int y = y0; y <= y1; y++) { bool inside = false; for (int x = x0; x <= x1; x++) { Vector3 dir = texelTable->direction(f, x, y); float cosineAngle = dot(dir, filterDir); if (cosineAngle > cosineConeAngle) { float solidAngle = texelTable->solidAngle(f, x, y); float scale = powf(saturate(cosineAngle), cosinePower); float contribution = solidAngle * scale; sum += contribution; color.x += contribution * inputImage->pixel(0, x, y, 0); color.y += contribution * inputImage->pixel(1, x, y, 0); color.z += contribution * inputImage->pixel(2, x, y, 0); inside = true; } else if (inside) { // Filter scale is monotonic, if we have been inside once and we just exit, then we can skip the rest of the row. // We could do the same thing for the columns and skip entire rows. 
break; } } } } color *= (1.0f / sum); return color; } #include "nvthread/ParallelFor.h" struct ApplyAngularFilterContext { CubeSurface::Private * inputCube; CubeSurface::Private * filteredCube; float coneAngle; float * filterTable; int tableSize; EdgeFixup fixupMethod; }; void ApplyAngularFilterTask(void * context, int id) { ApplyAngularFilterContext * ctx = (ApplyAngularFilterContext *)context; int size = ctx->filteredCube->edgeLength; int f = id / (size * size); int idx = id % (size * size); int y = idx / size; int x = idx % size; nvtt::Surface & filteredFace = ctx->filteredCube->face[f]; FloatImage * filteredImage = filteredFace.m->image; const Vector3 filterDir = texelDirection(f, x, y, size, ctx->fixupMethod); // Convolve filter against cube. Vector3 color = ctx->inputCube->applyAngularFilter(filterDir, ctx->coneAngle, ctx->filterTable, ctx->tableSize); filteredImage->pixel(0, idx) = color.x; filteredImage->pixel(1, idx) = color.y; filteredImage->pixel(2, idx) = color.z; } CubeSurface CubeSurface::cosinePowerFilter(int size, float cosinePower, EdgeFixup fixupMethod) const { // Allocate output cube. CubeSurface filteredCube; filteredCube.m->allocate(size); // Texel table is stored along with the surface so that it's compute only once. m->allocateTexelTable(); const float threshold = 0.001f; const float coneAngle = acosf(powf(threshold, 1.0f/cosinePower)); // For each texel of the output cube. /*for (uint f = 0; f < 6; f++) { nvtt::Surface filteredFace = filteredCube.m->face[f]; FloatImage * filteredImage = filteredFace.m->image; for (uint y = 0; y < uint(size); y++) { for (uint x = 0; x < uint(size); x++) { const Vector3 filterDir = texelDirection(f, x, y, size, fixupMethod); // Convolve filter against cube. 
Vector3 color = m->applyCosinePowerFilter(filterDir, coneAngle, cosinePower); filteredImage->pixel(0, x, y, 0) = color.x; filteredImage->pixel(1, x, y, 0) = color.y; filteredImage->pixel(2, x, y, 0) = color.z; } } }*/ ApplyAngularFilterContext context; context.inputCube = m; context.filteredCube = filteredCube.m; context.coneAngle = coneAngle; context.fixupMethod = fixupMethod; context.tableSize = 512; context.filterTable = new float[context.tableSize]; // @@ Instead of looking up table between [0 - 1] we should probably use [cos(coneAngle), 1] for (int i = 0; i < context.tableSize; i++) { float f = float(i) / (context.tableSize - 1); context.filterTable[i] = powf(f, cosinePower); } nv::ParallelFor parallelFor(ApplyAngularFilterTask, &context); parallelFor.run(6 * size * size); // @@ Implement edge averaging. if (fixupMethod == EdgeFixup_Average) { for (uint f = 0; f < 6; f++) { nvtt::Surface filteredFace = filteredCube.m->face[f]; FloatImage * filteredImage = filteredFace.m->image; // For each component. for (uint c = 0; c < 3; c++) { // @@ For each corner, sample the two adjacent faces. filteredImage->pixel(c, 0, 0, 0); filteredImage->pixel(c, size-1, 0, 0); filteredImage->pixel(c, 0, size-1, 0); filteredImage->pixel(c, size-1, size-1, 0); // @@ For each edge, sample the adjacent face. } } } return filteredCube; } // Sample cubemap in the given direction. Vector3 CubeSurface::Private::sample(const Vector3 & dir) { int f = -1; if (fabs(dir.x) > fabs(dir.y) && fabs(dir.x) > fabs(dir.z)) { if (dir.x > 0) f = 0; else f = 1; } else if (fabs(dir.y) > fabs(dir.z)) { if (dir.y > 0) f = 2; else f = 3; } else { if (dir.z > 0) f = 4; else f = 5; } nvDebugCheck(f != -1); // uv coordinates corresponding to filterDir. 
float u = dot(dir, faceU[f]); float v = dot(dir, faceV[f]); FloatImage * img = face[f].m->image; Vector3 color; color.x = img->sampleLinearClamp(0, u, v); color.y = img->sampleLinearClamp(1, u, v); color.z = img->sampleLinearClamp(2, u, v); return color; } // @@ Not tested! CubeSurface CubeSurface::fastResample(int size, EdgeFixup fixupMethod) const { // Allocate output cube. CubeSurface resampledCube; resampledCube.m->allocate(size); // For each texel of the output cube. for (uint f = 0; f < 6; f++) { nvtt::Surface resampledFace = resampledCube.m->face[f]; FloatImage * resampledImage = resampledFace.m->image; for (uint y = 0; y < uint(size); y++) { for (uint x = 0; x < uint(size); x++) { const Vector3 filterDir = texelDirection(f, x, y, size, fixupMethod); Vector3 color = m->sample(filterDir); resampledImage->pixel(0, x, y, 0) = color.x; resampledImage->pixel(1, x, y, 0) = color.y; resampledImage->pixel(2, x, y, 0) = color.z; } } } // @@ Implement edge averaging. Share this code with cosinePowerFilter if (fixupMethod == EdgeFixup_Average) { } return resampledCube; } void CubeSurface::toLinear(float gamma) { if (isNull()) return; detach(); for (int i = 0; i < 6; i++) { m->face[i].toLinear(gamma); } } void CubeSurface::toGamma(float gamma) { if (isNull()) return; detach(); for (int i = 0; i < 6; i++) { m->face[i].toGamma(gamma); } } #if 0 // @@ Provide solar azimuth. #include "ArHoseSkyModel.h" void CubeSurface::sky(float turbidity, float albedo[3], float solarElevation) { ArHosekSkyModelState * skymodel_state[3]; for (int i = 0; i < num_channels; i++) { skymodel_state[i] = arhosekskymodelstate_alloc_init(turbidity, albedo[i], solarElevation); } // 700 nm (red), 546.1 nm (green) and 435.8 nm (blue). float channel_center[3] = { - 700, // Red 620–740, - 546.1, // Green 520–570, - 435.8, // Blue 450–490, + 700, // Red 620-740, + 546.1, // Green 520-570, + 435.8, // Blue 450-490, }; // @@ For each pixel: // What's the channel center for the RGB model? 
double skydome_result[3]; for (unsigned int i = 0; i < num_channels; i++) { skydome_result[i] = arhosekskymodel_radiance(skymodel_state[i], theta, gamma, channel_center[i]); } for (int i = 0; i < num_channels; i++) { arhosek_skymodelstate_free(skymodel_state[i]); } /* ArHosekXYZSkyModelState * skymodel_state[3]; for (int i = 0; i < num_channels; i++) { skymodel_state[i] = arhosek_xyz_skymodelstate_alloc_init(turbidity, albedo[i], solarElevation); } // @@ For each pixel. double skydome_result[3]; for (unsigned int i = 0; i < num_channels; i++) { skydome_result[i] = arhosek_xyz_skymodel_radiance(skymodel_state[i], theta, gamma, i); } for (int i = 0; i < num_channels; i++) { arhosek_xyz_skymodelstate_free(skymodel_state[i]); } */ } -#endif \ No newline at end of file +#endif