Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/SimdVector.h
===================================================================
--- ps/trunk/libraries/source/nvtt/src/src/nvmath/SimdVector.h	(revision 23379)
+++ ps/trunk/libraries/source/nvtt/src/src/nvmath/SimdVector.h	(revision 23380)
@@ -1,12 +1,12 @@
-// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
 
 #include "Vector.h" // Vector3, Vector4
 
 
 #if NV_USE_ALTIVEC
 #   include "SimdVector_VE.h"
 #endif
 
 #if NV_USE_SSE
 #   include "SimdVector_SSE.h"
 #endif
Index: ps/trunk/libraries/source/nvtt/src/src/nvthread/ParallelFor.h
===================================================================
--- ps/trunk/libraries/source/nvtt/src/src/nvthread/ParallelFor.h	(revision 23379)
+++ ps/trunk/libraries/source/nvtt/src/src/nvthread/ParallelFor.h	(revision 23380)
@@ -1,181 +1,181 @@
-// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
 
 #pragma once
 #ifndef NV_THREAD_PARALLELFOR_H
 #define NV_THREAD_PARALLELFOR_H
 
 #include "nvthread.h"
 //#include "Atomic.h" // atomic<uint>
 
 namespace nv
 {
     class Thread;
     class ThreadPool;
 
     typedef void ForTask(void * context, /*int tid,*/ int idx); // @@ It would be nice to have the thread index as an argument here.
 
     struct ParallelFor {
         ParallelFor(ForTask * task, void * context);
         ~ParallelFor();
 
         void run(uint count, uint step = 1);
 
         // Invariant:
         ForTask * task;
         void * context;
         ThreadPool * pool;
 
         // State:
         uint count;
         uint step;
         /*atomic<uint>*/ uint idx;
     };
 
 
 #if NV_CC_CPP11
 
     template <typename F>
     void sequential_for(uint count, F f) {
         for (uint i = 0; i < count; i++) {
             f(i);
         }
     }
 
 
     template <typename F>
     void parallel_for(uint count, uint step, F f) {
         // Transform lambda into function pointer.
         auto lambda = [](void* context, /*int tid, */int idx) {
             F & f = *reinterpret_cast<F *>(context);
             f(/*tid, */idx);
         };
 
         ParallelFor pf(lambda, &f);
         pf.run(count, step);
     }
 
 
     template <typename F>
     void parallel_for(uint count, F f) {
         parallel_for(count, /*step=*/1, f);
     }
 
 
     template <typename F>
     void parallel_for_if(uint count, uint step, bool condition, F f) {
         if (condition) {
             parallel_for(count, step, f);
         }
         else {
             sequential_for(count, f);
         }
     }
 
 
 #if 0
     template <typename F, typename T>
     void parallel_for_each(Array<T> & array, uint step, F f) {
         // Transform lambda into function pointer.
         auto lambda = [](void* context, int idx) {
             F & f = *reinterpret_cast<F *>(context);
             f(array[idx]);
         };
 
         ParallelFor pf(lambda, &f);
         pf.run(count, step);
     }
 #endif
 
 
 #endif // NV_CC_CPP11
 
 
 /*
 
 #include "nvthread/Mutex.h"
 #include "nvcore/Array.inl"
 
     template <typename T>
     struct ParallelOutputStream {
 #if 0
         // In its most basic implementation the parallel stream is simply a single array protected by a mutex.
         Parallel_Output_Stream(uint producer_count) {}
 
         void reset() { final_array.clear(); }
         void append(uint producer_id, const T & t) { Lock(mutex); final_array.append(t); }
         nv::Array<T> & finalize() { return final_array; }
         
         nv::Mutex mutex;
         nv::Array<T> final_array;
 
 #elif 0
         // Another simple implementation is to have N arrays that are merged at the end.
         ParallelOutputStream(uint producer_count) : producer_count(producer_count) {
             partial_array = new Array<T>[producer_count];
         }
 
         void reset() {
             for (int i = 0; i < producer_count; i++) {
                 partial_array[i].clear();
             }
         }
 
         void append(uint producer_id, const T & t) { 
             nvCheck(producer_id < producer_count);
             partial_array[producer_id].append(t);
         }
 
         nv::Array<T> & finalize() {
             for (int i = 1; i < producer_count; i++) {
                 partial_array->append(partial_array[i]);
                 partial_array[i].clear();
             }
             return *partial_array;
         }
 
         uint producer_count;
         nv::Array<T> * partial_array;
 #else
         ParallelOutputStream(uint producer_count) : producer_count(producer_count) {
             partial_array = new PartialArray[producer_count];
         }
 
         // But a more sophisticated implementation keeps N short arrays that are merged as they get full. This preserves partial order.
         struct PartialArray {          // Make sure this is aligned to cache lines. We want producers to access their respective arrays without conflicts.
             uint count;
             T data[32];                 // Pick size to minimize wasted space considering cache line alignment?
         };
 
         const uint producer_count;
         PartialArray * partial_array;
 
         // @@ Make sure mutex and partial_array are not in the same cache line!
 
         nv::Mutex mutex;
         nv::Array<T> final_array;
 
         void append(uint producer_id, const T & t) {
             if (partial_array[producer_id].count == 32) {
                 partial_array[producer_id].count = 0;
                 Lock(mutex);
                 final_array.append(partial_array[producer_id].data, 32);
             }
 
             partial_array[producer_id].data[partial_array[producer_id].count++] = t;
         }
         nv::Array<T> & finalize() {
             for (int i = 0; i < producer_count; i++) {
                 final_array.append(partial_array[producer_id].data, partial_array[producer_id].count);
             }
             return final_array;
         }
 #endif
     };
 
 */
 
 
 } // nv namespace
 
 
 #endif // NV_THREAD_PARALLELFOR_H
Index: ps/trunk/libraries/source/nvtt/src/CMakeLists.txt
===================================================================
--- ps/trunk/libraries/source/nvtt/src/CMakeLists.txt	(revision 23379)
+++ ps/trunk/libraries/source/nvtt/src/CMakeLists.txt	(revision 23380)
@@ -1,88 +1,88 @@
 CMAKE_MINIMUM_REQUIRED(VERSION 2.8.0)
 PROJECT(NV)
 ENABLE_TESTING()
 
 SET(NV_CMAKE_DIR "${NV_SOURCE_DIR}/cmake")
 SET(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${NV_CMAKE_DIR}")
 
 # GCC check (needs -std:c++11 flag)
 #if(CMAKE_COMPILER_IS_GNUCC)
 #	ADD_DEFINITIONS("-std=c++11")
 #ENDIF(CMAKE_COMPILER_IS_GNUCC)
 set (CMAKE_CXX_STANDARD 11)
 
 #IF(WIN32)
 	# gnuwin32 paths:
 	#SET(GNUWIN32_PATH "${NV_SOURCE_DIR}/extern/gnuwin32")
 	#SET(CMAKE_INCLUDE_PATH ${CMAKE_INCLUDE_PATH} "${GNUWIN32_PATH}/include")
 	#SET(CMAKE_LIBRARY_PATH ${CMAKE_LIBRARY_PATH} "${GNUWIN32_PATH}/lib")
 	
 	# Set GLUT path:
 	#SET(GLUT_ROOT_DIR "${NV_SOURCE_DIR}/extern/glut")
 
 	# Set FreeImage path:
 	#SET(FREEIMAGE_ROOT_DIR "${NV_SOURCE_DIR}/extern/FreeImage")
 
 #ENDIF(WIN32)
 
 INCLUDE(${NV_CMAKE_DIR}/OptimalOptions.cmake)
 MESSAGE(STATUS "Setting optimal options")
 MESSAGE(STATUS "  Processor: ${NV_SYSTEM_PROCESSOR}")
 MESSAGE(STATUS "  Compiler Flags: ${CMAKE_CXX_FLAGS}")
 
 IF(CMAKE_BUILD_TYPE MATCHES "debug")
 	SET(CMAKE_DEBUG_POSTFIX "_d" CACHE STRING "Postfix for debug build libraries.")
     ADD_DEFINITIONS(-D_DEBUG=1)
 ENDIF()
 
 
 IF(NVTT_SHARED)
 	SET(NVCORE_SHARED TRUE)
 	SET(NVMATH_SHARED TRUE)
 	SET(NVIMAGE_SHARED TRUE)
 ENDIF(NVTT_SHARED)
 
 SET(CMAKE_SKIP_BUILD_RPATH TRUE)
 SET(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)
 SET(CMAKE_INSTALL_RPATH "$ORIGIN")
 SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
 SET(CMAKE_INSTALL_NAME_DIR "@executable_path")
 
 ADD_SUBDIRECTORY(extern)
 
 ADD_SUBDIRECTORY(src)
 
 # These files should only be installed when creating packages.
 INSTALL(FILES 
 	LICENSE
 	README.md
 	DESTINATION share/doc/nvtt)
 
 # Add packaging support
 INCLUDE(InstallRequiredSystemLibraries)
 
 IF(CMAKE_SYSTEM_NAME STREQUAL "Linux")
 	SET(CPACK_GENERATOR "TGZ;DEB")
 ENDIF(CMAKE_SYSTEM_NAME STREQUAL "Linux")
 
 SET(CPACK_PACKAGE_NAME "nvidia-texture-tools")
 SET(CPACK_PACKAGE_VERSION_MAJOR "2")
 SET(CPACK_PACKAGE_VERSION_MINOR "1")
 SET(CPACK_PACKAGE_VERSION_PATCH "0")
 SET(CPACK_PACKAGE_VERSION "2.1.0")
-SET(CPACK_PACKAGE_CONTACT "Ignacio Castaño <castano@gmail.com>")
+SET(CPACK_PACKAGE_CONTACT "Ignacio Castaño <castano@gmail.com>")
 #SET(CPACK_PACKAGE_VENDOR "NVIDIA Corporation")
 SET(CPACK_PACKAGE_DESCRIPTION_SUMMARY "Texture processing tools with support for Direct3D 10 and 11 formats.")
 
 SET(CPACK_PACKAGE_DESCRIPTION_FILE "${NV_SOURCE_DIR}/README.md")
 SET(CPACK_RESOURCE_FILE_LICENSE "${NV_SOURCE_DIR}/LICENSE")
 
 # NSIS options:
 IF(WIN32)
 	SET(CPACK_NSIS_DISPLAY_NAME "${CPACK_PACKAGE_VENDOR}\\\\NVIDIA Texture Tools 2.1")
 	SET(CPACK_PACKAGE_INSTALL_DIRECTORY "${CPACK_PACKAGE_VENDOR}\\\\NVIDIA Texture Tools 2.1")
 	SET(CPACK_PACKAGE_ICON "${NV_SOURCE_DIR}\\\\project\\\\vc8\\\\nvcompress\\\\nvidia.ico")
 ENDIF(WIN32)
 
 INCLUDE(CPack)
 
Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/Array.h
===================================================================
--- ps/trunk/libraries/source/nvtt/src/src/nvcore/Array.h	(revision 23379)
+++ ps/trunk/libraries/source/nvtt/src/src/nvcore/Array.h	(revision 23380)
@@ -1,182 +1,182 @@
-// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
 
 #pragma once
 #ifndef NV_CORE_ARRAY_H
 #define NV_CORE_ARRAY_H
 
 /*
 This array class requires the elements to be relocable; it uses memmove and realloc. Ideally I should be 
 using swap, but I honestly don't care. The only thing that you should be aware of is that internal pointers
 are not supported.
 
 Note also that push_back and resize does not support inserting arguments elements that are in the same 
 container. This is forbidden to prevent an extra copy.
 */
 
 
 #include "Memory.h"
 #include "Debug.h"
 #include "ForEach.h" // PseudoIndex
 
 
 namespace nv 
 {
     class Stream;
 
     /**
     * Replacement for std::vector that is easier to debug and provides
     * some nice foreach enumerators. 
     */
     template<typename T>
     class NVCORE_CLASS Array {
     public:
         typedef uint size_type;
 
         // Default constructor.
         NV_FORCEINLINE Array() : m_buffer(NULL), m_capacity(0), m_size(0) {}
 
         // Copy constructor.
         NV_FORCEINLINE Array(const Array & a) : m_buffer(NULL), m_capacity(0), m_size(0) {
             copy(a.m_buffer, a.m_size);
         }
 
         // Constructor that initializes the vector with the given elements.
         NV_FORCEINLINE Array(const T * ptr, uint num) : m_buffer(NULL), m_capacity(0), m_size(0) {
             copy(ptr, num);
         }
 
         // Allocate array.
         NV_FORCEINLINE explicit Array(uint capacity) : m_buffer(NULL), m_capacity(0), m_size(0) {
             setArrayCapacity(capacity);
         }
 
         // Destructor.
         NV_FORCEINLINE ~Array() {
             clear();
             free<T>(m_buffer);
         }
 
 
         /// Const element access.
         NV_FORCEINLINE const T & operator[]( uint index ) const
         {
             nvDebugCheck(index < m_size);
             return m_buffer[index];
         }
         NV_FORCEINLINE const T & at( uint index ) const
         {
             nvDebugCheck(index < m_size);
             return m_buffer[index];
         }
 
         /// Element access.
         NV_FORCEINLINE T & operator[] ( uint index )
         {
             nvDebugCheck(index < m_size);
             return m_buffer[index];
         }
         NV_FORCEINLINE T & at( uint index )
         {
             nvDebugCheck(index < m_size);
             return m_buffer[index];
         }
 
         /// Get vector size.
         NV_FORCEINLINE uint size() const { return m_size; }
 
         /// Get vector size.
         NV_FORCEINLINE uint count() const { return m_size; }
 
         /// Get vector capacity.
         NV_FORCEINLINE uint capacity() const { return m_capacity; }
 
         /// Get const vector pointer.
         NV_FORCEINLINE const T * buffer() const { return m_buffer; }
 
         /// Get vector pointer.
         NV_FORCEINLINE T * buffer() { return m_buffer; }
 
         /// Provide begin/end pointers for C++11 range-based for loops.
         NV_FORCEINLINE T * begin() { return m_buffer; }
         NV_FORCEINLINE T * end() { return m_buffer + m_size; }
         NV_FORCEINLINE const T * begin() const { return m_buffer; }
         NV_FORCEINLINE const T * end() const { return m_buffer + m_size; }
 
         /// Is vector empty.
         NV_FORCEINLINE bool isEmpty() const { return m_size == 0; }
 
         /// Is a null vector.
         NV_FORCEINLINE bool isNull() const { return m_buffer == NULL; }
 
 
         T & append();
         void push_back( const T & val );
         void pushBack( const T & val );
         Array<T> & append( const T & val );
         Array<T> & operator<< ( T & t );
         void pop_back();
         void popBack(uint count = 1);
         void popFront(uint count = 1);
         const T & back() const;
         T & back();
         const T & front() const;
         T & front();
         bool contains(const T & e) const;
         bool find(const T & element, uint * indexPtr) const;
         bool find(const T & element, uint begin, uint end, uint * indexPtr) const;
         void removeAt(uint index);
         bool remove(const T & element);
         void insertAt(uint index, const T & val = T());
         void append(const Array<T> & other);
         void append(const T other[], uint count);
         void replaceWithLast(uint index);
         void resize(uint new_size);
         void resize(uint new_size, const T & elem);
         void fill(const T & elem);
         void clear();
         void shrink();
         void reserve(uint desired_size);
         void copy(const T * data, uint count);
         Array<T> & operator=( const Array<T> & a );
         T * release();
 
 
         // Array enumerator.
         typedef uint PseudoIndex;
 
         NV_FORCEINLINE PseudoIndex start() const { return 0; }
         NV_FORCEINLINE bool isDone(const PseudoIndex & i) const { nvDebugCheck(i <= this->m_size); return i == this->m_size; }
         NV_FORCEINLINE void advance(PseudoIndex & i) const { nvDebugCheck(i <= this->m_size); i++; }
 
 #if NV_CC_MSVC
         NV_FORCEINLINE T & operator[]( const PseudoIndexWrapper & i ) {
             return m_buffer[i(this)];
         }
         NV_FORCEINLINE const T & operator[]( const PseudoIndexWrapper & i ) const {
             return m_buffer[i(this)];
         }
 #endif
 
         // Friends.
         template <typename Typ> 
         friend Stream & operator<< ( Stream & s, Array<Typ> & p );
 
         template <typename Typ>
         friend void swap(Array<Typ> & a, Array<Typ> & b);
 
 
     protected:
 
         void setArraySize(uint new_size);
         void setArrayCapacity(uint new_capacity);
 
         T * m_buffer;
         uint m_capacity;
         uint m_size;
 
     };
 
 
 } // nv namespace
 
 #endif // NV_CORE_ARRAY_H
Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/Array.inl
===================================================================
--- ps/trunk/libraries/source/nvtt/src/src/nvcore/Array.inl	(revision 23379)
+++ ps/trunk/libraries/source/nvtt/src/src/nvcore/Array.inl	(revision 23380)
@@ -1,438 +1,438 @@
-// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
 
 #pragma once
 #ifndef NV_CORE_ARRAY_INL
 #define NV_CORE_ARRAY_INL
 
 #include "Array.h"
 
 #include "Stream.h"
 #include "Utils.h" // swap
 
 #include <string.h>	// memmove
 #include <new> // for placement new
 
 
 
 namespace nv 
 {
     template <typename T>
     NV_FORCEINLINE T & Array<T>::append()
     {
         uint old_size = m_size;
         uint new_size = m_size + 1;
 
         setArraySize(new_size);
 
         construct_range(m_buffer, new_size, old_size);
 
         return m_buffer[old_size]; // Return reference to last element.
     }
 
     // Push an element at the end of the vector.
     template <typename T>
     NV_FORCEINLINE void Array<T>::push_back( const T & val )
     {
 #if 1
         nvDebugCheck(&val < m_buffer || &val >= m_buffer+m_size);
 
         uint old_size = m_size;
         uint new_size = m_size + 1;
 
         setArraySize(new_size);
 
         construct_range(m_buffer, new_size, old_size, val);
 #else
         uint new_size = m_size + 1;
 
         if (new_size > m_capacity)
         {
             // @@ Is there any way to avoid this copy?
             // @@ Can we create a copy without side effects? Ie. without calls to constructor/destructor. Use alloca + memcpy?
             // @@ Assert instead of copy?
             const T copy(val);	// create a copy in case value is inside of this array.
 
             setArraySize(new_size);
 
             new (m_buffer+new_size-1) T(copy);
         }
         else
         {
             m_size = new_size;
             new(m_buffer+new_size-1) T(val);
         }
 #endif // 0/1
     }
     template <typename T>
     NV_FORCEINLINE void Array<T>::pushBack( const T & val )
     {
         push_back(val);
     }
     template <typename T>
     NV_FORCEINLINE Array<T> & Array<T>::append( const T & val )
     {
         push_back(val);
         return *this;
     }
 
     // Qt like push operator.
     template <typename T>
     NV_FORCEINLINE Array<T> & Array<T>::operator<< ( T & t )
     {
         push_back(t);
         return *this;
     }
 
     // Pop the element at the end of the vector.
     template <typename T>
     NV_FORCEINLINE void Array<T>::pop_back()
     {
         nvDebugCheck( m_size > 0 );
         resize( m_size - 1 );
     }
     template <typename T>
     NV_FORCEINLINE void Array<T>::popBack(uint count)
     {
         nvDebugCheck(m_size >= count);
         resize(m_size - count);
     }
 
     template <typename T>
     NV_FORCEINLINE void Array<T>::popFront(uint count)
     {
         nvDebugCheck(m_size >= count);
         //resize(m_size - count);
 
         if (m_size == count) {
             clear();
         }
         else {
             destroy_range(m_buffer, 0, count);
 
             memmove(m_buffer, m_buffer + count, sizeof(T) * (m_size - count));
 
             m_size -= count;
         }
 
     }
 
 
     // Get back element.
     template <typename T>
     NV_FORCEINLINE const T & Array<T>::back() const
     {
         nvDebugCheck( m_size > 0 );
         return m_buffer[m_size-1];
     }
 
     // Get back element.
     template <typename T>
     NV_FORCEINLINE T & Array<T>::back()
     {
         nvDebugCheck( m_size > 0 );
         return m_buffer[m_size-1];
     }
 
     // Get front element.
     template <typename T>
     NV_FORCEINLINE const T & Array<T>::front() const
     {
         nvDebugCheck( m_size > 0 );
         return m_buffer[0];
     }
 
     // Get front element.
     template <typename T>
     NV_FORCEINLINE T & Array<T>::front()
     {
         nvDebugCheck( m_size > 0 );
         return m_buffer[0];
     }
 
     // Check if the given element is contained in the array.
     template <typename T>
     NV_FORCEINLINE bool Array<T>::contains(const T & e) const
     {
         return find(e, NULL);
     }
 
     // Return true if element found.
     template <typename T>
     NV_FORCEINLINE bool Array<T>::find(const T & element, uint * indexPtr) const
     {
         return find(element, 0, m_size, indexPtr);
     }
 
     // Return true if element found within the given range.
     template <typename T>
     NV_FORCEINLINE bool Array<T>::find(const T & element, uint begin, uint end, uint * indexPtr) const
     {
         return ::nv::find(element, m_buffer, begin, end, indexPtr);
     }
 
 
     // Remove the element at the given index. This is an expensive operation!
     template <typename T>
     void Array<T>::removeAt(uint index)
     {
         nvDebugCheck(index >= 0 && index < m_size);
 
         if (m_size == 1) {
             clear();
         }
         else {
             m_buffer[index].~T();
 
             memmove(m_buffer+index, m_buffer+index+1, sizeof(T) * (m_size - 1 - index));
             m_size--;
         }
     }
 
     // Remove the first instance of the given element.
     template <typename T>
     bool Array<T>::remove(const T & element)
     {
         uint index;
         if (find(element, &index)) {
             removeAt(index);
             return true;
         }
         return false;
     }
 
     // Insert the given element at the given index shifting all the elements up.
     template <typename T>
     void Array<T>::insertAt(uint index, const T & val/*=T()*/)
     {
         nvDebugCheck( index >= 0 && index <= m_size );
 
         setArraySize(m_size + 1);
 
         if (index < m_size - 1) {
             memmove(m_buffer+index+1, m_buffer+index, sizeof(T) * (m_size - 1 - index));
         }
 
         // Copy-construct into the newly opened slot.
         new(m_buffer+index) T(val);
     }
 
     // Append the given data to our vector.
     template <typename T>
     NV_FORCEINLINE void Array<T>::append(const Array<T> & other)
     {
         append(other.m_buffer, other.m_size);
     }
 
     // Append the given data to our vector.
     template <typename T>
     void Array<T>::append(const T other[], uint count)
     {
         if (count > 0) {
             const uint old_size = m_size;
 
             setArraySize(m_size + count);
 
             for (uint i = 0; i < count; i++ ) {
                 new(m_buffer + old_size + i) T(other[i]);
             }
         }
     }
 
 
     // Remove the given element by replacing it with the last one.
     template <typename T> 
     void Array<T>::replaceWithLast(uint index)
     {
         nvDebugCheck( index < m_size );
         nv::swap(m_buffer[index], back());      // @@ Is this OK when index == size-1?
         (m_buffer+m_size-1)->~T();
         m_size--;
     }
 
     // Resize the vector preserving existing elements.
     template <typename T> 
     void Array<T>::resize(uint new_size)
     {
         uint old_size = m_size;
 
         // Destruct old elements (if we're shrinking).
         destroy_range(m_buffer, new_size, old_size);
 
         setArraySize(new_size);
 
         // Call default constructors
         construct_range(m_buffer, new_size, old_size);
     }
 
 
     // Resize the vector preserving existing elements and initializing the
     // new ones with the given value.
     template <typename T> 
     void Array<T>::resize(uint new_size, const T & elem)
     {
         nvDebugCheck(&elem < m_buffer || &elem > m_buffer+m_size);
 
         uint old_size = m_size;
 
         // Destruct old elements (if we're shrinking).
         destroy_range(m_buffer, new_size, old_size);
 
         setArraySize(new_size);
 
         // Call copy constructors
         construct_range(m_buffer, new_size, old_size, elem);
     }
 
     // Fill array with the given value.
     template <typename T>
     void Array<T>::fill(const T & elem)
     {
         fill(m_buffer, m_size, elem);
     }
 
     // Clear the buffer.
     template <typename T> 
     NV_FORCEINLINE void Array<T>::clear()
     {
         nvDebugCheck(isValidPtr(m_buffer));
 
         // Destruct old elements
         destroy_range(m_buffer, 0, m_size);
 
         m_size = 0;
     }
 
     // Shrink the allocated vector.
     template <typename T> 
     NV_FORCEINLINE void Array<T>::shrink()
     {
         if (m_size < m_capacity) {
             setArrayCapacity(m_size);
         }
     }
 
     // Preallocate space.
     template <typename T> 
     NV_FORCEINLINE void Array<T>::reserve(uint desired_size)
     {
         if (desired_size > m_capacity) {
             setArrayCapacity(desired_size);
         }
     }
 
     // Copy elements to this array. Resizes it if needed.
     template <typename T>
     NV_FORCEINLINE void Array<T>::copy(const T * data, uint count)
     {
 #if 1   // More simple, but maybe not be as efficient?
         destroy_range(m_buffer, 0, m_size);
 
         setArraySize(count);
 
         construct_range(m_buffer, count, 0, data);
 #else
         const uint old_size = m_size;
 
         destroy_range(m_buffer, count, old_size);
 
         setArraySize(count);
 
         copy_range(m_buffer, data, old_size);
 
         construct_range(m_buffer, count, old_size, data);
 #endif
     }
 
     // Assignment operator.
     template <typename T>
     NV_FORCEINLINE Array<T> & Array<T>::operator=( const Array<T> & a )
     {
         copy(a.m_buffer, a.m_size);
         return *this;
     }
 
     // Release ownership of allocated memory and returns pointer to it.
     template <typename T>
     T * Array<T>::release() {
         T * tmp = m_buffer;
         m_buffer = NULL;
         m_capacity = 0;
         m_size = 0;
         return tmp;
     }
 
 
 
     // Change array size.
     template <typename T> 
     inline void Array<T>::setArraySize(uint new_size) {
         m_size = new_size;
 
         if (new_size > m_capacity) {
             uint new_buffer_size;
             if (m_capacity == 0) {
                 // first allocation is exact
                 new_buffer_size = new_size;
             }
             else {
                 // following allocations grow array by 25%
                 new_buffer_size = new_size + (new_size >> 2);
             }
 
             setArrayCapacity( new_buffer_size );
         }
     }
 
     // Change array capacity.
     template <typename T> 
     inline void Array<T>::setArrayCapacity(uint new_capacity) {
         nvDebugCheck(new_capacity >= m_size);
 
         if (new_capacity == 0) {
             // free the buffer.
             if (m_buffer != NULL) {
                 free<T>(m_buffer);
                 m_buffer = NULL;
             }
         }
         else {
             // realloc the buffer
             m_buffer = realloc<T>(m_buffer, new_capacity);
         }
 
         m_capacity = new_capacity;
     }
 
     // Array serialization.
     template <typename Typ> 
     inline Stream & operator<< ( Stream & s, Array<Typ> & p )
     {
         if (s.isLoading()) {
             uint size;
             s << size;
             p.resize( size );
         }
         else {
             s << p.m_size;
         }
 
         for (uint i = 0; i < p.m_size; i++) {
             s << p.m_buffer[i];
         }
 
         return s;
     }
 
     // Swap the members of the two given vectors.
     template <typename Typ>
     inline void swap(Array<Typ> & a, Array<Typ> & b)
     {
         nv::swap(a.m_buffer, b.m_buffer);
         nv::swap(a.m_capacity, b.m_capacity);
         nv::swap(a.m_size, b.m_size);
     }
 
 
 } // nv namespace
 
 #endif // NV_CORE_ARRAY_INL
Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/Debug.cpp
===================================================================
--- ps/trunk/libraries/source/nvtt/src/src/nvcore/Debug.cpp	(revision 23379)
+++ ps/trunk/libraries/source/nvtt/src/src/nvcore/Debug.cpp	(revision 23380)
@@ -1,1270 +1,1270 @@
-// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+// This code is in the public domain -- Ignacio CastaÃ±o <castano@gmail.com>
 
 #include "Debug.h"
 #include "Array.inl"
 #include "StrLib.h" // StringBuilder
 
 #include "StdStream.h" // fileOpen
 
 #include <stdlib.h>
 
 // Extern
 #if NV_OS_WIN32 //&& NV_CC_MSVC
 #   define WIN32_LEAN_AND_MEAN
 #   define VC_EXTRALEAN
 #   include <windows.h>
 #   include <direct.h>
 #   if NV_CC_MSVC
 #       include <crtdbg.h>
 #       if _MSC_VER < 1300
 #           define DECLSPEC_DEPRECATED
 // VC6: change this path to your Platform SDK headers
 #           include <dbghelp.h> // must be XP version of file
 //          include "M:\\dev7\\vs\\devtools\\common\\win32sdk\\include\\dbghelp.h"
 #       else
 // VC7: ships with updated headers
 #           include <dbghelp.h>
 #       endif
 #   endif
 #   pragma comment(lib,"dbghelp.lib")
 #endif
 
 #if NV_OS_XBOX
 #    include <Xtl.h>
 #    ifdef _DEBUG
 #        include <xbdm.h>
 #    endif //_DEBUG
 #endif //NV_OS_XBOX
 
 #if !NV_OS_WIN32 && defined(HAVE_SIGNAL_H)
 #   include <signal.h>
 #endif
 
 #if NV_OS_UNIX
 #   include <unistd.h> // getpid
 #endif
 
 #if NV_OS_LINUX && defined(HAVE_EXECINFO_H)
 #   include <execinfo.h> // backtrace
 #   if NV_CC_GNUC // defined(HAVE_CXXABI_H)
 #       include <cxxabi.h>
 #   endif
 #endif
 
 #if NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_NETBSD || NV_OS_OPENBSD
 #   include <sys/types.h>
 #   include <sys/param.h>
 #   include <sys/sysctl.h> // sysctl
 #   if !defined(NV_OS_OPENBSD)
 #       include <sys/ucontext.h>
 #   endif
 #   if defined(HAVE_EXECINFO_H) // only after OSX 10.5
 #       include <execinfo.h> // backtrace
 #       if NV_CC_GNUC // defined(HAVE_CXXABI_H)
 #           include <cxxabi.h>
 #       endif
 #   endif
 #endif
 
 #if NV_OS_ORBIS
 #include <libdbg.h>
 #endif
 
 #define NV_USE_SEPARATE_THREAD 1
 
 
 using namespace nv;
 
 namespace 
 {
 
     static MessageHandler * s_message_handler = NULL;
     static AssertHandler * s_assert_handler = NULL;
 
     static bool s_sig_handler_enabled = false;
     static bool s_interactive = true;
 
 #if NV_OS_WIN32 && NV_CC_MSVC
 
     // Old exception filter.
     static LPTOP_LEVEL_EXCEPTION_FILTER s_old_exception_filter = NULL;
 
 #elif !NV_OS_WIN32 && defined(HAVE_SIGNAL_H)
 
     // Old signal handlers.
     struct sigaction s_old_sigsegv;
     struct sigaction s_old_sigtrap;
     struct sigaction s_old_sigfpe;
     struct sigaction s_old_sigbus;
 
 #endif
 
 
 #if NV_OS_WIN32 && NV_CC_MSVC
 
     // We should try to simplify the top level filter as much as possible.
     // http://www.nynaeve.net/?p=128
 
 #if NV_USE_SEPARATE_THREAD
 
     // The critical section enforcing the requirement that only one exception be
     // handled by a handler at a time.
     static CRITICAL_SECTION s_handler_critical_section;
 
     // Semaphores used to move exception handling between the exception thread
     // and the handler thread.  s_handler_start_semaphore is signalled by the
     // exception thread to wake up the handler thread when an exception occurs.
     // s_handler_finish_semaphore is signalled by the handler thread to wake up
     // the exception thread when handling is complete.
     static HANDLE s_handler_start_semaphore = NULL;
     static HANDLE s_handler_finish_semaphore = NULL;
 
     // The exception handler thread.
     static HANDLE s_handler_thread = NULL;
 
     static DWORD s_requesting_thread_id = 0;
     static EXCEPTION_POINTERS * s_exception_info = NULL;
 
 #endif // NV_USE_SEPARATE_THREAD
 
 
     // State shared with miniDumpWriteDumpCallback through its context pointer.
     // It describes one extra memory region to be reported to the dump writer.
     struct MinidumpCallbackContext {
         ULONG64 memory_base;    // Start address copied into the callback output's MemoryBase.
         ULONG memory_size;      // Byte count copied into the callback output's MemorySize.
         bool finished;          // Set once the region has been reported; later memory callbacks return FALSE.
     };
 
     // Callback handed to MiniDumpWriteDump through MINIDUMP_CALLBACK_INFORMATION.
     //  - MemoryCallback: reports the single region described by the
     //    MinidumpCallbackContext in 'context' once, then returns FALSE.
     //  - Module and thread callbacks: always accepted.
     //  - CancelCallback: clears both cancel flags in the output.
     //  - Anything else: FALSE.
     static BOOL CALLBACK miniDumpWriteDumpCallback(PVOID context, const PMINIDUMP_CALLBACK_INPUT callback_input, PMINIDUMP_CALLBACK_OUTPUT callback_output)
     {
         switch (callback_input->CallbackType)
         {
         // Modules and threads are all kept.
         case IncludeModuleCallback:
         case ModuleCallback:
         case IncludeThreadCallback:
         case ThreadCallback:
             return TRUE;
 
         // No further cancel callbacks are wanted.
         case CancelCallback:
             callback_output->CheckCancel = FALSE;
             callback_output->Cancel = FALSE;
             return TRUE;
 
         case MemoryCallback: {
             MinidumpCallbackContext * region = static_cast<MinidumpCallbackContext *>(context);
             if (!region->finished) {
                 // Hand over the requested region exactly once.
                 callback_output->MemoryBase = region->memory_base;
                 callback_output->MemorySize = region->memory_size;
                 region->finished = true;
                 return TRUE;
             }
             return FALSE;
         }
         }
 
         // Every other callback type is ignored.
         return FALSE;
     }
 
     // Write a minidump of the current process to "crash.dmp".
     //
     // pExceptionInfo: exception record and context of the fault, or NULL to
     //                 dump the process without exception information.
     // Returns true when the dump was written; false when the file could not be
     // created or MiniDumpWriteDump reported failure.
     static bool writeMiniDump(EXCEPTION_POINTERS * pExceptionInfo)
     {
         // create the file
         HANDLE hFile = CreateFileA("crash.dmp", GENERIC_WRITE, FILE_SHARE_WRITE, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
         if (hFile == INVALID_HANDLE_VALUE) {
             //nvDebug("*** Failed to create dump file.\n");
             return false;
         }
 
         // These objects are passed to MiniDumpWriteDump by address, so they must
         // live at function scope: declaring them inside the 'if' below would
         // leave pExInfo/pCallback dangling by the time the dump is written.
         MINIDUMP_EXCEPTION_INFORMATION ExInfo;
         MINIDUMP_CALLBACK_INFORMATION callback;
         MinidumpCallbackContext context;
 
         MINIDUMP_EXCEPTION_INFORMATION * pExInfo = NULL;
         MINIDUMP_CALLBACK_INFORMATION * pCallback = NULL;
 
         if (pExceptionInfo != NULL) {
 #if NV_USE_SEPARATE_THREAD
             // The dump is written from the handler thread; record the thread
             // that raised the exception when handleException published it.
             ExInfo.ThreadId = (s_requesting_thread_id != 0) ? s_requesting_thread_id : ::GetCurrentThreadId();
 #else
             ExInfo.ThreadId = ::GetCurrentThreadId();
 #endif
             ExInfo.ExceptionPointers = pExceptionInfo;
             ExInfo.ClientPointers = FALSE;
             pExInfo = &ExInfo;
 
             // Find a memory region of 256 bytes centered on the
             // faulting instruction pointer.
             const ULONG64 instruction_pointer = 
             #if defined(_M_IX86)
                 pExceptionInfo->ContextRecord->Eip;
             #elif defined(_M_AMD64)
                 pExceptionInfo->ContextRecord->Rip;
             #else
                 #error Unsupported platform
             #endif
 
             MEMORY_BASIC_INFORMATION info;
 
             if (VirtualQuery(reinterpret_cast<LPCVOID>(instruction_pointer), &info, sizeof(MEMORY_BASIC_INFORMATION)) != 0 && info.State == MEM_COMMIT)
             {
                 // Attempt to get 128 bytes before and after the instruction
                 // pointer, but settle for whatever's available up to the
                 // boundaries of the memory region.
                 const ULONG64 kIPMemorySize = 256;
                 context.memory_base = max(reinterpret_cast<ULONG64>(info.BaseAddress), instruction_pointer - (kIPMemorySize / 2));
                 ULONG64 end_of_range = min(instruction_pointer + (kIPMemorySize / 2), reinterpret_cast<ULONG64>(info.BaseAddress) + info.RegionSize);
                 context.memory_size = static_cast<ULONG>(end_of_range - context.memory_base);
                 context.finished = false;
 
                 callback.CallbackRoutine = miniDumpWriteDumpCallback;
                 callback.CallbackParam = reinterpret_cast<void*>(&context);
                 pCallback = &callback;
             }
         }
 
         MINIDUMP_TYPE miniDumpType = (MINIDUMP_TYPE)(MiniDumpNormal|MiniDumpWithHandleData|MiniDumpWithThreadInfo);
 
         // write the dump
         BOOL ok = MiniDumpWriteDump(GetCurrentProcess(), GetCurrentProcessId(), hFile, miniDumpType, pExInfo, NULL, pCallback) != 0;
         CloseHandle(hFile);
 
         if (ok == FALSE) {
             //nvDebug("*** Failed to save dump file.\n");
             return false;
         }
 
         //nvDebug("\nDump file saved.\n");
 
         return true;
     }
 
 #if NV_USE_SEPARATE_THREAD
 
     // Entry point of the dedicated dump-writing thread. It blocks on the start
     // semaphore, writes a minidump for the exception published in
     // s_exception_info, then signals the finish semaphore so the requesting
     // thread can continue. The loop never exits on its own.
     static DWORD WINAPI ExceptionHandlerThreadMain(void* lpParameter) {
         nvDebugCheck(s_handler_start_semaphore != NULL);
         nvDebugCheck(s_handler_finish_semaphore != NULL);
 
         for (;;) {
             const DWORD wait_result = WaitForSingleObject(s_handler_start_semaphore, INFINITE);
             if (wait_result != WAIT_OBJECT_0) {
                 // Not a normal wake-up; go back to waiting.
                 continue;
             }
 
             writeMiniDump(s_exception_info);
 
             // Let the thread that requested the dump proceed.
             ReleaseSemaphore(s_handler_finish_semaphore, 1, NULL);
         }
 
         // Not reached: the loop above has no exit.
         return 0;
     }
 
 #endif // NV_USE_SEPARATE_THREAD
 
     // Reports whether stack traces can be captured; always true in this
     // (Win32 + MSVC) configuration.
     static bool hasStackTrace() {
         return true;
     }
 
     /*static NV_NOINLINE int backtrace(void * trace[], int maxcount) {
 
         // In Windows XP and Windows Server 2003, the sum of the FramesToSkip and FramesToCapture parameters must be less than 63.
         int xp_maxcount = min(63-1, maxcount);
 
         int count = RtlCaptureStackBackTrace(1, xp_maxcount, trace, NULL);
         nvDebugCheck(count <= maxcount);
 
         return count;
     }*/
 
     // Walk the stack described by 'ctx' with StackWalk64 and store the program
     // counter of each visited frame in 'trace'.
     //
     // ctx:      register context to start from; StackWalk64 receives it as its
     //           context record argument.
     // trace:    output array; entry [i - skip] receives the PC of the i-th walked frame.
     // maxcount: maximum number of frames to walk (including skipped ones).
     // skip:     number of leading frames that are walked but not stored.
     // Returns the number of walked frames minus 'skip'.
     // NOTE(review): the result is negative when the walk stops before 'skip'
     // frames were visited; the callers in this file only use it as a loop bound.
     static NV_NOINLINE int backtraceWithSymbols(CONTEXT * ctx, void * trace[], int maxcount, int skip = 0) {
         
         // Init the stack frame for this function
         STACKFRAME64 stackFrame = { 0 };
 
     #if NV_CPU_X86_64
         DWORD dwMachineType = IMAGE_FILE_MACHINE_AMD64;
         stackFrame.AddrPC.Offset = ctx->Rip;
         stackFrame.AddrFrame.Offset = ctx->Rbp;
         stackFrame.AddrStack.Offset = ctx->Rsp;
     #elif NV_CPU_X86
         DWORD dwMachineType = IMAGE_FILE_MACHINE_I386;
         stackFrame.AddrPC.Offset = ctx->Eip;
         stackFrame.AddrFrame.Offset = ctx->Ebp;
         stackFrame.AddrStack.Offset = ctx->Esp;
     #else
         #error "Platform not supported!"
     #endif
         stackFrame.AddrPC.Mode = AddrModeFlat;
         stackFrame.AddrFrame.Mode = AddrModeFlat;
         stackFrame.AddrStack.Mode = AddrModeFlat;
 
         // Walk up the stack
         const HANDLE hThread = GetCurrentThread();
         const HANDLE hProcess = GetCurrentProcess();
         int i;
         for (i = 0; i < maxcount; i++)
         {
             // walking once first makes us skip self
             if (!StackWalk64(dwMachineType, hProcess, hThread, &stackFrame, ctx, NULL, &SymFunctionTableAccess64, &SymGetModuleBase64, NULL)) {
                 break;
             }
 
             /*if (stackFrame.AddrPC.Offset == stackFrame.AddrReturn.Offset || stackFrame.AddrPC.Offset == 0) {
                 break;
             }*/
 
             // Frames before 'skip' are walked but not recorded.
             if (i >= skip) {
                 trace[i - skip] = (PVOID)stackFrame.AddrPC.Offset;
             }
         }
 
         return i - skip;
     }
 
 #pragma warning(push)
 #pragma warning(disable:4748)
     // Capture the call stack of the calling thread into 'trace'.
     // A CONTEXT is filled in for the current location (inline assembly on
     // 32-bit x86, RtlCaptureContext otherwise) and passed to
     // backtraceWithSymbols with skip = 1, so the first walked frame is dropped.
     // Returns whatever backtraceWithSymbols returns.
     static NV_NOINLINE int backtrace(void * trace[], int maxcount) {
         CONTEXT ctx = { 0 };
 #if NV_CPU_X86 && !NV_CPU_X86_64
         ctx.ContextFlags = CONTEXT_CONTROL;
         _asm {
              call x
           x: pop eax
              mov ctx.Eip, eax
              mov ctx.Ebp, ebp
              mov ctx.Esp, esp
         }
 #else
         RtlCaptureContext(&ctx); // Not implemented correctly in x86.
 #endif
 
         return backtraceWithSymbols(&ctx, trace, maxcount, 1);
     }
 #pragma warning(pop)
 
     // Turn captured program counters into text lines of the form
     // "file(line) : function\n" and append them to 'lines'.
     //
     // trace: array of program counters.
     // size:  number of entries in 'trace' to consider.
     // start: index of the first entry to resolve.
     // lines: receives one string per resolved frame, obtained from
     //        StringBuilder::release(); the callers in this file dispose of them.
     //
     // Resolution stops at the first null address, at the first frame without
     // line information, or after a frame named "WinMain". Frames whose symbol
     // lookup fails are skipped without producing a line.
     static NV_NOINLINE void writeStackTrace(void * trace[], int size, int start, Array<const char *> & lines)
     {
         StringBuilder builder(512);
 
         HANDLE hProcess = GetCurrentProcess();
         
         // Resolve PC to function names
         for (int i = start; i < size; i++)
         {
             // Check for end of stack walk
             DWORD64 ip = (DWORD64)trace[i];
             if (ip == NULL)
                 break;
 
             // Get function name
             // The symbol record is followed in the same buffer by room for the name.
             #define MAX_STRING_LEN  (512)
             unsigned char byBuffer[sizeof(IMAGEHLP_SYMBOL64) + MAX_STRING_LEN] = { 0 };
             IMAGEHLP_SYMBOL64 * pSymbol = (IMAGEHLP_SYMBOL64*)byBuffer;
             pSymbol->SizeOfStruct = sizeof(IMAGEHLP_SYMBOL64);
             pSymbol->MaxNameLength = MAX_STRING_LEN;
 
             DWORD64 dwDisplacement;
             
             if (SymGetSymFromAddr64(hProcess, ip, &dwDisplacement, pSymbol))
             {
                 // Force termination of the returned name.
                 pSymbol->Name[MAX_STRING_LEN-1] = 0;
                 
                 /*
                 // Make the symbol readable for humans
                 UnDecorateSymbolName( pSym->Name, lpszNonUnicodeUnDSymbol, BUFFERSIZE, 
                     UNDNAME_COMPLETE | 
                     UNDNAME_NO_THISTYPE |
                     UNDNAME_NO_SPECIAL_SYMS |
                     UNDNAME_NO_MEMBER_TYPE |
                     UNDNAME_NO_MS_KEYWORDS |
                     UNDNAME_NO_ACCESS_SPECIFIERS );
                 */
                 
                 // pSymbol->Name
                 const char * pFunc = pSymbol->Name;
 
                 // Get file/line number
                 IMAGEHLP_LINE64 theLine = { 0 };
                 theLine.SizeOfStruct = sizeof(theLine);
 
                 // This DWORD intentionally or not shadows the DWORD64 declared above.
                 DWORD dwDisplacement;
                 if (!SymGetLineFromAddr64(hProcess, ip, &dwDisplacement, &theLine))
                 {
                     // Do not print unknown symbols anymore.
                     break;
                     //builder.format("unknown(%08X) : %s\n", (uint32)ip, pFunc);
                 }
                 else
                 {
                     /*
                     const char* pFile = strrchr(theLine.FileName, '\\');
                     if ( pFile == NULL ) pFile = theLine.FileName;
                     else pFile++;
                     */
                     const char * pFile = theLine.FileName;
                     
                     int line = theLine.LineNumber;
                     
                     builder.format("%s(%d) : %s\n", pFile, line, pFunc);
                 }
 
                 // Hand the formatted text over to the output array.
                 lines.append(builder.release());
 
                 // WinMain is treated as the bottom of the interesting stack.
                 if (pFunc != NULL && strcmp(pFunc, "WinMain") == 0) {
                     break;
                 }
             }
         }
     }
 
 
     // Write mini dump and print stack trace.
     //
     // Exception filter: serialises on s_handler_critical_section, has a minidump
     // written (by the handler thread when NV_USE_SEPARATE_THREAD is set, inline
     // otherwise), then either breaks into an attached debugger or writes a
     // textual stack trace to "crash.txt" and terminates the process.
     //
     // Returns EXCEPTION_CONTINUE_EXECUTION after a debugger was attached in
     // interactive mode; EXCEPTION_EXECUTE_HANDLER only if TerminateProcess
     // returns.
     static LONG WINAPI handleException(EXCEPTION_POINTERS * pExceptionInfo)
     {
         EnterCriticalSection(&s_handler_critical_section);
 #if NV_USE_SEPARATE_THREAD
         // Publish the request for the handler thread.
         s_requesting_thread_id = GetCurrentThreadId();
         s_exception_info = pExceptionInfo;
 
         // This causes the handler thread to call writeMiniDump.
         ReleaseSemaphore(s_handler_start_semaphore, 1, NULL);
 
         // Wait until WriteMinidumpWithException is done and collect its return value.
         WaitForSingleObject(s_handler_finish_semaphore, INFINITE);
         //bool status = s_handler_return_value;
 
         // Clean up.
         s_requesting_thread_id = 0;
         s_exception_info = NULL;
 #else
         // First of all, write mini dump.
         writeMiniDump(pExceptionInfo);
 #endif
         LeaveCriticalSection(&s_handler_critical_section);
 
         // The result of writeMiniDump is not consulted, so this message is
         // printed whether or not the dump was actually written.
         nvDebug("\nDump file saved.\n");
 
         // Try to attach to debugger.
         if (s_interactive && debug::attachToDebugger()) {
             nvDebugBreak();
             return EXCEPTION_CONTINUE_EXECUTION;
         }
 
         // If that fails, then try to pretty print a stack trace and terminate.
         void * trace[64];
         
         int size = backtraceWithSymbols(pExceptionInfo->ContextRecord, trace, 64);
 
         // @@ Use win32's CreateFile?
         FILE * fp = fileOpen("crash.txt", "wb");
         if (fp != NULL) {
             Array<const char *> lines;
             writeStackTrace(trace, size, 0, lines);
 
             // Each line was released from a StringBuilder and is disposed of here.
             // NOTE(review): confirm that 'delete' matches how StringBuilder allocates.
             for (uint i = 0; i < lines.count(); i++) {
                 fputs(lines[i], fp);
                 delete lines[i];
             }
 
             // @@ Add more info to crash.txt?
 
             fclose(fp);
         }
 
         // This should terminate the process and set the error exit code.
         TerminateProcess(GetCurrentProcess(), EXIT_FAILURE + 2);
 
         return EXCEPTION_EXECUTE_HANDLER;   // Terminate app. In case terminate process did not succeed.
     }
 
     static void handlePureVirtualCall() {
         nvDebugBreak();
         TerminateProcess(GetCurrentProcess(), EXIT_FAILURE + 8);
     }
 
     static void handleInvalidParameter(const wchar_t * wexpresion, const wchar_t * wfunction, const wchar_t * wfile, unsigned int line, uintptr_t reserved) {
 
         size_t convertedCharCount = 0;
         
         StringBuilder expresion;
         if (wexpresion != NULL) {
             uint size = U32(wcslen(wexpresion) + 1);
             expresion.reserve(size);
             wcstombs_s(&convertedCharCount, expresion.str(), size, wexpresion, _TRUNCATE);
         }
 
         StringBuilder file;
         if (wfile != NULL) {
             uint size = U32(wcslen(wfile) + 1);
             file.reserve(size);
             wcstombs_s(&convertedCharCount, file.str(), size, wfile, _TRUNCATE);
         }
 
         StringBuilder function;
         if (wfunction != NULL) {
             uint size = U32(wcslen(wfunction) + 1);
             function.reserve(size);
             wcstombs_s(&convertedCharCount, function.str(), size, wfunction, _TRUNCATE);
         }
         
         int result = nvAbort(expresion.str(), file.str(), line, function.str());
         if (result == NV_ABORT_DEBUG) {
             nvDebugBreak();
         } 
     }
 
 #elif !NV_OS_WIN32 && defined(HAVE_SIGNAL_H) // NV_OS_LINUX || NV_OS_DARWIN
 
 #if defined(HAVE_EXECINFO_H)
 
     // Reports whether stack traces can be captured; always true when
     // HAVE_EXECINFO_H is defined.
     static bool hasStackTrace() {
         return true;
     }
 
 
     static void writeStackTrace(void * trace[], int size, int start, Array<const char *> & lines) {
         StringBuilder builder(512);
         char ** string_array = backtrace_symbols(trace, size);
 
         for(int i = start; i < size-1; i++ ) {
 #       if NV_CC_GNUC // defined(HAVE_CXXABI_H)
             // @@ Write a better parser for the possible formats.
             char * begin = strchr(string_array[i], '(');
             char * end = strrchr(string_array[i], '+');
             char * module = string_array[i];
 
             if (begin == 0 && end != 0) {
                 *(end - 1) = '\0';
                 begin = strrchr(string_array[i], ' ');
                 module = NULL; // Ignore module.
             }
 
             if (begin != 0 && begin < end) {
                 int stat;
                 *end = '\0';
                 *begin = '\0';
                 char * name = abi::__cxa_demangle(begin+1, 0, 0, &stat);
                 if (module == NULL) {
                     if (name == NULL || stat != 0) {
                         builder.format("  In: '%s'\n", begin+1);
                     }
                     else {
                         builder.format("  In: '%s'\n", name);
                     }
                 }
                 else {
                     if (name == NULL || stat != 0) {
                         builder.format("  In: [%s] '%s'\n", module, begin+1);
                     }
                     else {
                         builder.format("  In: [%s] '%s'\n", module, name);
                     }
                 }
                 free(name);
             }
             else {
                 builder.format("  In: '%s'\n", string_array[i]);
             }
 #       else
             builder.format("  In: '%s'\n", string_array[i]);
 #       endif
             lines.append(builder.release());
         }
 
         free(string_array);
     }
 
     // Print a captured stack trace through nvDebug.
     //
     // trace: addresses as produced by backtrace().
     // size:  number of entries in 'trace'.
     // start: index of the first entry to print, so callers can hide their own
     //        handler frames.
     static void printStackTrace(void * trace[], int size, int start=0) {
         nvDebug( "\nDumping stacktrace:\n" );
 
         Array<const char *> lines;
         // Honour the caller's 'start' instead of a hard-coded 1.
         writeStackTrace(trace, size, start, lines);
 
         for (uint i = 0; i < lines.count(); i++) {
             nvDebug("%s", lines[i]);
             delete lines[i];
         }
 
         nvDebug("\n");
     }
 
 #endif // defined(HAVE_EXECINFO_H)
 
     // Return the program counter stored in a signal context.
     //
     // secret: the context pointer nvSigHandler receives as its third argument;
     //         it is reinterpreted as a ucontext_t.
     // Returns the instruction-pointer register read from that context. The
     // field that holds it differs per operating system and CPU, hence the
     // preprocessor matrix below; unsupported combinations fail to compile.
     static void * callerAddress(void * secret)
     {
 #if NV_OS_DARWIN
 #  if defined(_STRUCT_MCONTEXT)
 #    if NV_CPU_PPC
         ucontext_t * ucp = (ucontext_t *)secret;
         return (void *) ucp->uc_mcontext->__ss.__srr0;
 #    elif NV_CPU_X86_64
         ucontext_t * ucp = (ucontext_t *)secret;
         return (void *) ucp->uc_mcontext->__ss.__rip;
 #    elif NV_CPU_X86
         ucontext_t * ucp = (ucontext_t *)secret;
         return (void *) ucp->uc_mcontext->__ss.__eip;
 #    elif NV_CPU_ARM
         ucontext_t * ucp = (ucontext_t *)secret;
         return (void *) ucp->uc_mcontext->__ss.__pc;
 #    else
 #      error "Unknown CPU"
 #    endif
 #  else
 #    if NV_CPU_PPC
         ucontext_t * ucp = (ucontext_t *)secret;
         return (void *) ucp->uc_mcontext->ss.srr0;
 #    elif NV_CPU_X86
         ucontext_t * ucp = (ucontext_t *)secret;
         return (void *) ucp->uc_mcontext->ss.eip;
 #    else
 #      error "Unknown CPU"
 #    endif
 #  endif
 #elif NV_OS_FREEBSD
 #  if NV_CPU_X86_64
         ucontext_t * ucp = (ucontext_t *)secret;
         return (void *)ucp->uc_mcontext.mc_rip;
 #  elif NV_CPU_X86
         ucontext_t * ucp = (ucontext_t *)secret;
         return (void *)ucp->uc_mcontext.mc_eip;
 #    else
 #      error "Unknown CPU"
 #    endif
 #elif NV_OS_NETBSD
 #  if NV_CPU_X86_64
         ucontext_t * ucp = (ucontext_t *)secret;
         return (void *)ucp->uc_mcontext.__gregs[_REG_RIP];
 #  elif NV_CPU_X86
         ucontext_t * ucp = (ucontext_t *)secret;
         return (void *)ucp->uc_mcontext.__gregs[_REG_EIP];
 #  elif NV_CPU_PPC
         ucontext_t * ucp = (ucontext_t *)secret;
         return (void *) ucp->uc_mcontext.__gregs[_REG_PC];
 #  else
 #      error "Unknown CPU"
 #  endif
 #elif NV_OS_OPENBSD
 #  if NV_CPU_X86_64
         ucontext_t * ucp = (ucontext_t *)secret;
         return (void *)ucp->sc_rip;
 #  elif NV_CPU_X86
         ucontext_t * ucp = (ucontext_t *)secret;
         return (void *)ucp->sc_eip;
 #  else
 #       error "Unknown CPU"
 #  endif        
 #else
         // Every other OS that reaches this function.
 #  if NV_CPU_X86_64
         // #define REG_RIP REG_INDEX(rip) // seems to be 16
         ucontext_t * ucp = (ucontext_t *)secret;
         return (void *)ucp->uc_mcontext.gregs[REG_RIP];
 #  elif NV_CPU_X86
         ucontext_t * ucp = (ucontext_t *)secret;
         return (void *)ucp->uc_mcontext.gregs[14/*REG_EIP*/];
 #  elif NV_CPU_PPC
         ucontext_t * ucp = (ucontext_t *)secret;
         return (void *) ucp->uc_mcontext.regs->nip;
 #    elif NV_CPU_AARCH64
         ucontext_t * ucp = (ucontext_t *)secret;
         return (void *) ucp->uc_mcontext.pc;
 #    else
 #      error "Unknown CPU"
 #    endif
 #endif
 
         // How to obtain the instruction pointers in different platforms, from mlton's source code.
         // http://mlton.org/
         // OpenBSD
         // ucp->sc_eip
         // FreeBSD:
         // ucp->uc_mcontext.mc_eip
         // HPUX:
         // ucp->uc_link
         // Solaris:
         // ucp->uc_mcontext.gregs[REG_PC]
         // Linux hppa:
         // uc->uc_mcontext.sc_iaoq[0] & ~0x3UL
         // Linux sparc:
         // ((struct sigcontext*) secret)->sigc_regs.tpc
         // Linux sparc64:
         // ((struct sigcontext*) secret)->si_regs.pc
 
         // potentially correct for other archs:
         // Linux alpha: ucp->m_context.sc_pc
         // Linux arm: ucp->m_context.ctx.arm_pc
         // Linux ia64: ucp->m_context.sc_ip & ~0x3UL
         // Linux mips: ucp->m_context.sc_pc
         // Linux s390: ucp->m_context.sregs->regs.psw.addr
     }
 
     // Signal handler: logs which signal arrived, prints a stack trace when
     // execinfo is available, and ends the process.
     //
     // sig:    signal number.
     // info:   signal details; si_addr is logged for SIGSEGV.
     // secret: signal context, passed to callerAddress to obtain the program
     //         counter at the time of the signal.
     static void nvSigHandler(int sig, siginfo_t *info, void *secret)
     {
         void * pnt = callerAddress(secret);
 
         // Do something useful with siginfo_t
         if (sig == SIGSEGV) {
             if (pnt != NULL) nvDebug("Got signal %d, faulty address is %p, from %p\n", sig, info->si_addr, pnt);
             else nvDebug("Got signal %d, faulty address is %p\n", sig, info->si_addr);
         }
         else if(sig == SIGTRAP) {
             nvDebug("Breakpoint hit.\n");
         }
         else {
             nvDebug("Got signal %d\n", sig);
         }
 
 #if defined(HAVE_EXECINFO_H)
         if (hasStackTrace()) // in case of weak linking
         {
             void * trace[64];
             int size = backtrace(trace, 64);
 
             if (pnt != NULL) {
                 // Overwrite sigaction with caller's address.
                 trace[1] = pnt;
             }
 
             printStackTrace(trace, size, 1);
         }
 #endif // defined(HAVE_EXECINFO_H)
 
         // NOTE(review): the process exits with status 0 even though it is
         // terminating because of a signal; confirm that this is intended.
         exit(0);
     }
 
 #endif // defined(HAVE_SIGNAL_H)
 
 
 
 #if NV_OS_WIN32 //&& NV_CC_MSVC
 
     /** Win32 assert handler. */
     struct Win32AssertHandler : public AssertHandler 
     {
         // Flush the message queue. This is necessary for the message box to show up.
         static void flushMessageQueue()
         {
             MSG msg;
             while( PeekMessage( &msg, NULL, 0, 0, PM_REMOVE ) ) {
                 //if( msg.message == WM_QUIT ) break;
                 TranslateMessage( &msg );
                 DispatchMessage( &msg );
             }
         }
 
         // Assert handler method.
         //
         // Builds a description of the failed assertion (expression, file, line
         // and, when given, function and formatted message), logs it together
         // with a stack trace and decides what the caller should do:
         //  - NV_ABORT_DEBUG when a debugger is present or the user picks Retry,
         //  - NV_ABORT_IGNORE when the user picks Ignore,
         //  - otherwise the process exits with EXIT_FAILURE + 1.
         // The message box is only shown in interactive mode.
         virtual int assertion(const char * exp, const char * file, int line, const char * func, const char * msg, va_list arg)
         {
             int ret = NV_ABORT_EXIT;
 
             StringBuilder error_string;
             error_string.format("*** Assertion failed: %s\n    On file: %s\n    On line: %d\n", exp, file, line );
             if (func != NULL) {
                 error_string.appendFormat("    On function: %s\n", func);
             }
             if (msg != NULL) {
                 error_string.append("    Message: ");
                 // Work on a copy so the caller's va_list is left untouched.
                 va_list tmp;
                 va_copy(tmp, arg);
                 error_string.appendFormatList(msg, tmp);
                 va_end(tmp);
                 error_string.append("\n");
             }
             // The text is data, not a format: the asserted expression may
             // itself contain '%' (for example a modulo test).
             nvDebug( "%s", error_string.str() );
 
             // Print stack trace:
             debug::dumpInfo();
 
             if (debug::isDebuggerPresent()) {
                 return NV_ABORT_DEBUG;
             }
 
             if (s_interactive) {
                 flushMessageQueue();
                 int action = MessageBoxA(NULL, error_string.str(), "Assertion failed", MB_ABORTRETRYIGNORE | MB_ICONERROR | MB_TOPMOST);
                 switch( action ) {
                 case IDRETRY:
                     ret = NV_ABORT_DEBUG;
                     break;
                 case IDIGNORE:
                     ret = NV_ABORT_IGNORE;
                     break;
                 case IDABORT:
                 default:
                     ret = NV_ABORT_EXIT;
                     break;
                 }
                 /*if( _CrtDbgReport( _CRT_ASSERT, file, line, module, exp ) == 1 ) {
                     return NV_ABORT_DEBUG;
                 }*/
             }
 
             if (ret == NV_ABORT_EXIT) {
                 // Exit cleanly.
                 exit(EXIT_FAILURE + 1);
             }
 
             return ret;
         }
     };
 #elif NV_OS_XBOX
 
     /** Xbox360 assert handler. */
     struct Xbox360AssertHandler : public AssertHandler 
     {
         // Assert handler method.
         //
         // Logs the failed assertion (expression, file, line and, when given,
         // function). Returns NV_ABORT_DEBUG when a debugger is present;
         // otherwise the process exits with EXIT_FAILURE + 1.
         // 'msg' and 'arg' are not used.
         virtual int assertion(const char * exp, const char * file, int line, const char * func, const char * msg, va_list arg)
         {
             int ret = NV_ABORT_EXIT;
 
             StringBuilder error_string;
             if( func != NULL ) {
                 error_string.format( "*** Assertion failed: %s\n    On file: %s\n    On function: %s\n    On line: %d\n ", exp, file, func, line );
             }
             else {
                 error_string.format( "*** Assertion failed: %s\n    On file: %s\n    On line: %d\n ", exp, file, line );
             }
             // The text is data, not a format: the asserted expression may
             // itself contain '%' (for example a modulo test).
             nvDebug( "%s", error_string.str() );
 
             if (debug::isDebuggerPresent()) {
                 return NV_ABORT_DEBUG;
             }
 
             if( ret == NV_ABORT_EXIT ) {
                  // Exit cleanly.
                 exit(EXIT_FAILURE + 1);
             }
 
             return ret;
         }
     };
 #elif NV_OS_ORBIS
 
     /** Orbis assert handler. */
     struct OrbisAssertHandler : public AssertHandler
     {
         // Logs the failed assertion, then requests a debugger break when a
         // debugger is present; otherwise the assertion is ignored.
         // 'msg' and 'arg' are not used.
         virtual int assertion(const char * exp, const char * file, int line, const char * func, const char * msg, va_list arg)
         {
             if( func == NULL ) {
                 nvDebug( "*** Assertion failed: %s\n    On file: %s\n    On line: %d\n ", exp, file, line );
             }
             else {
                 nvDebug( "*** Assertion failed: %s\n    On file: %s\n    On function: %s\n    On line: %d\n ", exp, file, func, line );
             }
 
             //SBtodoORBIS print stack trace
             /*if (hasStackTrace())
             {
                 void * trace[64];
                 int size = backtrace(trace, 64);
                 printStackTrace(trace, size, 2);
             }*/
 
             return debug::isDebuggerPresent() ? NV_ABORT_DEBUG : NV_ABORT_IGNORE;
         }
     };
 
 #else
 
     /** Unix assert handler. */
     struct UnixAssertHandler : public AssertHandler
     {
         // Logs the failed assertion and, when execinfo is available, a stack
         // trace. In _DEBUG builds NV_ABORT_DEBUG is returned when a debugger is
         // present; otherwise the process exits with EXIT_FAILURE + 1.
         // 'msg' and 'arg' are not used.
         virtual int assertion(const char * exp, const char * file, int line, const char * func, const char * msg, va_list arg)
         {
             const int action = NV_ABORT_EXIT;
 
             if( func == NULL ) {
                 nvDebug( "*** Assertion failed: %s\n    On file: %s\n    On line: %d\n ", exp, file, line );
             }
             else {
                 nvDebug( "*** Assertion failed: %s\n    On file: %s\n    On function: %s\n    On line: %d\n ", exp, file, func, line );
             }
 
 #if _DEBUG
             if (debug::isDebuggerPresent()) {
                 return NV_ABORT_DEBUG;
             }
 #endif
 
 #if defined(HAVE_EXECINFO_H)
             if (hasStackTrace())
             {
                 void * frames[64];
                 const int frame_count = backtrace(frames, 64);
                 printStackTrace(frames, frame_count, 2);
             }
 #endif
 
             if( action == NV_ABORT_EXIT ) {
                 // Leave through exit() so that normal process shutdown runs.
                 exit(EXIT_FAILURE + 1);
             }
 
             return action;
         }
     };
 
 #endif
 
 } // namespace
 
 
 /// Handle assertion through the assert handler.
 int nvAbort(const char * exp, const char * file, int line, const char * func/*=NULL*/, const char * msg/*= NULL*/, ...)
 {
 #if NV_OS_WIN32 //&& NV_CC_MSVC
     static Win32AssertHandler s_default_assert_handler;
 #elif NV_OS_XBOX
     static Xbox360AssertHandler s_default_assert_handler;
 #elif NV_OS_ORBIS
     static OrbisAssertHandler s_default_assert_handler;
 #else
     static UnixAssertHandler s_default_assert_handler;
 #endif
 
     va_list arg;
     va_start(arg,msg);
 
     AssertHandler * handler = s_assert_handler != NULL ? s_assert_handler : &s_default_assert_handler;
     int result = handler->assertion(exp, file, line, func, msg, arg);
 
     va_end(arg);
 
     return result;
 }
 
 // Abnormal termination. Create mini dump and output call stack.
 //
 // On Win32 a minidump without exception information is written, the current
 // call stack is captured and written to "crash.txt", all while holding
 // s_handler_critical_section. On every platform the process then exits with
 // 'code'.
 // NOTE(review): the helpers used here are defined under
 // NV_OS_WIN32 && NV_CC_MSVC, while this section is guarded by NV_OS_WIN32
 // only; confirm that non-MSVC Win32 builds are not expected.
 void debug::terminate(int code)
 {
 #if NV_OS_WIN32
     EnterCriticalSection(&s_handler_critical_section);
 
     // NULL: there is no exception record for a requested termination.
     writeMiniDump(NULL);
 
     const int max_stack_size = 64;
     void * trace[max_stack_size];
     int size = backtrace(trace, max_stack_size);
 
     // @@ Use win32's CreateFile?
     FILE * fp = fileOpen("crash.txt", "wb");
     if (fp != NULL) {
         Array<const char *> lines;
         writeStackTrace(trace, size, 0, lines);
 
         // Write each line and dispose of it.
         for (uint i = 0; i < lines.count(); i++) {
             fputs(lines[i], fp);
             delete lines[i];
         }
 
         // @@ Add more info to crash.txt?
 
         fclose(fp);
     }
 
     LeaveCriticalSection(&s_handler_critical_section);
 #endif
 
     exit(code);
 }
 
 
 /// Forward a printf-style message to the installed message handler.
 /// When no handler is installed the message is dropped.
 void NV_CDECL nvDebugPrint(const char *msg, ...)
 {
     va_list args;
     va_start(args, msg);
     if (s_message_handler == NULL) {
         // No handler installed: nothing to report to.
     }
     else {
         s_message_handler->log( msg, args );
     }
     va_end(args);
 }
 
 
 /// Print the current call stack through nvDebug.
 /// Does nothing on platforms without stack trace support or when
 /// hasStackTrace() reports that no trace is available.
 void debug::dumpInfo()
 {
 #if (NV_OS_WIN32 && NV_CC_MSVC) || (defined(HAVE_SIGNAL_H) && defined(HAVE_EXECINFO_H))
     if (!hasStackTrace()) {
         return;
     }
 
     void * frames[64];
     int frame_count = backtrace(frames, 64);
 
     nvDebug( "\nDumping stacktrace:\n" );
 
     // Format the trace starting at level 1.
     Array<const char *> text;
     writeStackTrace(frames, frame_count, 1, text);
 
     const uint line_count = text.count();
     for (uint k = 0; k != line_count; ++k) {
         nvDebug("%s", text[k]);
         delete text[k];
     }
 #endif
 }
 
 /// Send the current call stack, one line at a time, to messageHandler.
 /// callstackLevelsToSkip extra levels are omitted in addition to the call to
 /// dumpCallstack itself. Does nothing when stack traces are unavailable.
 void debug::dumpCallstack(MessageHandler *messageHandler, int callstackLevelsToSkip /*= 0*/)
 {
 #if (NV_OS_WIN32 && NV_CC_MSVC) || (defined(HAVE_SIGNAL_H) && defined(HAVE_EXECINFO_H))
     if (!hasStackTrace()) {
         return;
     }
 
     void * frames[64];
     int frame_count = backtrace(frames, 64);
 
     // + 1 to skip the call to dumpCallstack
     Array<const char *> text;
     writeStackTrace(frames, frame_count, callstackLevelsToSkip + 1, text);
 
     const uint line_count = text.count();
     for (uint k = 0; k != line_count; ++k) {
         // Each line is handed over as the format string with a NULL argument list.
         messageHandler->log(text[k], NULL);
         delete text[k];
     }
 #endif
 }
 
 
 /// Set the debug message handler.
 /// The pointer is stored as-is in s_message_handler; nvDebugPrint forwards
 /// messages to it for as long as it stays installed.
 void debug::setMessageHandler(MessageHandler * message_handler)
 {
     s_message_handler = message_handler;
 }
 
 /// Reset the debug message handler.
 /// Afterwards s_message_handler is NULL, so nvDebugPrint drops its messages.
 void debug::resetMessageHandler()
 {
     s_message_handler = NULL;
 }
 
 /// Set the assert handler.
 /// The pointer is stored as-is in s_assert_handler; nvAbort uses it instead
 /// of the platform default handler while it is non-NULL.
 void debug::setAssertHandler(AssertHandler * assert_handler)
 {
     s_assert_handler = assert_handler;
 }
 
 /// Reset the assert handler.
 /// Afterwards s_assert_handler is NULL, so nvAbort falls back to its
 /// platform default handler.
 void debug::resetAssertHandler()
 {
     s_assert_handler = NULL;
 }
 
 #if NV_OS_WIN32
 #if NV_USE_SEPARATE_THREAD
 
 // Create the synchronization objects and the dedicated thread used for
 // exception handling (only compiled when NV_USE_SEPARATE_THREAD is set on Win32).
 static void initHandlerThread()
 {
     // Initial stack size requested for the handler thread (64KB).
     static const int kExceptionHandlerThreadInitialStackSize = 64 * 1024;
 
     // Set synchronization primitives and the handler thread.  Each
     // ExceptionHandler object gets its own handler thread because that's the
     // only way to reliably guarantee sufficient stack space in an exception,
     // and it allows an easy way to get a snapshot of the requesting thread's
     // context outside of an exception.
     InitializeCriticalSection(&s_handler_critical_section);
     
     // Both semaphores start at 0 with a maximum count of 1.
     s_handler_start_semaphore = CreateSemaphore(NULL, 0, 1, NULL);
     nvDebugCheck(s_handler_start_semaphore != NULL);
 
     s_handler_finish_semaphore = CreateSemaphore(NULL, 0, 1, NULL);
     nvDebugCheck(s_handler_finish_semaphore != NULL);
 
     // Don't attempt to create the thread if we could not create the semaphores.
     if (s_handler_finish_semaphore != NULL && s_handler_start_semaphore != NULL) {
         DWORD thread_id;
         s_handler_thread = CreateThread(NULL,         // lpThreadAttributes
                                         kExceptionHandlerThreadInitialStackSize,
                                         ExceptionHandlerThreadMain,
                                         NULL,         // lpParameter
                                         0,            // dwCreationFlags
                                         &thread_id);
         nvDebugCheck(s_handler_thread != NULL);
     }
 
     /* @@ We should avoid loading modules in the exception handler!
     dbghelp_module_ = LoadLibrary(L"dbghelp.dll");
     if (dbghelp_module_) {
         minidump_write_dump_ = reinterpret_cast<MiniDumpWriteDump_type>(GetProcAddress(dbghelp_module_, "MiniDumpWriteDump"));
     }
     */
 }
 
 // Counterpart of initHandlerThread. Currently an empty stub: nothing that
 // initHandlerThread creates is released here.
 static void shutHandlerThread() {
     // @@ Free stuff. Terminate thread.
 }
 
 #endif // NV_USE_SEPARATE_THREAD
 #endif // NV_OS_WIN32
 
 
 // Enable signal handler.
 // Installs the library's crash handlers and records the requested mode in
 // s_interactive. Asserts (nvCheck) that the handler is not already enabled.
 //  - Win32/MSVC: installs the unhandled exception filter, the invalid
 //    parameter and pure virtual call handlers, and initializes the symbol
 //    handler used for stack traces.
 //  - Other platforms with <signal.h>: installs nvSigHandler for SIGSEGV,
 //    SIGTRAP, SIGFPE and SIGBUS, saving the previous actions so that
 //    disableSigHandler can restore them.
 void debug::enableSigHandler(bool interactive)
 {
     nvCheck(s_sig_handler_enabled != true);
     s_sig_handler_enabled = true;
     s_interactive = interactive;
 
 #if NV_OS_WIN32 && NV_CC_MSVC
     // NOTE(review): message boxes are suppressed when interactive is true; confirm the flag is not meant to be inverted here.
     if (interactive) {
         // Do not display message boxes on error.
         // http://msdn.microsoft.com/en-us/library/windows/desktop/ms680621(v=vs.85).aspx
         SetErrorMode(SEM_FAILCRITICALERRORS|SEM_NOGPFAULTERRORBOX|SEM_NOOPENFILEERRORBOX);
 
         // CRT reports errors to debug output only.
         // http://msdn.microsoft.com/en-us/library/1y71x448(v=vs.80).aspx
         _CrtSetReportMode(_CRT_WARN, _CRTDBG_MODE_DEBUG);
         _CrtSetReportMode(_CRT_ERROR, _CRTDBG_MODE_DEBUG);
         _CrtSetReportMode(_CRT_ASSERT, _CRTDBG_MODE_DEBUG);
     }
 
 
 #if NV_USE_SEPARATE_THREAD
     initHandlerThread();
 #endif
 
     // Remember the previous filter so disableSigHandler can put it back.
     s_old_exception_filter = ::SetUnhandledExceptionFilter( handleException );
 
 #if _MSC_VER >= 1400  // MSVC 2005/8
     _set_invalid_parameter_handler(handleInvalidParameter);
 #endif  // _MSC_VER >= 1400
 
     _set_purecall_handler(handlePureVirtualCall);
 
 
     // SYMOPT_DEFERRED_LOADS make us not take a ton of time unless we actual log traces
     SymSetOptions(SYMOPT_DEFERRED_LOADS|SYMOPT_FAIL_CRITICAL_ERRORS|SYMOPT_LOAD_LINES|SYMOPT_UNDNAME);
 
     if (!SymInitialize(GetCurrentProcess(), NULL, TRUE)) {
         DWORD error = GetLastError();
         // DWORD is an unsigned long, so it must be printed with %lu rather than %d.
         nvDebug("SymInitialize returned error : %lu\n", error);
     }
 
 #elif !NV_OS_WIN32 && defined(HAVE_SIGNAL_H)
 
     // Install our signal handler
     struct sigaction sa;
     sa.sa_sigaction = nvSigHandler;
     sigemptyset (&sa.sa_mask);
     sa.sa_flags = SA_ONSTACK | SA_RESTART | SA_SIGINFO;
 
     // The previous action of every signal is saved for disableSigHandler.
     sigaction(SIGSEGV, &sa, &s_old_sigsegv);
     sigaction(SIGTRAP, &sa, &s_old_sigtrap);
     sigaction(SIGFPE, &sa, &s_old_sigfpe);
     sigaction(SIGBUS, &sa, &s_old_sigbus);
 
 #endif
 }
 
 /// Disable signal handler.
 /// Asserts (nvCheck) that the handler is currently enabled, then restores
 /// what enableSigHandler saved: the previous unhandled exception filter on
 /// Win32/MSVC, or the previous actions of the four signals elsewhere.
 void debug::disableSigHandler()
 {
     nvCheck(s_sig_handler_enabled == true);
     s_sig_handler_enabled = false;
 
 #if NV_OS_WIN32 && NV_CC_MSVC
 
     // NOTE(review): the invalid parameter / pure call handlers and the handler thread set up by enableSigHandler are not undone here.
     ::SetUnhandledExceptionFilter( s_old_exception_filter );
     s_old_exception_filter = NULL;
 
     SymCleanup(GetCurrentProcess());
 
 #elif !NV_OS_WIN32 && defined(HAVE_SIGNAL_H)
 
     sigaction(SIGSEGV, &s_old_sigsegv, NULL);
     sigaction(SIGTRAP, &s_old_sigtrap, NULL);
     sigaction(SIGFPE, &s_old_sigfpe, NULL);
     sigaction(SIGBUS, &s_old_sigbus, NULL);
 
 #endif
 }
 
 
 /// Report whether a debugger appears to be attached to this process.
 /// Each platform uses its own mechanism; the generic fallback is only a heuristic.
 bool debug::isDebuggerPresent()
 {
 #if NV_OS_WIN32
     // Resolve IsDebuggerPresent at run time from kernel32.dll and call it when found.
     HINSTANCE kernel32 = GetModuleHandleA("kernel32.dll");
     if (kernel32) {
         FARPROC IsDebuggerPresent = GetProcAddress(kernel32, "IsDebuggerPresent");
         if (IsDebuggerPresent != NULL && IsDebuggerPresent()) {
             return true;
         }
     }
     return false;
 #elif NV_OS_XBOX
 #ifdef _DEBUG
     return DmIsDebuggerPresent() == TRUE;
 #else
     // Non-debug builds never report a debugger.
     return false;
 #endif
 #elif NV_OS_ORBIS
   #if PS4_FINAL_REQUIREMENTS
     return false; 
   #else
     return sceDbgIsDebuggerAttached() == 1;
   #endif
 #elif NV_OS_DARWIN
     // Query this process' kinfo_proc through sysctl and test the P_TRACED flag.
     int mib[4];
     struct kinfo_proc info;
     size_t size;
     mib[0] = CTL_KERN;
     mib[1] = KERN_PROC;
     mib[2] = KERN_PROC_PID;
     mib[3] = getpid();
     size = sizeof(info);
     // Pre-cleared so the flag test below reads 0 if sysctl does not fill it in (its return value is not checked).
     info.kp_proc.p_flag = 0;
     sysctl(mib,4,&info,&size,NULL,0);
     return ((info.kp_proc.p_flag & P_TRACED) == P_TRACED);
 #else
     // if ppid != sid, some process spawned our app, probably a debugger. 
     return getsid(getpid()) != getppid();
 #endif
 }
 
 /// Try to get a debugger attached to the current process.
 /// On Win32, when no debugger is present, VSJitDebugger.exe is launched from
 /// the system directory with this process' id. Returns false when the
 /// debugger process could not be started or exited with a non-zero code.
 /// On other platforms nothing is done and true is returned.
 bool debug::attachToDebugger()
 {
 #if NV_OS_WIN32
     if (isDebuggerPresent() == FALSE) {
         // Build the command line: "<system dir>\VSJitDebugger.exe" -p <pid>
         Path process(1024);
         process.copy("\"");
         // The system directory is written right after the opening quote.
         GetSystemDirectoryA(process.str() + 1, 1024 - 1);
 
         process.appendSeparator();
 
         process.appendFormat("VSJitDebugger.exe\" -p %lu", ::GetCurrentProcessId());
 
         STARTUPINFOA sSi;
         memset(&sSi, 0, sizeof(sSi));
 
         PROCESS_INFORMATION sPi;
         memset(&sPi, 0, sizeof(sPi));
         
         BOOL b = CreateProcessA(NULL, process.str(), NULL, NULL, FALSE, 0, NULL, NULL, &sSi, &sPi);
         if (b != FALSE) {
             // Block until the JIT debugger launcher exits.
             ::WaitForSingleObject(sPi.hProcess, INFINITE);
             
             DWORD dwExitCode;
             ::GetExitCodeProcess(sPi.hProcess, &dwExitCode);
             if (dwExitCode != 0) //if exit code is zero, a debugger was selected
                 b = FALSE;
         }
 
         if (sPi.hThread != NULL) ::CloseHandle(sPi.hThread);
         if (sPi.hProcess != NULL) ::CloseHandle(sPi.hProcess);
 
         if (b == FALSE)
             return false;
 
         // Poll for the debugger: up to 300 checks, 200 ms apart.
         // NOTE(review): true is still returned below if no debugger shows up in that time -- confirm this is intended.
         for (int i = 0; i < 5*60; i++) {
             if (isDebuggerPresent())
                 break;
             ::Sleep(200);
         }
     }
 #endif // NV_OS_WIN32
 
     return true;
 }
Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/Debug.h
===================================================================
--- ps/trunk/libraries/source/nvtt/src/src/nvcore/Debug.h	(revision 23379)
+++ ps/trunk/libraries/source/nvtt/src/src/nvcore/Debug.h	(revision 23380)
@@ -1,217 +1,217 @@
-// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+// This code is in the public domain -- Ignacio CastaÃ±o <castano@gmail.com>
 
 #pragma once
 #ifndef NV_CORE_DEBUG_H
 #define NV_CORE_DEBUG_H
 
 #include "nvcore.h"
 
 #include <stdarg.h> // va_list
 
 
 // Make sure we are using our assert.
 #undef assert
 
 // Result codes of nvAbort / AssertHandler::assertion. The assert macros in
 // this header break into the debugger on NV_ABORT_DEBUG, and
 // nvAssertMacroWithIgnoreAll silences the assert on NV_ABORT_IGNORE.
 #define NV_ABORT_DEBUG      1
 #define NV_ABORT_IGNORE     2
 #define NV_ABORT_EXIT       3
 
 // Disabled assertion: the expression only appears inside sizeof, so it is
 // still compiled but never evaluated at run time.
 #define nvNoAssert(exp) \
     NV_MULTI_LINE_MACRO_BEGIN \
     (void)sizeof(exp); \
     NV_MULTI_LINE_MACRO_END
 
 #if NV_NO_ASSERT
 
 // With NV_NO_ASSERT set every assert flavor, and nvDebugBreak, compiles to nvNoAssert.
 #   define nvAssert(exp) nvNoAssert(exp)
 #   define nvCheck(exp) nvNoAssert(exp)
 #   define nvDebugAssert(exp) nvNoAssert(exp)
 #   define nvDebugCheck(exp) nvNoAssert(exp)
 #   define nvDebugBreak() nvNoAssert(0)
 
 #else // NV_NO_ASSERT
 
 // nvDebugBreak(): stop in the debugger using the compiler's intrinsic.
 // Compilation fails on compilers that are not covered below.
 #   if NV_CC_MSVC
         // @@ Does this work in msvc-6 and earlier?
 #       define nvDebugBreak()       __debugbreak()
 //#       define nvDebugBreak()        __asm { int 3 }
 #   elif NV_OS_ORBIS
 #       define nvDebugBreak()       __debugbreak()
 #   elif NV_CC_GNUC
 #       define nvDebugBreak()       __builtin_trap()
 #   else
 #       error "No nvDebugBreak()!"
 #   endif
 
 /*
 #   elif NV_CC_GNUC || NV_CPU_PPC && NV_OS_DARWIN
         // @@ Use __builtin_trap() on GCC
 #       define nvDebugBreak()       __asm__ volatile ("trap")
 #   elif (NV_CC_GNUC || NV_CPU_X86 || NV_CPU_X86_64) && NV_OS_DARWIN
 #       define nvDebugBreak()       __asm__ volatile ("int3")
 #   elif NV_CC_GNUC || NV_CPU_X86 || NV_CPU_X86_64
 #       define nvDebugBreak()       __asm__ ( "int %0" : :"I"(3) )
 #   else
 #       include <signal.h>
 #       define nvDebugBreak()       raise(SIGTRAP)
 #   endif
 */
 
 // Break into the debugger only the first time this particular expansion is
 // reached; a function-local static flag remembers that it already fired.
 #define nvDebugBreakOnce() \
     NV_MULTI_LINE_MACRO_BEGIN \
     static bool firstTime = true; \
     if (firstTime) { firstTime = false; nvDebugBreak(); } \
     NV_MULTI_LINE_MACRO_END
 
 // Statement-form assert: when exp is false, report it through nvAbort and
 // break into the debugger if nvAbort answers NV_ABORT_DEBUG.
 #define nvAssertMacro(exp) \
     NV_MULTI_LINE_MACRO_BEGIN \
     if (!(exp)) { \
         if (nvAbort(#exp, __FILE__, __LINE__, __FUNC__) == NV_ABORT_DEBUG) { \
             nvDebugBreak(); \
         } \
     } \
     NV_MULTI_LINE_MACRO_END
 
 // Assert with an optional printf-style message and an "ignore all" latch:
 // once nvAbort answers NV_ABORT_IGNORE, this particular expansion stops
 // checking its expression for the rest of the run (static flag).
 // GCC, LLVM need "##" before the __VA_ARGS__, MSVC doesn't care
 #define nvAssertMacroWithIgnoreAll(exp,...) \
     NV_MULTI_LINE_MACRO_BEGIN \
         static bool ignoreAll = false; \
         if (!ignoreAll && !(exp)) { \
             int result = nvAbort(#exp, __FILE__, __LINE__, __FUNC__, ##__VA_ARGS__); \
             if (result == NV_ABORT_DEBUG) { \
                 nvDebugBreak(); \
             } else if (result == NV_ABORT_IGNORE) { \
                 ignoreAll = true; \
             } \
         } \
     NV_MULTI_LINE_MACRO_END
 
 // Interesting assert macro from Insomniac:
 // http://www.gdcvault.com/play/1015319/Developing-Imperfect-Software-How-to
 // Unlike nvAssertMacro this is an expression, so it can be used as follows:
 // if (nvCheckMacro(i < count)) {
 //     normal path
 // } else {
 //     fixup code.
 // }
 // It yields true when exp holds, or when the failure was reported and the
 // debugger break was taken; it yields false for any other nvAbort result.
 // Note that nvCheck is mapped to the statement-form nvAssertMacro further
 // down in this header, so it cannot be used inside an if condition like this.
 // This style of macro could be combined with __builtin_expect to let the compiler know failure is unlikely.
 #define nvCheckMacro(exp) \
     (\
         (exp) ? true : ( \
             (nvAbort(#exp, __FILE__, __LINE__, __FUNC__) == NV_ABORT_DEBUG) ? (nvDebugBreak(), true) : ( false ) \
         ) \
     )
 
 
 // nvAssert / nvCheck are active in every build that does not set NV_NO_ASSERT.
 #define nvAssert(exp)    nvAssertMacro(exp)
 #define nvCheck(exp)     nvAssertMacro(exp)
 
 // nvDebugAssert / nvDebugCheck are only active when _DEBUG is defined.
 #if defined(_DEBUG)
 #   define nvDebugAssert(exp)   nvAssertMacro(exp)
 #   define nvDebugCheck(exp)    nvAssertMacro(exp)
 #else // _DEBUG
 #   define nvDebugAssert(exp)   nvNoAssert(exp)
 #   define nvDebugCheck(exp)    nvNoAssert(exp)
 #endif // _DEBUG
 
 #endif // NV_NO_ASSERT
 
 // Use nvAssume for very simple expressions only: nvAssume(0), nvAssume(value == true), etc.
 /*#if !defined(_DEBUG)
 #   if NV_CC_MSVC
 #       define nvAssume(exp)    __assume(exp)
 #   else
 #       define nvAssume(exp)    nvCheck(exp)
 #   endif
 #else
 #   define nvAssume(exp)    nvCheck(exp)
 #endif*/
 
 // nvUnreachable(): marks a code path that must never execute.
 // Debug builds raise an assertion first; every build then tells the compiler
 // that the path is unreachable. The debug variants are wrapped in
 // NV_MULTI_LINE_MACRO_BEGIN/END so that both statements stay together when
 // the macro is the body of an unbraced if/else.
 #if defined(_DEBUG)
 #  if NV_CC_MSVC
 #   define nvUnreachable() \
         NV_MULTI_LINE_MACRO_BEGIN \
         nvAssert(0 && "unreachable"); \
         __assume(0); \
         NV_MULTI_LINE_MACRO_END
 #  else
 #   define nvUnreachable() \
         NV_MULTI_LINE_MACRO_BEGIN \
         nvAssert(0 && "unreachable"); \
         __builtin_unreachable(); \
         NV_MULTI_LINE_MACRO_END
 #  endif
 #else
 #  if NV_CC_MSVC
 #   define nvUnreachable() __assume(0)
 #  else
 #   define nvUnreachable() __builtin_unreachable()
 #  endif
 #endif
 
 
 // nvError reports x through nvAbort (the assert handler path); nvWarning
 // only prints x, with file and line, through nvDebugPrint.
 #define nvError(x)      nvAbort(x, __FILE__, __LINE__, __FUNC__)
 #define nvWarning(x)    nvDebugPrint("*** Warning %s/%d: %s\n", __FILE__, __LINE__, (x))
 
 // NV_DEBUG_PRINT selects whether nvDebug(...) prints; it defaults to enabled
 // and can be predefined by the build to override that.
 #ifndef NV_DEBUG_PRINT
 #define NV_DEBUG_PRINT 1 //defined(_DEBUG)
 #endif
 
 // nvDebug forwards to nvDebugPrint when enabled and expands to a no-op otherwise.
 #if NV_DEBUG_PRINT
 #define nvDebug(...)    nvDebugPrint(__VA_ARGS__)
 #else
 #if NV_CC_MSVC
 #define nvDebug(...)    __noop(__VA_ARGS__)
 #else
 #define nvDebug(...)    ((void)0) // Non-msvc platforms do not evaluate arguments?
 #endif
 #endif
 
 
 // Report a failed assertion; msg is an optional printf-style format followed
 // by its arguments. The result is compared against the NV_ABORT_* codes by the assert macros.
 NVCORE_API int nvAbort(const char *exp, const char *file, int line, const char * func = NULL, const char * msg = NULL, ...) __attribute__((format (printf, 5, 6)));
 // Print a printf-style debug message.
 NVCORE_API void NV_CDECL nvDebugPrint( const char *msg, ... ) __attribute__((format (printf, 1, 2)));
 
 namespace nv
 {
     /// Heuristic sanity check of a pointer value; it never dereferences ptr.
     /// 64-bit builds accept NULL and reject addresses in the first 64KB or
     /// outside the lower canonical half of the address space. Other builds
     /// only reject a few well-known fill patterns.
     inline bool isValidPtr(const void * ptr) {
     #if NV_CPU_X86_64 || POSH_CPU_PPC64
         if (ptr == NULL) return true;
         // The first 64KB are never handed out to applications.
         if (reinterpret_cast<uint64>(ptr) < 0x10000ULL) return false;
         // User-mode addresses live below 2^47. The previous limit
         // (0x000007FFFFFEFFFF, i.e. 8TB) matched old Windows releases only and
         // rejected ordinary stack and mmap addresses on Linux and macOS.
         if (reinterpret_cast<uint64>(ptr) >= 0x0000800000000000ULL) return false;
     #else
         // Fill patterns commonly left behind by debug runtimes, plus an all-ones pointer.
         if (reinterpret_cast<uint32>(ptr) == 0xcccccccc) return false;
         if (reinterpret_cast<uint32>(ptr) == 0xcdcdcdcd) return false;
         if (reinterpret_cast<uint32>(ptr) == 0xdddddddd) return false;
         if (reinterpret_cast<uint32>(ptr) == 0xffffffff) return false;
     #endif
         return true;
     }
 
     // Message handler interface.
     // Implement log() and install the object with debug::setMessageHandler
     // to receive the messages printed through nvDebugPrint.
     struct MessageHandler {
         // str is a printf-style format string and arg its argument list.
         virtual void log(const char * str, va_list arg) = 0;
         virtual ~MessageHandler() {}
     };
 
     // Assert handler interface.
     // Implement assertion() and install the object with debug::setAssertHandler
     // to decide what happens when an assertion fails.
     struct AssertHandler {
         // Receives the failed expression text, its location and an optional
         // printf-style message with arguments. The assert macros compare the
         // returned value against the NV_ABORT_* codes.
         virtual int assertion(const char *exp, const char *file, int line, const char *func, const char *msg, va_list arg) = 0;
         virtual ~AssertHandler() {}
     };
 
 
     // Debugging utilities: handler installation, crash handling and call stack dumps.
     namespace debug
     {
         // Print the current call stack through nvDebug.
         NVCORE_API void dumpInfo();
         // Send the current call stack, line by line, to messageHandler.
         NVCORE_API void dumpCallstack( MessageHandler *messageHandler, int callstackLevelsToSkip = 0 );
 
         // Install / remove the handler that receives nvDebugPrint messages.
         NVCORE_API void setMessageHandler( MessageHandler * messageHandler );
         NVCORE_API void resetMessageHandler();
 
         // Install / remove the handler invoked by nvAbort.
         NVCORE_API void setAssertHandler( AssertHandler * assertHanlder );
         NVCORE_API void resetAssertHandler();
 
         // Install / remove the crash handlers (signals or unhandled exceptions).
         NVCORE_API void enableSigHandler(bool interactive);
         NVCORE_API void disableSigHandler();
 
         // Debugger detection and just-in-time attachment.
         NVCORE_API bool isDebuggerPresent();
         NVCORE_API bool attachToDebugger();
 
         // Abnormal termination: ends the process with exit(code).
         NVCORE_API void terminate(int code);
     }
 
 } // nv namespace
 
 #endif // NV_CORE_DEBUG_H
Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/ForEach.h
===================================================================
--- ps/trunk/libraries/source/nvtt/src/src/nvcore/ForEach.h	(revision 23379)
+++ ps/trunk/libraries/source/nvtt/src/src/nvcore/ForEach.h	(revision 23380)
@@ -1,68 +1,68 @@
-// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+// This code is in the public domain -- Ignacio CastaÃ±o <castano@gmail.com>
 
 #pragma once
 #ifndef NV_CORE_FOREACH_H
 #define NV_CORE_FOREACH_H
 
 /*
 These foreach macros are very non-standard and somewhat confusing, but I like them.
 */
 
 #include "nvcore.h"
 
 #if NV_CC_GNUC // If typeof or decltype is available:
 // NV_DECLTYPE names the "type of an expression" operator that is available:
 // typeof before C++11, decltype from C++11 on.
 #if !NV_CC_CPP11
 #   define NV_DECLTYPE typeof // Using a non-standard extension over typeof that behaves as C++11 decltype
 #else
 #   define NV_DECLTYPE decltype
 #endif
 
 /*
 Ideally we would like to write this:
 
 #define NV_FOREACH(i, container) \
     for(NV_DECLTYPE(container)::PseudoIndex i((container).start()); !(container).isDone(i); (container).advance(i))
 
 But gcc versions prior to 4.7 required an intermediate type. See:
 https://gcc.gnu.org/bugzilla/show_bug.cgi?id=6709
 */
 
 // Iterate over container with pseudo index i, using the container's
 // start() / isDone() / advance() protocol.
 // The expansion is a typedef followed by a for statement, so it must not be
 // used as the body of an unbraced if/else; the typedef name embeds __LINE__.
 #define NV_FOREACH(i, container) \
     typedef NV_DECLTYPE(container) NV_STRING_JOIN2(cont,__LINE__); \
     for(NV_STRING_JOIN2(cont,__LINE__)::PseudoIndex i((container).start()); !(container).isDone(i); (container).advance(i))
 
 #else // If typeof not available:
 
 #include <new> // placement new
 
 struct PseudoIndexWrapper {
     template <typename T>
     PseudoIndexWrapper(const T & container) {
         nvStaticCheck(sizeof(typename T::PseudoIndex) <= sizeof(memory));
         new (memory) typename T::PseudoIndex(container.start());
     }
     // PseudoIndex cannot have a dtor!
 
     template <typename T> typename T::PseudoIndex & operator()(const T * /*container*/) {
         return *reinterpret_cast<typename T::PseudoIndex *>(memory);
     }
     template <typename T> const typename T::PseudoIndex & operator()(const T * /*container*/) const {
         return *reinterpret_cast<const typename T::PseudoIndex *>(memory);
     }
 
     uint8 memory[4];	// Increase the size if we have bigger enumerators.
 };
 
 // Fallback NV_FOREACH: i is a PseudoIndexWrapper, and the typed index is
 // recovered on every use by passing the container's address to it.
 // Note that the container expression is expanded several times.
 #define NV_FOREACH(i, container) \
     for(PseudoIndexWrapper i(container); !(container).isDone(i(&(container))); (container).advance(i(&(container))))
 
 #endif
 
 // Declare foreach keyword.
 // Define NV_NO_USE_KEYWORDS before including this header to keep the
 // lowercase names out of the macro namespace.
 #if !defined NV_NO_USE_KEYWORDS
 #   define foreach NV_FOREACH
 #   define foreach_index NV_FOREACH
 #endif
 
 
 #endif // NV_CORE_FOREACH_H
Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/Hash.h
===================================================================
--- ps/trunk/libraries/source/nvtt/src/src/nvcore/Hash.h	(revision 23379)
+++ ps/trunk/libraries/source/nvtt/src/src/nvcore/Hash.h	(revision 23380)
@@ -1,83 +1,83 @@
-// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+// This code is in the public domain -- Ignacio CastaÃ±o <castano@gmail.com>
 
 #pragma once
 #ifndef NV_CORE_HASH_H
 #define NV_CORE_HASH_H
 
 #include "nvcore.h"
 
 namespace nv
 {
     /// Hash size bytes starting at data_in, continuing from the seed h.
     /// Every byte updates the state as (h << 16) + (h << 6) - h + byte, which
     /// in unsigned arithmetic equals h * 65599 + byte.
     inline uint sdbmHash(const void * data_in, uint size, uint h = 5381)
     {
         const uint8 * cursor = (const uint8 *) data_in;
         for (uint remaining = size; remaining != 0; --remaining) {
             const uint byte = (uint) *cursor++;
             h = (h << 16) + (h << 6) - h + byte;
         }
         return h;
     }
 
     // Hash count floats starting at f, continuing from the seed h.
     // Negative zero is hashed as positive zero so that both hash alike.
     // Note that this hash does not handle NaN properly.
     inline uint sdbmFloatHash(const float * f, uint count, uint h = 5381)
     {
         for (uint k = 0; k != count; ++k) {
             //nvDebugCheck(nv::isFinite(*f));
             union { float value; uint32 bits; } pun = { f[k] };
             if (pun.bits == 0x80000000) {
                 pun.bits = 0;
             }
             h = sdbmHash(&pun, 4, h);
         }
         return h;
     }
 
 
     // Generic hash: hashes the raw bytes of t (sizeof(T) of them), continuing from the seed h.
     template <typename T>
     inline uint hash(const T & t, uint h = 5381)
     {
         return sdbmHash(&t, sizeof(T), h);
     }
 
     // float specialization: goes through sdbmFloatHash so that -0.0f and 0.0f hash alike.
     template <>
     inline uint hash(const float & f, uint h)
     {
         return sdbmFloatHash(&f, 1, h);
     }
 
 
     // Functors for hash table:
     // Hash<Key> calls hash(k) with the default seed.
     template <typename Key> struct Hash 
     {
         uint operator()(const Key & k) const {
             return hash(k);
         }
     };
 
     // Equal<Key> compares two keys with operator==.
     template <typename Key> struct Equal
     {
         bool operator()(const Key & k0, const Key & k1) const {
             return k0 == k1;
         }
     };
 
 
     // @@ Move to Utils.h?
     // Plain aggregate of two values.
     template <typename T1, typename T2>
     struct Pair {
         T1 first;
         T2 second;
     };
 
     // Two pairs are equal when both members compare equal.
     template <typename T1, typename T2>
     bool operator==(const Pair<T1,T2> & p0, const Pair<T1,T2> & p1) {
         return p0.first == p1.first && p0.second == p1.second;
     }
 
     // Hash a Pair member by member: first is hashed starting from the seed h,
     // and that result seeds the hash of second. The seed used to be ignored,
     // which broke chaining whenever a caller passed a non-default h.
     template <typename T1, typename T2>
     uint hash(const Pair<T1,T2> & p, uint h = 5381) {
         return hash(p.second, hash(p.first, h));
     }
 
 
 } // nv namespace
 
 #endif // NV_CORE_HASH_H
Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/Memory.cpp
===================================================================
--- ps/trunk/libraries/source/nvtt/src/src/nvcore/Memory.cpp	(revision 23379)
+++ ps/trunk/libraries/source/nvtt/src/src/nvcore/Memory.cpp	(revision 23380)
@@ -1,119 +1,119 @@
-// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+// This code is in the public domain -- Ignacio CastaÃ±o <castano@gmail.com>
 
 #include "Memory.h"
 #include "Debug.h"
 
 #include <stdlib.h>
 
 #define USE_EFENCE 0
 
 #if USE_EFENCE
 extern "C" void *EF_malloc(size_t size);
 extern "C" void *EF_realloc(void * oldBuffer, size_t newSize);
 extern "C" void EF_free(void * address);
 #endif
 
 using namespace nv;
 
 #if NV_OVERRIDE_ALLOC
 
 // Allocation override (only compiled when NV_OVERRIDE_ALLOC is set).
 // Forwards to Electric Fence when USE_EFENCE is enabled.
 // NOTE(review): as written at global scope, ::malloc appears to name this very function -- confirm this does not recurse when the override is enabled.
 void * malloc(size_t size)
 {
 #if USE_EFENCE
     return EF_malloc(size);
 #else
     return ::malloc(size);
 #endif
 }
 
 // Allocation entry point that also receives the caller's file and line.
 // Both are currently unused; the allocation itself matches malloc above.
 void * debug_malloc(size_t size, const char * file, int line)
 {
     NV_UNUSED(file);
     NV_UNUSED(line);
 #if USE_EFENCE
     return EF_malloc(size);
 #else
     return ::malloc(size);
 #endif
 }
 
 // Deallocation override, forwarding to Electric Fence when USE_EFENCE is enabled.
 // The const_cast is a no-op here because ptr is already a non-const void *.
 // NOTE(review): as written at global scope, ::free appears to name this very function -- confirm this does not recurse when the override is enabled.
 void free(void * ptr)
 {
 #if USE_EFENCE
     return EF_free(const_cast<void *>(ptr));
 #else
     ::free(const_cast<void *>(ptr));
 #endif
 }
 
 // Reallocation override, forwarding to Electric Fence when USE_EFENCE is enabled.
 // Debug builds assert that the call is not realloc(NULL, 0).
 // NOTE(review): as written at global scope, ::realloc appears to name this very function -- confirm this does not recurse when the override is enabled.
 void * realloc(void * ptr, size_t size)
 {
     nvDebugCheck(ptr != NULL || size != 0); // undefined realloc behavior.
 #if USE_EFENCE
     return EF_realloc(ptr, size);
 #else
     return ::realloc(ptr, size);
 #endif
 }
 
 /* No need to override this unless we want line info.
 void * operator new (size_t size) throw()
 {
     return malloc(size);
 }
 
 void operator delete (void *p) throw()
 {
     free(p);
 }
 
 void * operator new [] (size_t size) throw()
 {
     return malloc(size);
 }
 
 void operator delete [] (void * p) throw()
 {
     free(p);
 }
 */
 
 #if 0 // Code from Apple:
 void* operator new(std::size_t sz) throw (std::bad_alloc)
 {
         void *result = std::malloc (sz == 0 ? 1 : sz);
         if (result == NULL)
                 throw std::bad_alloc();
         gNewCounter++;
         return result;
 }
 void operator delete(void* p) throw()
 {
         if (p == NULL)
                 return;
         std::free (p);
         gDeleteCounter++;
 }
 
 /* These are the 'nothrow' versions of the above operators.
    The system version will try to call a std::new_handler if they
    fail, but your overriding versions are not required to do this.  */
 void* operator new(std::size_t sz, const std::nothrow_t&) throw()
 {
         try {
                 void * result = ::operator new (sz);  // calls our overridden operator new
                 return result;
         } catch (std::bad_alloc &) {
           return NULL;
         }
 }
 void operator delete(void* p, const std::nothrow_t&) throw()
 {
         ::operator delete (p);
 }
 
 #endif // 0
 
 #endif // NV_OVERRIDE_ALLOC
 
 
Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/Ptr.h
===================================================================
--- ps/trunk/libraries/source/nvtt/src/src/nvcore/Ptr.h	(revision 23379)
+++ ps/trunk/libraries/source/nvtt/src/src/nvcore/Ptr.h	(revision 23380)
@@ -1,322 +1,322 @@
-// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+// This code is in the public domain -- Ignacio CastaÃ±o <castano@gmail.com>
 
 #ifndef NV_CORE_PTR_H
 #define NV_CORE_PTR_H
 
 #include "nvcore.h"
 #include "Debug.h"
 
 #include "RefCounted.h"
 
 namespace nv
 {
     class WeakProxy;
 
     /** Simple auto pointer template class.
     *
     * This is very similar to the standard auto_ptr class, but with some 
     * additional limitations to make its use less error prone:
     * - Copy constructor and assignment operator are disabled.
     * - reset method is removed.
     * 
     * The semantics of the standard auto_ptr are not clear and change depending
     * on the std implementation. For a discussion of the problems of auto_ptr read:
     * http://www.awprofessional.com/content/images/020163371X/autoptrupdate\auto_ptr_update.html
     */
     template <class T>
     class AutoPtr
     {
         NV_FORBID_COPY(AutoPtr);
         NV_FORBID_HEAPALLOC();
     public:
 
         /// Take ownership of p, which may be NULL.
         AutoPtr(T * p = NULL) : m_ptr(p) { }
 
         /// Take ownership of a pointer of another type, converted with static_cast.
         template <class Q>
         AutoPtr(Q * p) : m_ptr(static_cast<T *>(p)) { }
 
         /// Destroy the owned object, if any.
         ~AutoPtr() {
             delete m_ptr;
             m_ptr = NULL;
         }
 
         /// Replace the owned object with p. Assigning the pointer that is
         /// already held does nothing.
         void operator=( T * p ) {
             if (p == m_ptr) return;
 
             delete m_ptr;
             m_ptr = p;
         }
 
         /// Same as above for a pointer of another type.
         template <class Q>
         void operator=( Q * p ) {
             if (p == m_ptr) return;
 
             delete m_ptr;
             m_ptr = static_cast<T *>(p);
         }
 
         /// Member access; the pointer must not be NULL.
         T * operator -> () const {
             nvDebugCheck(m_ptr != NULL);
             return m_ptr;
         }
 
         /// Dereference; the pointer must not be NULL.
         T & operator*() const {
             nvDebugCheck(m_ptr != NULL);
             return *m_ptr;
         }
 
         /// Raw pointer access, ownership is kept.
         T * ptr() const { return m_ptr; }
 
         /// Give up ownership: return the held pointer and become NULL.
         T * release() {
             T * const released = m_ptr;
             m_ptr = NULL;
             return released;
         }
 
         /// AutoPtr == raw pointer.
         friend bool operator == (const AutoPtr<T> & ap, const T * const p) {
             return (p == ap.m_ptr);
         }
 
         /// AutoPtr != raw pointer.
         friend bool operator != (const AutoPtr<T> & ap, const T * const p) {
             return (p != ap.m_ptr);
         }
 
         /// Raw pointer == AutoPtr.
         friend bool operator == (const T * const p, const AutoPtr<T> & ap) {
             return (p == ap.m_ptr);
         }
 
         /// Raw pointer != AutoPtr.
         friend bool operator != (const T * const p, const AutoPtr<T> & ap) {
             return (p != ap.m_ptr);
         }
 
     private:
         T * m_ptr;
     };
 
 
     /// Intrusive reference-counting smart pointer.
     /// BaseClass must implement addRef() and release().
     template <class BaseClass>
     class SmartPtr {
     public:
 
         typedef SmartPtr<BaseClass> ThisType;
 
         /// Construct a NULL pointer.
         SmartPtr() : m_ptr(NULL) { }
 
         /// Converting ctor: share the object held by a SmartPtr of another type.
         template <class OtherBase>
         SmartPtr( const SmartPtr<OtherBase> & tc ) : m_ptr(static_cast<BaseClass *>( tc.ptr() ))
         {
             if (m_ptr) m_ptr->addRef();
         }
 
         /// Copy ctor: share the object held by bc.
         SmartPtr( const ThisType & bc ) : m_ptr(bc.ptr())
         {
             if (m_ptr) m_ptr->addRef();
         }
 
         /// Construct from a raw pointer. SmartPtr(NULL) is valid.
         explicit SmartPtr( BaseClass * bc ) : m_ptr(bc)
         {
             if (m_ptr) m_ptr->addRef();
         }
 
         /// Dtor. Drops the reference that is held, if any.
         ~SmartPtr()
         {
             if (m_ptr) m_ptr->release();
             m_ptr = NULL;
         }
 
 
         /// Member access; the pointer must not be NULL.
         BaseClass * operator -> () const
         {
             nvCheck( m_ptr != NULL );
             return m_ptr;
         }
 
         /// Dereference; the pointer must not be NULL.
         BaseClass & operator*() const
         {
             nvCheck( m_ptr != NULL );
             return *m_ptr;
         }
 
         /// Raw pointer access.
         BaseClass * ptr() const { return m_ptr; }
 
         /// Assign from a SmartPtr of another type.
         template <class OtherBase>
         void operator = ( const SmartPtr<OtherBase> & tc )
         {
             BaseClass * const incoming = static_cast<BaseClass *>(tc.ptr());
             set( incoming );
         }
 
         /// Assign from a SmartPtr of the same type.
         void operator = ( const ThisType & bc )
         {
             set( bc.ptr() );
         }
 
         /// Assign from a raw pointer.
         void operator = ( BaseClass * bc )
         {
             set( bc );
         }
 
 
         /// Equality with a SmartPtr of another type.
         template <class OtherBase>
         bool operator == ( const SmartPtr<OtherBase> & other ) const
         {
             return m_ptr == other.ptr();
         }
 
         /// Equality with a SmartPtr of the same type.
         bool operator == ( const ThisType & bc ) const
         {
             return m_ptr == bc.ptr();
         }
 
         /// Equality with a raw pointer.
         bool operator == ( const BaseClass * const bc ) const
         {
             return m_ptr == bc;
         }
 
         /// Inequality with a SmartPtr of another type.
         template <class OtherBase>
         bool operator != ( const SmartPtr<OtherBase> & other ) const
         {
             return m_ptr != other.ptr();
         }
 
         /// Inequality with a SmartPtr of the same type.
         bool operator != ( const ThisType & bc ) const
         {
             return m_ptr != bc.ptr();
         }
 
         /// Inequality with a raw pointer.
         bool operator != (const BaseClass * const bc) const
         {
             return m_ptr != bc;
         }
 
         /// Ordering by address, against a SmartPtr of the same type.
         bool operator < (const ThisType & p) const
         {
             return m_ptr < p.ptr();
         }
 
         /// Run the isValidPtr sanity check on the held pointer.
         bool isValid() const {
             return isValidPtr(m_ptr);
         }
 
     private:
 
         // Hold p instead of the current object. The new object is referenced
         // before the old one is released, so assigning the object that is
         // already held is safe.
         void set( BaseClass * p )
         {
             if (p) {
                 p->addRef();
             }
             if (m_ptr) {
                 m_ptr->release();
             }
             m_ptr = p;
         }
 
     private:
 
         BaseClass * m_ptr;
 
     };
 
 
     /// Weak pointer template class.
     /// It references the WeakProxy of an object instead of the object itself:
     /// ptr() returns whatever pointer the proxy currently reports, or NULL
     /// when no proxy is held.
     template <class T>
     class WeakPtr {
     public:
 
         WeakPtr() {}
 
         WeakPtr(T * p)  { operator=(p); }
         WeakPtr(const SmartPtr<T> & p) { operator=(p.ptr()); }
 
         // Default constructor and assignment from weak_ptr<T> are OK.
 
         /// Point to p by taking its weak proxy; NULL clears the pointer.
         void operator=(T * p)
         {
             if (!p) {
                 m_proxy = NULL;
             }
             else {
                 m_proxy = p->getWeakProxy();
                 nvDebugCheck(m_proxy != NULL);
                 nvDebugCheck(m_proxy->ptr() == p);
             }
         }
 
         void operator=(const SmartPtr<T> & ptr) { operator=(ptr.ptr()); }
 
         // Comparisons are done on the pointer that is currently referenced.
         bool operator==(const SmartPtr<T> & p) const { return ptr() == p.ptr(); }
         bool operator!=(const SmartPtr<T> & p) const { return ptr() != p.ptr(); }
 
         bool operator==(const WeakPtr<T> & p) const { return ptr() == p.ptr(); }
         bool operator!=(const WeakPtr<T> & p) const { return ptr() != p.ptr(); }
 
         bool operator==(T * p) const { return ptr() == p; }
         bool operator!=(T * p) const { return ptr() != p; }
 
         /// Member access; the referenced pointer must not be NULL.
         T * operator->() const
         {
             T * const target = ptr();
             nvDebugCheck(target != NULL);
             return target;
         }
 
         /// Pointer reported by the proxy, or NULL when there is no proxy.
         T * ptr() const
         {
             if (m_proxy != NULL) {
                 void * const raw = m_proxy->ptr();
                 return static_cast<T *>(raw);
             }
             return NULL;
         }
 
     private:
 
         mutable SmartPtr<WeakProxy> m_proxy;
 
     };
 
 
 } // nv namespace
 
 #endif // NV_CORE_PTR_H
Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/RefCounted.h
===================================================================
--- ps/trunk/libraries/source/nvtt/src/src/nvcore/RefCounted.h	(revision 23379)
+++ ps/trunk/libraries/source/nvtt/src/src/nvcore/RefCounted.h	(revision 23380)
@@ -1,149 +1,149 @@
-// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+// This code is in the public domain -- Ignacio CastaÃ±o <castano@gmail.com>
 
 #ifndef NV_CORE_REFCOUNTED_H
 #define NV_CORE_REFCOUNTED_H
 
 #include "nvcore.h"
 #include "Debug.h"
 
 // Forward-declares the SmartPtr template and defines two typedefs for the given
 // class name: <Class>Ptr (SmartPtr<Class>) and <Class>ConstPtr (SmartPtr<const Class>).
 #define NV_DECLARE_PTR(Class) \
     template <class T> class SmartPtr; \
     typedef SmartPtr<class Class> Class ## Ptr; \
     typedef SmartPtr<const class Class> Class ## ConstPtr
 
 
 namespace nv
 {
     /// Weak proxy: a reference counted holder of an untyped pointer that can be
     /// cleared through notifyObjectDied().
     class WeakProxy
     {
         NV_FORBID_COPY(WeakProxy);
     public:
         /// Ctor. Starts with a zero reference count and the given pointer.
         WeakProxy(void * ptr) : m_count(0), m_ptr(ptr) { }
 
         /// Dtor. Checks that no references remain.
         ~WeakProxy()
         {
             nvCheck( m_count == 0 );
         }
 
         /// Increase reference count and return the new value.
         uint addRef() const
         {
             return ++m_count;
         }
 
         /// Decrease reference count; the proxy deletes itself when it reaches 0.
         uint release() const
         {
             nvCheck( m_count > 0 );
 
             const int remaining = --m_count;
             if( remaining != 0 ) {
                 return remaining;
             }
 
             delete this;
             return 0;
         }
 
         /// WeakPtr's call this to determine if their pointer is valid or not.
         bool isAlive() const {
             return m_ptr != NULL;
         }
 
         /// Only the actual object should call this. Clears the stored pointer.
         void notifyObjectDied() {
             m_ptr = NULL;
         }
 
         /// Return the stored pointer (NULL after notifyObjectDied()).
         void * ptr() const {
             return m_ptr;
         }
 
     private:
         mutable int m_count;
         void * m_ptr;
     };
 
 
     /// Reference counted base class to be used with SmartPtr and WeakPtr.
     ///
     /// The count starts at zero; release() deletes the object when the count
     /// drops back to zero. A WeakProxy is created lazily on the first
     /// getWeakProxy() call and is told about the object's death in the destructor.
     class RefCounted
     {
         NV_FORBID_COPY(RefCounted);
     public:
 
         /// Ctor. No references and no weak proxy yet.
         RefCounted() : m_count(0), m_weak_proxy(NULL)
         {
         }
 
         /// Virtual dtor. Checks that no references remain and detaches the proxy.
         virtual ~RefCounted()
         {
             nvCheck( m_count == 0 );
             releaseWeakProxy();
         }
 
 
         /// Increase reference count and return the new value.
         uint addRef() const
         {
             return ++m_count;
         }
 
 
         /// Decrease reference count and delete the object when it reaches 0.
         uint release() const
         {
             nvCheck( m_count > 0 );
 
             const int remaining = --m_count;
             if( remaining != 0 ) {
                 return remaining;
             }
 
             delete this;
             return 0;
         }
 
         /// Get the weak proxy, creating it (with one reference held by this
         /// object) on first use.
         WeakProxy * getWeakProxy() const
         {
             if (m_weak_proxy != NULL) {
                 return m_weak_proxy;
             }
 
             m_weak_proxy = new WeakProxy(static_cast<void *>(const_cast<RefCounted *>(this)));
             m_weak_proxy->addRef();
             return m_weak_proxy;
         }
 
         /// Release the weak proxy: mark the object as dead and drop our reference.
         void releaseWeakProxy() const
         {
             if (m_weak_proxy == NULL) {
                 return;
             }
 
             m_weak_proxy->notifyObjectDied();
             m_weak_proxy->release();
             m_weak_proxy = NULL;
         }
 
         /// Get reference count.
         int refCount() const
         {
             return m_count;
         }
 
 
     private:
 
         mutable int m_count;
         mutable WeakProxy * m_weak_proxy;
 
     };
 
 } // nv namespace
 
 
 #endif // NV_CORE_REFCOUNTED_H
Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/StdStream.h
===================================================================
--- ps/trunk/libraries/source/nvtt/src/src/nvcore/StdStream.h	(revision 23379)
+++ ps/trunk/libraries/source/nvtt/src/src/nvcore/StdStream.h	(revision 23380)
@@ -1,463 +1,463 @@
-// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+// This code is in the public domain -- Ignacio CastaÃ±o <castano@gmail.com>
 
 //#pragma once
 //#ifndef NV_CORE_STDSTREAM_H
 //#define NV_CORE_STDSTREAM_H
 
 #include "nvcore.h"
 #include "Stream.h"
 #include "Array.h"
 
 #include <stdio.h> // fopen
 #include <string.h> // memcpy
 
 namespace nv
 {
 
     // Portable version of fopen: returns the opened FILE, or NULL on failure.
     // Uses fopen_s on MSVC 2005 and later, plain fopen everywhere else.
     inline FILE * fileOpen(const char * fileName, const char * mode)
     {
         nvCheck(fileName != NULL);
 #if NV_CC_MSVC && _MSC_VER >= 1400
         FILE * handle;
         const bool opened = (fopen_s(&handle, fileName, mode) == 0);
         return opened ? handle : NULL;
 #else
         return fopen(fileName, mode);
 #endif
     }
 
 
     /// Base stdio stream.
     ///
     /// Wraps a FILE pointer and implements the positioning and status part of the
     /// Stream interface on top of it. On NV_OS_WIN32 the _nolock variants of the
     /// stdio calls are used.
     class NVCORE_CLASS StdStream : public Stream
     {
         NV_FORBID_COPY(StdStream);
     public:
 
         /// Ctor. @a fp may be NULL; @a autoclose selects whether the dtor closes it.
         StdStream( FILE * fp, bool autoclose ) : m_fp(fp), m_autoclose(autoclose) { }
 
         /// Dtor. Closes the file only when it is open and autoclose was requested.
         virtual ~StdStream()
         {
             if( m_fp != NULL && m_autoclose ) {
 #if NV_OS_WIN32
                 _fclose_nolock( m_fp );
 #else
                 fclose( m_fp );
 #endif
             }
         }
 
 
         /** @name Stream implementation. */
         //@{
         /// Move to absolute position @a pos (measured from the start of the file).
         virtual void seek( uint pos )
         {
             nvDebugCheck(m_fp != NULL);
             nvDebugCheck(pos <= size());
 #if NV_OS_WIN32
             _fseek_nolock(m_fp, pos, SEEK_SET);
 #else
             fseek(m_fp, pos, SEEK_SET);
 #endif
         }
 
         /// Current position as reported by ftell.
         virtual uint tell() const
         {
             nvDebugCheck(m_fp != NULL);
 #if NV_OS_WIN32
             return _ftell_nolock(m_fp);
 #else
             return (uint)ftell(m_fp);
 #endif
         }
 
         /// File size, found by seeking to the end and then restoring the position.
         virtual uint size() const
         {
             nvDebugCheck(m_fp != NULL);
 #if NV_OS_WIN32
             uint pos = _ftell_nolock(m_fp);
             _fseek_nolock(m_fp, 0, SEEK_END);
             uint end = _ftell_nolock(m_fp);
             _fseek_nolock(m_fp, pos, SEEK_SET);
 #else
             uint pos = (uint)ftell(m_fp);
             fseek(m_fp, 0, SEEK_END);
             uint end = (uint)ftell(m_fp);
             fseek(m_fp, pos, SEEK_SET);
 #endif
             return end;
         }
 
         /// True when there is no file or the stdio error indicator is set.
         virtual bool isError() const
         {
             return m_fp == NULL || ferror( m_fp ) != 0;
         }
 
         /// Clear the stdio error indicator.
         virtual void clearError()
         {
             nvDebugCheck(m_fp != NULL);
             clearerr(m_fp);
         }
 
         // @@ The original implementation used feof, which only returns true after an attempt to read *past* the end of the stream.
         // That is, after reading the last byte of a file isAtEnd would still return false, even though the stream pointer is at the
         // end of the file. That was not the intent and was inconsistent with MemoryInputStream, so this implementation uses ftell
         // and fseek to compare the current position with the end of the file.
         virtual bool isAtEnd() const
         {
             if (m_fp == NULL) return true;
             //nvDebugCheck(m_fp != NULL);
             //return feof( m_fp ) != 0;
 #if NV_OS_WIN32
             uint pos = _ftell_nolock(m_fp);
             _fseek_nolock(m_fp, 0, SEEK_END);
             uint end = _ftell_nolock(m_fp);
             _fseek_nolock(m_fp, pos, SEEK_SET);
 #else
             uint pos = (uint)ftell(m_fp);
             fseek(m_fp, 0, SEEK_END);
             uint end = (uint)ftell(m_fp);
             fseek(m_fp, pos, SEEK_SET);
 #endif
             return pos == end;
         }
 
         /// Always true.
         virtual bool isSeekable() const { return true; }
         //@}
 
     protected:
 
         FILE * m_fp;        // Underlying file, may be NULL.
         bool m_autoclose;   // Close m_fp in the destructor when true.
 
     };
 
 
     /// Standard output stream.
     ///
     /// Write-only StdStream. serialize() writes bytes to the file and returns the
     /// number of bytes actually written.
     class NVCORE_CLASS StdOutputStream : public StdStream
     {
         NV_FORBID_COPY(StdOutputStream);
     public:
 
         /// Construct stream by file name. The file is opened in "wb" mode and
         /// closed by the destructor.
         StdOutputStream( const char * name ) : StdStream(fileOpen(name, "wb"), /*autoclose=*/true) { }
 
         /// Construct stream by file handle.
         StdOutputStream( FILE * fp, bool autoclose ) : StdStream(fp, autoclose)
         {
         }
 
         /** @name Stream implementation. */
         //@{
         /// Write @a len bytes from @a data. Returns the number of bytes written,
         /// which is smaller than @a len when the write fails part way.
         virtual uint serialize( void * data, uint len )
         {
             nvDebugCheck(data != NULL);
             nvDebugCheck(m_fp != NULL);
 #if NV_OS_WIN32
             return (uint)_fwrite_nolock(data, 1, len, m_fp);
 #elif NV_OS_LINUX
             return (uint)fwrite_unlocked(data, 1, len, m_fp);
 #elif NV_OS_DARWIN
             // Byte-by-byte path. Stop at the first failed write so that the
             // result matches what the fwrite based branches report.
             const char * bytes = (const char *)data;
             for (uint i = 0; i < len; i++) {
                 if (putc_unlocked(bytes[i], m_fp) == EOF) {
                     return i;
                 }
             }
             return len;
 #else
             return (uint)fwrite(data, 1, len, m_fp);
 #endif
         }
 
         virtual bool isLoading() const
         {
             return false;
         }
 
         virtual bool isSaving() const
         {
             return true;
         }
         //@}
 
     };
 
 
     /// Standard input stream.
     ///
     /// Read-only StdStream. serialize() reads bytes from the file and returns the
     /// number of bytes actually read.
     class NVCORE_CLASS StdInputStream : public StdStream
     {
         NV_FORBID_COPY(StdInputStream);
     public:
 
         /// Construct stream by file name. The file is opened in "rb" mode and
         /// closed by the destructor.
         StdInputStream( const char * name ) : StdStream(fileOpen(name, "rb"), /*autoclose=*/true) { }
 
         /// Construct stream by file handle.
         StdInputStream( FILE * fp, bool autoclose=true ) : StdStream(fp, autoclose)
         {
         }
 
         /** @name Stream implementation. */
         //@{
         /// Read up to @a len bytes into @a data. Returns the number of bytes read,
         /// which is smaller than @a len at end of file or on a read error.
         virtual uint serialize( void * data, uint len )
         {
             nvDebugCheck(data != NULL);
             nvDebugCheck(m_fp != NULL);
 #if NV_OS_WIN32
             return (uint)_fread_nolock(data, 1, len, m_fp);
 #elif NV_OS_LINUX
             return (uint)fread_unlocked(data, 1, len, m_fp);
 #elif NV_OS_DARWIN
             // Byte-by-byte path. getc_unlocked returns EOF at end of file or on
             // error; stop there instead of storing EOF as data, so that the
             // result matches what the fread based branches report.
             char * bytes = (char *)data;
             for (uint i = 0; i < len; i++) {
                 const int c = getc_unlocked(m_fp);
                 if (c == EOF) {
                     return i;
                 }
                 bytes[i] = (char)c;
             }
             return len;
 #else
             return (uint)fread(data, 1, len, m_fp);
 #endif
         }
 
         virtual bool isLoading() const
         {
             return true;
         }
 
         virtual bool isSaving() const
         {
             return false;
         }
         //@}
     };
 
 
 
     /// Memory input stream.
     ///
     /// Reads from a caller supplied block of m_size bytes starting at m_mem.
     /// The block is not copied; only a read cursor (m_ptr) is kept.
     class NVCORE_CLASS MemoryInputStream : public Stream
     {
         NV_FORBID_COPY(MemoryInputStream);
     public:
 
         /// Ctor. The cursor starts at the beginning of the block.
         MemoryInputStream( const uint8 * mem, uint size ) : m_mem(mem), m_ptr(mem), m_size(size) { }
 
         /** @name Stream implementation. */
         //@{
         /// Read data. Copies at most the bytes left after the cursor and
         /// returns how many were copied.
         virtual uint serialize( void * data, uint len )
         {
             nvDebugCheck(data != NULL);
             nvDebugCheck(!isError());
 
             const uint remaining = m_size - tell();
             const uint count = (len > remaining) ? remaining : len;
 
             memcpy( data, m_ptr, count );
             m_ptr += count;
 
             return count;
         }
 
         /// Place the cursor @a pos bytes from the start of the block.
         virtual void seek( uint pos )
         {
             nvDebugCheck(!isError());
             m_ptr = m_mem + pos;
             nvDebugCheck(!isError());
         }
 
         /// Offset of the cursor from the start of the block.
         virtual uint tell() const
         {
             nvDebugCheck(m_ptr >= m_mem);
             return uint(m_ptr - m_mem);
         }
 
         virtual uint size() const
         {
             return m_size;
         }
 
         /// True when there is no block or the cursor lies outside of it.
         virtual bool isError() const
         {
             if (m_mem == NULL) {
                 return true;
             }
             return m_ptr < m_mem || m_ptr > m_mem + m_size;
         }
 
         virtual void clearError()
         {
             // Nothing to do.
         }
 
         /// True when the cursor sits exactly one past the last byte.
         virtual bool isAtEnd() const
         {
             return m_ptr == m_mem + m_size;
         }
 
         /// Always true.
         virtual bool isSeekable() const
         {
             return true;
         }
 
         virtual bool isLoading() const
         {
             return true;
         }
 
         virtual bool isSaving() const
         {
             return false;
         }
         //@}
 
         /// Current cursor position.
         const uint8 * ptr() const { return m_ptr; }
 
 
     private:
 
         const uint8 * m_mem;
         const uint8 * m_ptr;
         uint m_size;
 
     };
 
 
     /// Buffer output stream.
     ///
     /// Write-only stream that appends everything it receives to an Array<uint8>
     /// owned by the caller (held by reference).
     class NVCORE_CLASS BufferOutputStream : public Stream
     {
         NV_FORBID_COPY(BufferOutputStream);
     public:
 
         BufferOutputStream(Array<uint8> & buffer) : m_buffer(buffer) { }
 
         /// Append @a len bytes from @a data to the buffer; always returns @a len.
         virtual uint serialize( void * data, uint len )
         {
             nvDebugCheck(data != NULL);
             const uint8 * bytes = (uint8 *)data;
             m_buffer.append(bytes, len);
             return len;
         }
 
         /// Not implemented: the stream is not seekable.
         virtual void seek( uint /*pos*/ )
         {
         }
 
         /// Position and size are both the number of elements in the buffer.
         virtual uint tell() const { return m_buffer.size(); }
         virtual uint size() const { return m_buffer.size(); }
 
         /// This stream never reports an error.
         virtual bool isError() const { return false; }
         virtual void clearError() { }
 
         virtual bool isAtEnd() const { return true; }
         virtual bool isSeekable() const { return false; }
         virtual bool isLoading() const { return false; }
         virtual bool isSaving() const { return true; }
 
     private:
         Array<uint8> & m_buffer;
     };
 
 
     /// Protected input stream.
     ///
     /// Forwards every call to a wrapped Stream and checks the wrapped stream's
     /// error state after serialize() and seek().
     ///
     /// NOTE(review): the error paths use a bare `throw;`. No exception is being
     /// handled at that point, and in standard C++ a rethrow without an active
     /// exception calls std::terminate rather than throwing something a caller
     /// could catch. Confirm whether termination is the intended behavior.
     class NVCORE_CLASS ProtectedStream : public Stream
     {
         NV_FORBID_COPY(ProtectedStream);
     public:
 
         /// Ctor. Wraps @a s by reference; the wrapped stream is never deleted.
         ProtectedStream( Stream & s ) : m_s(&s), m_autodelete(false)
         { 
         }
 
         /// Ctor. Wraps @a s; it is deleted by the destructor when @a autodelete is true.
         ProtectedStream( Stream * s, bool autodelete = true ) : 
         m_s(s), m_autodelete(autodelete) 
         {
             nvDebugCheck(m_s != NULL);
         }
 
         /// Dtor.
         virtual ~ProtectedStream()
         {
             if( m_autodelete ) {
                 delete m_s;
             }
         }
 
         /** @name Stream implementation. */
         //@{
         /// Forward to the wrapped stream's serialize(), then check its error state.
         virtual uint serialize( void * data, uint len )
         {
             nvDebugCheck(data != NULL);
             len = m_s->serialize( data, len );
 
             if( m_s->isError() ) {
                 throw; // See the NOTE(review) on the class.
             }
 
             return len;
         }
 
         /// Forward to the wrapped stream's seek(), then check its error state.
         virtual void seek( uint pos )
         {
             m_s->seek( pos );
 
             if( m_s->isError() ) {
                 throw; // See the NOTE(review) on the class.
             }
         }
 
         // The remaining members forward to the wrapped stream unchanged.
         virtual uint tell() const
         {
             return m_s->tell();
         }
 
         virtual uint size() const
         {
             return m_s->size();
         }
 
         virtual bool isError() const
         {
             return m_s->isError();
         }
 
         virtual void clearError()
         {
             m_s->clearError();
         }
 
         virtual bool isAtEnd() const
         {
             return m_s->isAtEnd();
         }
 
         virtual bool isSeekable() const
         {
             return m_s->isSeekable();
         }
 
         virtual bool isLoading() const
         {
             return m_s->isLoading();
         }
 
         virtual bool isSaving() const
         {
             return m_s->isSaving();
         }
         //@}
 
 
     private:
 
         Stream * const m_s;         // Wrapped stream.
         bool const m_autodelete;    // Delete m_s in the destructor when true.
 
     };
 
 } // nv namespace
 
 
 //#endif // NV_CORE_STDSTREAM_H
Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/StrLib.cpp
===================================================================
--- ps/trunk/libraries/source/nvtt/src/src/nvcore/StrLib.cpp	(revision 23379)
+++ ps/trunk/libraries/source/nvtt/src/src/nvcore/StrLib.cpp	(revision 23380)
@@ -1,772 +1,772 @@
-// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+// This code is in the public domain -- Ignacio CastaÃ±o <castano@gmail.com>
 
 #include "StrLib.h"
 
 #include "Memory.h"
 #include "Utils.h" // swap
 
 #include <math.h>   // log
 #include <stdio.h>  // vsnprintf
 #include <string.h> // strlen, strcmp, etc.
 
 #if NV_CC_MSVC
 #include <stdarg.h> // vsnprintf
 #endif
 
 using namespace nv;
 
 namespace 
 {
     // Allocate storage for 'size' chars through malloc<char>.
     static char * strAlloc(uint size)
     {
         return malloc<char>(size);
     }
 
     // Resize 'str' to 'size' chars through realloc<char>; callers in this file
     // also pass a NULL 'str' to get a first allocation.
     static char * strReAlloc(char * str, uint size)
     {
         return realloc<char>(str, size);
     }
 
     // Release a string obtained from strAlloc/strReAlloc through free<char>.
     static void strFree(const char * str)
     {
         return free<char>(str);
     }
 
     /*static char * strDup( const char * str )
     {
         nvDebugCheck( str != NULL );
         uint len = uint(strlen( str ) + 1);
         char * dup = strAlloc( len );
         memcpy( dup, str, len );
         return dup;
     }*/
 
     // Helper function for integer to string conversion. Writes the digits of i
     // in radix r starting at a, most significant digit first, and returns the
     // position just past the last digit. No terminator is written.
     static char * i2a( uint i, char *a, uint r )
     {
         static const char symbols[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
 
         // Collect the digits least significant first...
         char reversed[sizeof(uint) * 8];
         uint count = 0;
         do {
             reversed[count++] = symbols[i % r];
             i /= r;
         } while( i > 0 );
 
         // ...then emit them in reading order.
         while( count > 0 ) {
             *a++ = reversed[--count];
         }
         return a;
     }
 
     // Locale independent functions.
     // Map 'a'..'z' to 'A'..'Z'; every other character is returned unchanged.
     static inline char toUpper( char c ) {
         const bool isLowerCase = (c >= 'a') && (c <= 'z');
         return isLowerCase ? (c + 'A' - 'a') : c;
     }
     // Map 'A'..'Z' to 'a'..'z'; every other character is returned unchanged.
     static inline char toLower( char c ) {
         const bool isUpperCase = (c >= 'A') && (c <= 'Z');
         return isUpperCase ? (c + 'a' - 'A') : c;
     }
     // True for the ASCII letters 'a'..'z' and 'A'..'Z' only.
     static inline bool isAlpha( char c ) {
         if (c >= 'a' && c <= 'z') return true;
         if (c >= 'A' && c <= 'Z') return true;
         return false;
     }
     // True for the ASCII digits '0'..'9' only.
     static inline bool isDigit( char c ) {
         return !(c < '0' || c > '9');
     }
     // True for ASCII letters and ASCII digits only.
     static inline bool isAlnum( char c ) {
         if (c >= 'a' && c <= 'z') return true;
         if (c >= 'A' && c <= 'Z') return true;
         return c >= '0' && c <= '9';
     }
 
 }
 
 // Length of a string as a 32 bit value. Debug-checks that str is not NULL.
 uint nv::strLen(const char * str)
 {
     nvDebugCheck(str != NULL);
     return U32(strlen(str));
 }
 
 // Case sensitive comparison; returns the result of strcmp.
 // Debug-checks that neither string is NULL.
 int nv::strDiff(const char * s1, const char * s2)
 {
     nvDebugCheck(s1 != NULL);
     nvDebugCheck(s2 != NULL);
     return strcmp(s1, s2);
 }
 
 // Case insensitive comparison; returns the result of _stricmp on MSVC and of
 // strcasecmp elsewhere. Debug-checks that neither string is NULL.
 int nv::strCaseDiff(const char * s1, const char * s2)
 {
     nvDebugCheck(s1 != NULL);
     nvDebugCheck(s2 != NULL);
 #if NV_CC_MSVC
     return _stricmp(s1, s2);
 #else
     return strcasecmp(s1, s2);
 #endif
 }
 
 // Case sensitive equality that accepts NULL strings: two NULLs are equal,
 // a NULL never equals a non-NULL string.
 bool nv::strEqual(const char * s1, const char * s2)
 {
     // Same pointer, which also covers two NULLs.
     if (s1 == s2) {
         return true;
     }
     if (s1 != NULL && s2 != NULL) {
         return strcmp(s1, s2) == 0;
     }
     return false;
 }
 
 // Case insensitive equality that accepts NULL strings: two NULLs are equal,
 // a NULL never equals a non-NULL string.
 bool nv::strCaseEqual(const char * s1, const char * s2)
 {
     // Same pointer, which also covers two NULLs.
     if (s1 == s2) {
         return true;
     }
     if (s1 != NULL && s2 != NULL) {
         return strCaseDiff(s1, s2) == 0;
     }
     return false;
 }
 
 // True when the first strlen(prefix) characters of str equal prefix.
 bool nv::strBeginsWith(const char * str, const char * prefix)
 {
     const size_t prefixLength = strlen(prefix);
     return strncmp(str, prefix, prefixLength) == 0;
 }
 
 bool nv::strEndsWith(const char * str, const char * suffix)
 {
     uint ml = strLen(str);
     uint sl = strLen(suffix);
     if (ml < sl) return false;
     return strncmp(str + ml - sl, suffix, sl) == 0;
 }
 
 // Copy src into dst, where dst has room for 'size' chars.
 // On MSVC this is strcpy_s. Elsewhere the copy is bounded by 'size' as well:
 // at most size-1 characters are copied and dst is always terminated, and a
 // size of 0 leaves dst untouched.
 // @@ Add asserts to detect overlap between dst and src?
 void nv::strCpy(char * dst, uint size, const char * src)
 {
     nvDebugCheck(dst != NULL);
     nvDebugCheck(src != NULL);
 #if NV_CC_MSVC && _MSC_VER >= 1400
     strcpy_s(dst, size, src);
 #else
     if (size == 0) {
         return; // No room, not even for the terminator.
     }
     const size_t srcLength = strlen(src);
     const size_t count = (srcLength < size) ? srcLength : size_t(size) - 1;
     memcpy(dst, src, count);
     dst[count] = '\0';
 #endif
 }
 
 void nv::strCpy(char * dst, uint size, const char * src, uint len)
 {
     nvDebugCheck(dst != NULL);
     nvDebugCheck(src != NULL);
 #if NV_CC_MSVC && _MSC_VER >= 1400
     strncpy_s(dst, size, src, len);
 #else
     int n = min(len+1, size);
     strncpy(dst, src, n);
     dst[n-1] = '\0';
 #endif
 }
 
 // Append src to the string in dst, where dst has room for 'size' chars.
 // On MSVC this is strcat_s. Elsewhere the append is bounded by 'size' as well:
 // only as many characters as fit are appended and dst stays terminated.
 void nv::strCat(char * dst, uint size, const char * src)
 {
     nvDebugCheck(dst != NULL);
     nvDebugCheck(src != NULL);
 #if NV_CC_MSVC && _MSC_VER >= 1400
     strcat_s(dst, size, src);
 #else
     const size_t used = strlen(dst);
     if (size == 0 || used >= size_t(size) - 1) {
         return; // dst is already full.
     }
     // strncat appends at most the given count and then writes the terminator.
     strncat(dst, src, size_t(size) - 1 - used);
 #endif
 }
 
 // Return a pointer to the first character of str that is not a space (' ').
 // Only the space character is skipped.
 NVCORE_API const char * nv::strSkipWhiteSpace(const char * str)
 {
     nvDebugCheck(str != NULL);
     const char * cursor = str;
     while (*cursor == ' ') {
         ++cursor;
     }
     return cursor;
 }
 
 // Return a pointer to the first character of str that is not a space (' ').
 // Only the space character is skipped.
 NVCORE_API char * nv::strSkipWhiteSpace(char * str)
 {
     nvDebugCheck(str != NULL);
     char * cursor = str;
     while (*cursor == ' ') {
         ++cursor;
     }
     return cursor;
 }
 
 
 /** Pattern matching routine. I don't remember where did I get this.
  *
  * Returns true when the whole of @a str matches the pattern @a pat.
  * Pattern syntax as implemented below:
  *  - '*' matches any run of characters, including an empty one.
  *  - '?' matches any single character.
  *  - '[...]' matches one character out of a set; 'a-b' inside the brackets
  *    is a range and is accepted in either order.
  *  - NV_PATH_SEPARATOR is skipped and the pattern character after it is
  *    compared literally.
  *  - any other character must match exactly.
  */
 bool nv::strMatch(const char * str, const char * pat)
 {
     nvDebugCheck(str != NULL);
     nvDebugCheck(pat != NULL);
 
     char c2;
 
     while (true) {
         // End of pattern: it is a match only if the string ended too.
         if (*pat==0) {
             if (*str==0) return true;
             else         return false;
         }
         // String exhausted: only a '*' can still match.
         if ((*str==0) && (*pat!='*')) return false;
         if (*pat=='*') {
             pat++;
             // A trailing '*' matches whatever is left.
             if (*pat==0) return true;
             // Try the rest of the pattern against every suffix of str.
             while (true) {
                 if (strMatch(str, pat)) return true;
                 if (*str==0) return false;
                 str++;
             }
         }
         if (*pat=='?') goto match;
         if (*pat=='[') {
             pat++;
             // Look for a set member (or range) that contains *str.
             while (true) {
                 if ((*pat==']') || (*pat==0)) return false;
                 if (*pat==*str) break;
                 if (pat[1] == '-') {
                     c2 = pat[2];
                     if (c2==0) return false;
                     if ((*pat<=*str) && (c2>=*str)) break;
                     if ((*pat>=*str) && (c2<=*str)) break;
                     pat+=2;
                 }
                 pat++;
             }
             // Skip the rest of the set up to the closing ']'.
             while (*pat!=']') {
                 if (*pat==0) {
                     // Unterminated set: step back so that 'match' lands on the terminator.
                     pat--;
                     break;
                 }
                 pat++;
             }
             goto match;
         }
 
         // NOTE(review): this looks like an escape character; NV_PATH_SEPARATOR is
         // '\\' on NV_OS_WIN32 but '/' elsewhere, where a '/' in the pattern is
         // therefore skipped as well. Confirm that this is intended.
         if (*pat == NV_PATH_SEPARATOR) {
             pat++;
             if (*pat==0) return false;
         }
         if (*pat!=*str) return false;
 
 match:
         pat++;
         str++;
     }
 }
 
 // True when every character of str is a digit '0'..'9'.
 // An empty string also yields true.
 bool nv::isNumber(const char * str) {
     for (const char * p = str; *p != '\0'; ++p) {
         if (!isDigit(*p)) {
             return false;
         }
     }
     return true;
 }
 
 
 /** Empty string: no buffer is allocated (m_str is NULL, m_size is 0). */
 StringBuilder::StringBuilder() : m_size(0), m_str(NULL)
 {
 }
 
 /** Preallocate space: allocates size_hint chars and stores an empty string.
  *  size_hint must be greater than 0 (debug-checked). */
 StringBuilder::StringBuilder( uint size_hint ) : m_size(size_hint)
 {
     nvDebugCheck(m_size > 0);
     m_str = strAlloc(m_size);
     *m_str = '\0';
 }
 
 /** Copy ctor. Starts empty and then copies the contents of s. */
 StringBuilder::StringBuilder( const StringBuilder & s ) : m_size(0), m_str(NULL)
 {
     copy(s);
 }
 
 /** Copy string. A NULL s leaves the builder empty. */
 StringBuilder::StringBuilder(const char * s) : m_size(0), m_str(NULL)
 {
     if (s == NULL) {
         return;
     }
     copy(s);
 }
 
 /** Copy string. Starts empty and then copies through copy(s, len). */
 StringBuilder::StringBuilder(const char * s, uint len) : m_size(0), m_str(NULL)
 {
     copy(s, len);
 }
 
 /** Delete the string: releases the buffer through strFree. */
 StringBuilder::~StringBuilder()
 {
     strFree(m_str);
 }
 
 
 /** Format a string safely.
  *  printf-style front end: packs the variable arguments into a va_list and
  *  forwards to formatList(). Returns *this. */
 StringBuilder & StringBuilder::format( const char * fmt, ... )
 {
     nvDebugCheck(fmt != NULL);
     va_list arg;
     va_start( arg, fmt );
 
     formatList( fmt, arg );
 
     va_end( arg );
 
     return *this;
 }
 
 
 /** Format a string safely.
  *  Formats into the buffer starting at its beginning and grows the buffer
  *  until the formatted text and its terminator fit. Returns *this. */
 StringBuilder & StringBuilder::formatList( const char * fmt, va_list arg )
 {
     nvDebugCheck(fmt != NULL);
 
     // No buffer yet: start with 64 chars.
     if (m_size == 0) {
         m_size = 64;
         m_str = strAlloc( m_size );
     }
 
     // Work on a copy so that 'arg' can be used again for every retry.
     va_list tmp;
     va_copy(tmp, arg);
 #if NV_CC_MSVC && _MSC_VER >= 1400
     int n = vsnprintf_s(m_str, m_size, _TRUNCATE, fmt, tmp);
 #else
     int n = vsnprintf(m_str, m_size, fmt, tmp);
 #endif
     va_end(tmp);
 
     // Retry while the result is negative or did not fit.
     // NOTE(review): a result that stays negative keeps this loop doubling the
     // buffer; confirm whether that can happen with the formats used by callers.
     while( n < 0 || n >= int(m_size) ) {
         if( n > -1 ) {
             // The required length is known: make room for it plus the terminator.
             m_size = n + 1;
         }
         else {
             // The required length is unknown: double the buffer.
             m_size *= 2;
         }
 
         m_str = strReAlloc(m_str, m_size);
 
         va_copy(tmp, arg);
 #if NV_CC_MSVC && _MSC_VER >= 1400
         n = vsnprintf_s(m_str, m_size, _TRUNCATE, fmt, tmp);
 #else
         n = vsnprintf(m_str, m_size, fmt, tmp);
 #endif
         va_end(tmp);
     }
 
     nvDebugCheck(n < int(m_size));
 
     // Make sure it's null terminated.
     nvDebugCheck(m_str[n] == '\0');
     //str[n] = '\0';
 
     return *this;
 }
 
 
 /** Append a string. Measures s and forwards to append(s, len). */
 StringBuilder & StringBuilder::append( const char * s )
 {
     const uint len = U32(strlen( s ));
     return append(s, len);
 }
 
 
 /** Append the first len characters of s. Returns *this. */
 StringBuilder & StringBuilder::append(const char * s, uint len)
 {
     nvDebugCheck(s != NULL);
 
     // Room for the current contents, the appended characters and the terminator.
     const uint oldLength = length();
     reserve(oldLength + len + 1);
 
     char * tail = m_str + oldLength;
     strCpy(tail, len + 1, s, len);
 
     return *this;
 }
 
 
 /** Append a formatted string.
  *  printf-style front end: packs the variable arguments into a va_list and
  *  forwards to appendFormatList(). Returns *this. */
 StringBuilder & StringBuilder::appendFormat( const char * fmt, ... )
 {
     nvDebugCheck( fmt != NULL );
 
     va_list arg;
     va_start( arg, fmt );
 
     appendFormatList( fmt, arg );
 
     va_end( arg );
 
     return *this;
 }
 
 
 /** Append a formatted string.
  *  With no buffer allocated yet the text is formatted in place; otherwise it
  *  is formatted into a temporary StringBuilder and then appended. Returns *this. */
 StringBuilder & StringBuilder::appendFormatList( const char * fmt, va_list arg )
 {
     nvDebugCheck( fmt != NULL );
 
     // Copy of the argument list, used by the temporary builder below.
     va_list tmp;
     va_copy(tmp, arg);
 
     if (m_size == 0) {
         // Nothing to append to: format directly into this builder.
         formatList(fmt, arg);
     }
     else {
         StringBuilder tmp_str;
         tmp_str.formatList( fmt, tmp );
         append( tmp_str.str() );
     }
 
     va_end(tmp);
 
     return *this;
 }
 
 // Append n spaces, allocating or growing the buffer as needed. Returns *this.
 StringBuilder & StringBuilder::appendSpace(uint n)
 {
     if (m_str != NULL) {
         const uint oldLength = strLen(m_str);
         const uint needed = oldLength + n + 1;
         if (needed > m_size) {
             m_size = needed;
             m_str = strReAlloc(m_str, m_size);
         }
 
         char * tail = m_str + oldLength;
         memset(tail, ' ', n);
         tail[n] = '\0';
         return *this;
     }
 
     // No buffer yet: the result is just the n spaces and the terminator.
     m_size = n + 1;
     m_str = strAlloc(m_size);
     memset(m_str, ' ', m_size);
     m_str[n] = '\0';
 
     return *this;
 }
 
 
 /** Convert number to string in the given base.
  *  Replaces the contents of the builder with the textual form of i, with a
  *  leading '-' for negative values. base must be in [2, 36]. Returns *this. */
 StringBuilder & StringBuilder::number( int i, int base )
 {
     nvCheck( base >= 2 );
     nvCheck( base <= 36 );
 
     const bool negative = (i < 0);
     // Negate in unsigned arithmetic so that the most negative int does not overflow.
     const uint magnitude = negative ? (0u - uint(i)) : uint(i);
 
     // Count the digits with integer division: exact for every value, including
     // 0 and negative numbers, where a logarithm based estimate is not defined.
     uint digits = 1;
     for (uint rest = magnitude / uint(base); rest != 0; rest /= uint(base)) {
         digits++;
     }
 
     // Optional sign + digits + terminator.
     reserve(digits + (negative ? 2u : 1u));
 
     char * out = m_str;
     if( negative ) {
         *out++ = '-';
     }
     *i2a(magnitude, out, base) = 0;
 
     return *this;
 }
 
 
 /** Convert number to string in the given base.
  *  Replaces the contents of the builder with the textual form of i.
  *  base must be in [2, 36]. Returns *this. */
 StringBuilder & StringBuilder::number( uint i, int base )
 {
     nvCheck( base >= 2 );
     nvCheck( base <= 36 );
 
     // Count the digits with integer division: exact for every value, including
     // 0, where a logarithm based estimate is not defined.
     uint digits = 1;
     for (uint rest = i / uint(base); rest != 0; rest /= uint(base)) {
         digits++;
     }
 
     // Digits + terminator.
     reserve(digits + 1);
 
     *i2a(i, m_str, base) = 0;
 
     return *this;
 }
 
 
 /** Make sure the buffer holds at least size_hint chars, preserving the contents.
  *  The buffer only ever grows; size_hint must not be 0. Returns *this. */
 StringBuilder & StringBuilder::reserve( uint size_hint )
 {
     nvCheck(size_hint != 0);
     if (size_hint <= m_size) {
         return *this;
     }
 
     m_str = strReAlloc(m_str, size_hint);
     m_size = size_hint;
     return *this;
 }
 
 
 /** Copy a string safely: replaces the contents with s, including its terminator.
  *  s must not be NULL. Returns *this. */
 StringBuilder & StringBuilder::copy(const char * s)
 {
     nvCheck( s != NULL );
 
     // Number of bytes to copy, terminator included.
     const uint byteCount = uint(strlen( s )) + 1;
     reserve(byteCount);
     memcpy(m_str, s, byteCount);
 
     return *this;
 }
 
 /** Copy a string safely: replaces the contents with at most len characters of s.
  *  s must not be NULL. Returns *this. */
 StringBuilder & StringBuilder::copy(const char * s, uint len)
 {
     nvCheck( s != NULL );
     const uint str_size = len + 1; // len characters plus the terminator.
     reserve(str_size);
     strCpy(m_str, str_size, s, len);
     return *this;
 }
 
 
 /** Copy a StringBuilder. A builder without a buffer resets this one;
  *  otherwise capacity and contents are copied. Returns *this. */
 StringBuilder & StringBuilder::copy( const StringBuilder & s )
 {
     if (s.m_str != NULL) {
         reserve( s.m_size );
         strCpy( m_str, s.m_size, s.m_str );
         return *this;
     }
 
     nvCheck( s.m_size == 0 );
     reset();
     return *this;
 }
 
 bool StringBuilder::endsWith(const char * str) const
 {
     uint l = uint(strlen(str));
     uint ml = uint(strlen(m_str));
     if (ml < l) return false;
     return strncmp(m_str + ml - l, str, l) == 0;
 }
 
 // True when the first strlen(str) characters of the contents equal str.
 bool StringBuilder::beginsWith(const char * str) const 
 {
     const size_t prefixLength = strlen(str);
     const int difference = strncmp(m_str, str, prefixLength);
     return difference == 0;
 }
 
 // Find given char starting from the end. Returns a pointer to its last
 // occurrence, or NULL when it does not occur. The terminator is not searched.
 char * StringBuilder::reverseFind(char c)
 {
     for (int i = (int)strlen(m_str) - 1; i >= 0; i--) {
         if (m_str[i] == c) {
             return m_str + i;
         }
     }
     return NULL;
 }
 
 
 /** Reset the string: frees the buffer and returns to the empty state
  *  (m_str is NULL, m_size is 0). */
 void StringBuilder::reset()
 {
     m_size = 0;
     strFree( m_str );
     m_str = NULL;
 }
 
 /** Release the allocated string: returns the buffer without freeing it and
  *  leaves the builder empty. The buffer is no longer freed by this object. */
 char * StringBuilder::release()
 {
     char * str = m_str;
     m_size = 0;
     m_str = NULL;
     return str;
 }
 
 // Swap strings: exchanges the size and the buffer pointer of the two builders.
 void nv::swap(StringBuilder & a, StringBuilder & b) {
     swap(a.m_size, b.m_size);
     swap(a.m_str, b.m_str);
 }
 
 
 /// Get the file name from a path. Forwards to the static fileName() on this path's string.
 const char * Path::fileName() const
 {
     return fileName(m_str);
 }
 
 
 /// Get the extension from a file path. Forwards to the static extension() on this path's string.
 const char * Path::extension() const
 {
     return extension(m_str);
 }
 
 
 /// Replace every '\\' and '/' in path with pathSeparator, in place.
 /*static */void Path::translatePath(char * path, char pathSeparator/*= NV_PATH_SEPARATOR*/) {
     nvCheck(path != NULL);
 
     for (char * p = path; *p != '\0'; ++p) {
         const bool isSeparator = (*p == '\\' || *p == '/');
         if (isSeparator) {
             *p = pathSeparator;
         }
     }
 }
 
 /// Converts the path separators of this path (both \\ and /) into pathSeparator.
 /// The path must not be null.
 void Path::translatePath(char pathSeparator/*=NV_PATH_SEPARATOR*/)
 {
     nvCheck(!isNull());
     translatePath(m_str, pathSeparator);
 }
 
 /// Append pathSeparator unless the path already ends with '\\' or '/'.
 /// An empty path gets the separator appended. The path must not be null.
 void Path::appendSeparator(char pathSeparator/*=NV_PATH_SEPARATOR*/)
 {
     nvCheck(!isNull());
 
     const uint l = length();
 
     // Look at the last character of the string: m_str[l] is the terminator
     // and never a separator.
     const bool endsWithSeparator = (l > 0) && (m_str[l-1] == '\\' || m_str[l-1] == '/');
     if (!endsWithSeparator) {
         char separatorString[] = { pathSeparator, '\0' };
         append(separatorString);
     }
 }
 
 
 /**
 * Strip the file name from a path, keeping the separator that precedes it.
 * Without a separator after the first character the path becomes empty.
 * @warning path cannot end with '/' or '\\', can it?
 */
 void Path::stripFileName()
 {
     nvCheck( m_str != NULL );
 
     // Walk back from the last character to the last separator; index 0 is not examined.
     int last = (int)strlen(m_str) - 1;
     for (; last > 0; last--) {
         if (m_str[last] == '/' || m_str[last] == '\\') {
             break;
         }
     }
 
     // Cut right after the separator, or at the very beginning when the walk ended at index 0.
     const int cut = (last != 0) ? (last + 1) : 0;
     m_str[cut] = 0;
 }
 
 
 /// Strip the extension from a path name: truncates the string at its last '.'.
 /// Nothing is changed when a path separator is found before any '.', or when
 /// the only '.' is the first character.
 /// NOTE(review): only NV_PATH_SEPARATOR is treated as a separator here, while
 /// the static Path::extension() checks both '\\' and '/'; confirm that the
 /// difference is intended.
 void Path::stripExtension()
 {
     nvCheck( m_str != NULL );
 
     // Walk back from the last character looking for a '.'.
     int length = (int)strlen(m_str) - 1;
     while (length > 0 && m_str[length] != '.') {
         length--;
         if( m_str[length] == NV_PATH_SEPARATOR ) {
             return; // no extension
         }
     }
     if (length > 0) {
         m_str[length] = 0;
     }
 }
 
 
 /// Get the path separator (NV_PATH_SEPARATOR).
 // static
 char Path::separator()
 {
     return NV_PATH_SEPARATOR;
 }
 
 // static 
 // Returns a pointer to the part of str after its last '\\' or '/', or str
 // itself when it contains no separator.
 const char * Path::fileName(const char * str)
 {
     nvCheck( str != NULL );
 
     int i = (int)strlen(str) - 1;
     for (; i >= 0; i--) {
         if (str[i] == '\\' || str[i] == '/') {
             break;
         }
     }
 
     return str + (i + 1);
 }
 
 // static 
 // Returns a pointer to the last '.' of str (the extension including the dot),
 // or a pointer to the terminator when a separator comes first, when there is
 // no '.', or when the only '.' is the first character.
 const char * Path::extension(const char * str)
 {
     nvCheck( str != NULL );
 
     const int end = (int)strlen( str );
     int pos = end;
     while (pos > 0 && str[pos] != '.') {
         pos--;
         if (str[pos] == '\\' || str[pos] == '/') {
             return &str[end]; // no extension
         }
     }
 
     return (pos == 0) ? &str[end] : &str[pos];
 }
 
 
 
 /// Clone this string: builds a new String from this string's character data.
 String String::clone() const
 {
     return String(data);
 }
 
 /// Set the contents from a C string. NULL makes this a null string; anything
 /// else is copied into a new allocation that then gets its first reference.
 void String::setString(const char * str)
 {
     if (str != NULL) {
         allocString( str );
         addRef();
         return;
     }
     data = NULL;
 }
 
 /// Set the contents from the first 'length' characters of str, which must not
 /// be NULL (debug-checked). The new allocation gets its first reference.
 void String::setString(const char * str, uint length)
 {
     nvDebugCheck(str != NULL);
 
     allocString(str, length);
     addRef();
 }
 
 /// Set the contents from a StringBuilder. A builder without a buffer makes
 /// this a null string; otherwise its text is copied into a new allocation
 /// that then gets its first reference.
 void String::setString(const StringBuilder & str)
 {
     if (str.str() != NULL) {
         allocString(str.str());
         addRef();
         return;
     }
     data = NULL;
 }
 
 // Add reference count. Does nothing for a null string.
 void String::addRef()
 {
     if (data == NULL) {
         return;
     }
     setRefCount(getRefCount() + 1);
 }
 
 // Decrease reference count and free the allocation when the last reference
 // goes away. Does nothing for a null string.
 void String::release()
 {
     if (data == NULL) {
         return;
     }
 
     const uint16 count = getRefCount();
     setRefCount(count - 1);
 
     const bool wasLastReference = (count == 1);
     if (wasLastReference) {
         // The allocation starts two bytes before the character data.
         free(data - 2);
         data = NULL;
     }
 }
 
 // Allocate storage for a string of 'len' characters and copy str into it.
 // The allocation holds 2 extra bytes in addition to the characters and the
 // terminator; presumably they store the reference count (release() frees
 // data - 2) -- confirm against setData/setRefCount.
 // The reference count starts at 0; callers add the first reference.
 void String::allocString(const char * str, uint len)
 {
     const char * ptr = malloc<char>(2 + len + 1);
 
     setData( ptr );
     setRefCount( 0 );
 
     // Copy string.
     strCpy(const_cast<char *>(data), len+1, str, len);
 
     // Add terminating character.
     const_cast<char *>(data)[len] = '\0';
 }
 
 // Swap strings: exchanges the data pointers of the two Strings.
 void nv::swap(String & a, String & b) {
     swap(a.data, b.data);
 }
Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/StrLib.h
===================================================================
--- ps/trunk/libraries/source/nvtt/src/src/nvcore/StrLib.h	(revision 23379)
+++ ps/trunk/libraries/source/nvtt/src/src/nvcore/StrLib.h	(revision 23380)
@@ -1,430 +1,430 @@
-// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+// This code is in the public domain -- Ignacio CastaÃ±o <castano@gmail.com>
 
 #pragma once
 #ifndef NV_CORE_STRING_H
 #define NV_CORE_STRING_H
 
 #include "Debug.h"
 #include "Hash.h" // hash
 
 //#include <string.h> // strlen, etc.
 
 #if NV_OS_WIN32
 #define NV_PATH_SEPARATOR '\\'
 #else
 #define NV_PATH_SEPARATOR '/'
 #endif
 
 namespace nv
 {
 
     NVCORE_API uint strHash(const char * str, uint h) NV_PURE;
 
     /// String hash based on Bernstein's hash: starting from seed h, every
     /// character c up to the terminating NUL is folded in as h = (33 * h) ^ c.
     /// An empty string returns the seed unchanged.
     inline uint strHash(const char * data, uint h = 5381)
     {
         for (const char * cursor = data; *cursor != 0; ++cursor) {
             h = (33 * h) ^ uint(*cursor);
         }
         return h;
     }
 
     /// Hash functor specialization for C strings: forwards to strHash() with its default seed.
     template <> struct Hash<const char *> {
         uint operator()(const char * str) const { return strHash(str); }
     };
 
     NVCORE_API uint strLen(const char * str) NV_PURE;                       // Asserts on NULL strings.
 
     NVCORE_API int strDiff(const char * s1, const char * s2) NV_PURE;       // Asserts on NULL strings.
     NVCORE_API int strCaseDiff(const char * s1, const char * s2) NV_PURE;   // Asserts on NULL strings.
     NVCORE_API bool strEqual(const char * s1, const char * s2) NV_PURE;     // Accepts NULL strings.
     NVCORE_API bool strCaseEqual(const char * s1, const char * s2) NV_PURE; // Accepts NULL strings.
 
     /// Equality functor specialization for C strings: compares through strEqual()
     /// (declared above as accepting NULL strings) rather than by pointer.
     template <> struct Equal<const char *> {
         bool operator()(const char * a, const char * b) const { return strEqual(a, b); }
     };
 
     NVCORE_API bool strBeginsWith(const char * dst, const char * prefix) NV_PURE;
     NVCORE_API bool strEndsWith(const char * dst, const char * suffix) NV_PURE;
 
 
     NVCORE_API void strCpy(char * dst, uint size, const char * src);
     NVCORE_API void strCpy(char * dst, uint size, const char * src, uint len);
     NVCORE_API void strCat(char * dst, uint size, const char * src);
 
     NVCORE_API const char * strSkipWhiteSpace(const char * str);
     NVCORE_API char * strSkipWhiteSpace(char * str);
 
     NVCORE_API bool strMatch(const char * str, const char * pat) NV_PURE;
 
     NVCORE_API bool isNumber(const char * str) NV_PURE;
 
     /* @@ Implement these two functions and modify StringBuilder to use them?
     NVCORE_API void strFormat(const char * dst, const char * fmt, ...);
     NVCORE_API void strFormatList(const char * dst, const char * fmt, va_list arg);
 
     template <size_t count> void strFormatSafe(char (&buffer)[count], const char *fmt, ...) __attribute__((format (printf, 2, 3)));
     template <size_t count> void strFormatSafe(char (&buffer)[count], const char *fmt, ...) {
         va_list args;
         va_start(args, fmt);
         strFormatList(buffer, count, fmt, args);
         va_end(args);
     }
     template <size_t count> void strFormatListSafe(char (&buffer)[count], const char *fmt, va_list arg) {
         va_list tmp;
         va_copy(tmp, args);
         strFormatList(buffer, count, fmt, tmp);
         va_end(tmp);
     }*/
 
     /// strCpy() into a fixed-size char array; the array size is deduced from the
     /// argument and passed as the destination size.
     template <int count> void strCpySafe(char (&buffer)[count], const char *src) {
         strCpy(buffer, count, src);
     }
 
     /// strCat() onto a fixed-size char array; the array size is deduced from the
     /// argument and passed as the destination size.
     template <int count> void strCatSafe(char (&buffer)[count], const char * src) {
         strCat(buffer, count, src);
     }
 
 
 
     /// String builder.
     class NVCORE_CLASS StringBuilder
     {
     public:
 
         StringBuilder();
         explicit StringBuilder( uint size_hint );
         StringBuilder(const char * str);
         StringBuilder(const char * str, uint len);
         StringBuilder(const StringBuilder & other);
 
         ~StringBuilder();
 
         StringBuilder & format( const char * format, ... ) __attribute__((format (printf, 2, 3)));
         StringBuilder & formatList( const char * format, va_list arg );
 
         StringBuilder & append(const char * str);
 		StringBuilder & append(const char * str, uint len);
         StringBuilder & appendFormat(const char * format, ...) __attribute__((format (printf, 2, 3)));
         StringBuilder & appendFormatList(const char * format, va_list arg);
 
         StringBuilder & appendSpace(uint n);
 
         StringBuilder & number( int i, int base = 10 );
         StringBuilder & number( uint i, int base = 10 );
 
         StringBuilder & reserve(uint size_hint);
         StringBuilder & copy(const char * str);
         StringBuilder & copy(const char * str, uint len);
         StringBuilder & copy(const StringBuilder & str);
 
         StringBuilder & toLower();
         StringBuilder & toUpper();
 
         bool endsWith(const char * str) const;
         bool beginsWith(const char * str) const;
 
         char * reverseFind(char c);
 
         void reset();
         bool isNull() const { return m_size == 0; }
 
         // const char * accessors
         //operator const char * () const { return m_str; }
         //operator char * () { return m_str; }
         const char * str() const { return m_str; }
         char * str() { return m_str; }
 
         char * release();
 
         /// Implement value semantics.
         StringBuilder & operator=( const StringBuilder & s ) {
             return copy(s);
         }
 
         /// Implement value semantics.
         StringBuilder & operator=( const char * s ) {
             return copy(s);
         }
 
         /// Equal operator.
         bool operator==( const StringBuilder & s ) const {
             return strMatch(s.m_str, m_str);
         }
 
         /// Return the exact length.
         uint length() const { return isNull() ? 0 : strLen(m_str); }
 
         /// Return the size of the string container.
         uint capacity() const { return m_size; }
 
         /// Return the hash of the string.
         uint hash() const { return isNull() ? 0 : strHash(m_str); }
 
         // Swap strings.
         friend void swap(StringBuilder & a, StringBuilder & b);
 
     protected:
 
         /// Size of the string container.
         uint m_size;
 
         /// String.
         char * m_str;
 
     };
 
 
     /// Path string. @@ This should be called PathBuilder.
     /// A StringBuilder with path-related helpers; it adds no data members and
     /// every constructor forwards to the matching StringBuilder constructor.
     class NVCORE_CLASS Path : public StringBuilder
     {
     public:
         Path() : StringBuilder() {}
         // size_hint is an int here but the base constructor takes a uint.
         explicit Path(int size_hint) : StringBuilder(size_hint) {}
         Path(const char * str) : StringBuilder(str) {}
         Path(const Path & path) : StringBuilder(path) {}
 
         const char * fileName() const;
         const char * extension() const;
 
         // The separator defaults to the platform's NV_PATH_SEPARATOR.
         void translatePath(char pathSeparator = NV_PATH_SEPARATOR);
 
         void appendSeparator(char pathSeparator = NV_PATH_SEPARATOR);
 
         void stripFileName();
         void stripExtension();
 
         // statics: the same queries, usable on plain C strings.
         static char separator();
         static const char * fileName(const char *);
         static const char * extension(const char *);
 
         static void translatePath(char * path, char pathSeparator = NV_PATH_SEPARATOR);
     };
 
 
     /// String class.
     class NVCORE_CLASS String
     {
     public:
 
         /// Constructs a null string. @sa isNull()
         String()
         {
             data = NULL;
         }
 
         /// Constructs a shared copy of str.
         String(const String & str)
         {
             data = str.data;
             if (data != NULL) addRef();
         }
 
         /// Constructs a shared string from a standard string.
         String(const char * str)
         {
             setString(str);
         }
 
         /// Constructs a shared string from a standard string.
         String(const char * str, int length)
         {
             setString(str, length);
         }
 
         /// Constructs a shared string from a StringBuilder.
         String(const StringBuilder & str)
         {
             setString(str);
         }
 
         /// Dtor.
         ~String()
         {
             release();
         }
 
         String clone() const;
 
         /// Release the current string and allocate a new one.
         const String & operator=( const char * str )
         {
             release();
             setString( str );
             return *this;
         }
 
         /// Release the current string and allocate a new one.
         const String & operator=( const StringBuilder & str )
         {
             release();
             setString( str );
             return *this;
         }
 
         /// Implement value semantics.
         String & operator=( const String & str )
         {
             if (str.data != data)
             {
                 release();
                 data = str.data;
                 addRef();
             }
             return *this;
         }
 
         /// Equal operator.
         bool operator==( const String & str ) const
         {
             return strMatch(str.data, data);
         }
 
         /// Equal operator.
         bool operator==( const char * str ) const
         {
             return strMatch(str, data);
         }
 
         /// Not equal operator.
         bool operator!=( const String & str ) const
         {
             return !strMatch(str.data, data);
         }
 
         /// Not equal operator.
         bool operator!=( const char * str ) const
         {
             return !strMatch(str, data);
         }
 
         /// Returns true if this string is the null string.
         bool isNull() const { return data == NULL; }
 
         /// Return the exact length.
         uint length() const { nvDebugCheck(data != NULL); return strLen(data); }
 
         /// Return the hash of the string.
         uint hash() const { nvDebugCheck(data != NULL); return strHash(data); }
 
         /// const char * cast operator.
         operator const char * () const { return data; }
 
         /// Get string pointer.
         const char * str() const { return data; }
 
 
     private:
 
         // Add reference count.
         void addRef();
 
         // Decrease reference count.
         void release();
 
         uint16 getRefCount() const
         {
             nvDebugCheck(data != NULL);
             return *reinterpret_cast<const uint16 *>(data - 2);
         }
 
         void setRefCount(uint16 count) {
             nvDebugCheck(data != NULL);
             nvCheck(count < 0xFFFF);
             *reinterpret_cast<uint16 *>(const_cast<char *>(data - 2)) = uint16(count);
         }
 
         void setData(const char * str) {
             data = str + 2;
         }
 
         void allocString(const char * str)
         {
             allocString(str, strLen(str));
         }
 
         void allocString(const char * str, uint length);
 
         void setString(const char * str);
         void setString(const char * str, uint length);
         void setString(const StringBuilder & str);
 
         // Swap strings.
         friend void swap(String & a, String & b);
 
     private:
 
         const char * data;
 
     };
 
     /// Hash functor specialization for String: forwards to String::hash(),
     /// which debug-checks that the string is not null.
     template <> struct Hash<String> {
         uint operator()(const String & str) const { return str.hash(); }
     };
 
 
     // Like AutoPtr, but for const char strings: owns a character array and
     // releases it with delete [] when destroyed or reassigned.
     class AutoString
     {
         NV_FORBID_COPY(AutoString);
         NV_FORBID_HEAPALLOC();
     public:
 
         // Takes ownership of p, which may be NULL.
         AutoString(const char * p = NULL) : m_ptr(p) { }
 
 #if NV_CC_CPP11
         // Move ctor: takes over the pointer and leaves ap empty.
         AutoString(AutoString && ap) : m_ptr(ap.m_ptr) { ap.m_ptr = NULL; }
 #endif
         
         // Dtor. Deletes the owned array.
         ~AutoString() {
             delete [] m_ptr;
             m_ptr = NULL;
         }
 
         // Deletes the owned array and adopts p. Assigning the pointer that is
         // already owned does nothing.
         void operator=(const char * p) {
             if (p == m_ptr) return;
 
             delete [] m_ptr;
             m_ptr = p;
         }
 
         // Access to the owned pointer; ownership is not affected.
         const char * ptr() const { return m_ptr; }
         operator const char *() const { return m_ptr; }
 
         // Gives up ownership: returns the pointer and leaves this object empty.
         const char * release() {
             const char * const released = m_ptr;
             m_ptr = NULL;
             return released;
         }
 
         // Comparisons are by pointer value, not by string contents.
         friend bool operator == (const AutoString & ap, const char * const p) {
             return (p == ap.ptr());
         }
         friend bool operator != (const AutoString & ap, const char * const p) {
             return (p != ap.ptr());
         }
         friend bool operator == (const char * const p, const AutoString & ap) {
             return (p == ap.ptr());
         }
         friend bool operator != (const char * const p, const AutoString & ap) {
             return (p != ap.ptr());
         }
 
     private:
         const char * m_ptr;
     };
 
 } // nv namespace
 
 #endif // NV_CORE_STRING_H
Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/Stream.h
===================================================================
--- ps/trunk/libraries/source/nvtt/src/src/nvcore/Stream.h	(revision 23379)
+++ ps/trunk/libraries/source/nvtt/src/src/nvcore/Stream.h	(revision 23380)
@@ -1,164 +1,164 @@
-// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+// This code is in the public domain -- Ignacio CastaÃ±o <castano@gmail.com>
 
 #pragma once
 #ifndef NV_CORE_STREAM_H
 #define NV_CORE_STREAM_H
 
 #include "nvcore.h"
 #include "Debug.h"
 
 namespace nv
 {
 
     /// Base stream class. Concrete streams implement serialize() and the
     /// position/state queries; the operator<< overloads below are built on
     /// serialize() and are used for both directions (values are taken by
     /// non-const reference).
     class NVCORE_CLASS Stream {
     public:
 
         enum ByteOrder {
             LittleEndian = false,
             BigEndian = true,
         };
 
         /// Get the byte order of the system, as selected by NV_LITTLE_ENDIAN.
         static ByteOrder getSystemByteOrder() { 
 #if NV_LITTLE_ENDIAN
             return LittleEndian;
 #else
             return BigEndian;
 #endif
         }
 
 
         /// Ctor. Streams start out little endian.
         Stream() : m_byteOrder(LittleEndian) { }
 
         /// Virtual destructor.
         virtual ~Stream() {}
 
         /// Set byte order.
         void setByteOrder(ByteOrder bo) { m_byteOrder = bo; }
 
         /// Get byte order.
         ByteOrder byteOrder() const { return m_byteOrder; }
 
 
         /// Serialize the given data.
         virtual uint serialize( void * data, uint len ) = 0;
 
         /// Move to the given position in the archive.
         virtual void seek( uint pos ) = 0;
 
         /// Return the current position in the archive.
         virtual uint tell() const = 0;
 
         /// Return the current size of the archive.
         virtual uint size() const = 0;
 
         /// Determine if there has been any error.
         virtual bool isError() const = 0;
 
         /// Clear errors.
         virtual void clearError() = 0;
 
         /// Return true if the stream is at the end.
         virtual bool isAtEnd() const = 0;
 
         /// Return true if the stream is seekable.
         virtual bool isSeekable() const = 0;
 
         /// Return true if this is an input stream.
         virtual bool isLoading() const = 0;
 
         /// Return true if this is an output stream.
         virtual bool isSaving() const = 0;
 
 
         /// Seek forward by offset from the current position.
         void advance(uint offset) {
             const uint here = tell();
             seek(here + offset);
         }
 
 
         // friends
         // Single-byte types go straight to serialize(); wider types go through
         // byteOrderSerialize() so the stream's byte order is honoured.
         friend Stream & operator<<( Stream & s, bool & c ) {
 #if NV_OS_DARWIN && !NV_CC_CPP11
             // bool is four bytes wide in this configuration; move it through a single byte.
             nvStaticCheck(sizeof(bool) == 4);
             uint8 b = c ? 1 : 0;
             s.serialize( &b, 1 );
             c = (b == 1);
 #else
             nvStaticCheck(sizeof(bool) == 1);
             s.serialize( &c, 1 );
 #endif
             return s;
         }
         friend Stream & operator<<( Stream & s, char & c ) {
             nvStaticCheck(sizeof(char) == 1);
             s.serialize( &c, 1 );
             return s;
         }
         friend Stream & operator<<( Stream & s, uint8 & c ) {
             nvStaticCheck(sizeof(uint8) == 1);
             s.serialize( &c, 1 );
             return s;
         }
         friend Stream & operator<<( Stream & s, int8 & c ) {
             nvStaticCheck(sizeof(int8) == 1);
             s.serialize( &c, 1 );
             return s;
         }
         friend Stream & operator<<( Stream & s, uint16 & c ) {
             nvStaticCheck(sizeof(uint16) == 2);
             return s.byteOrderSerialize( &c, 2 );
         }
         friend Stream & operator<<( Stream & s, int16 & c ) {
             nvStaticCheck(sizeof(int16) == 2);
             return s.byteOrderSerialize( &c, 2 );
         }
         friend Stream & operator<<( Stream & s, uint32 & c ) {
             nvStaticCheck(sizeof(uint32) == 4);
             return s.byteOrderSerialize( &c, 4 );
         }
         friend Stream & operator<<( Stream & s, int32 & c ) {
             nvStaticCheck(sizeof(int32) == 4);
             return s.byteOrderSerialize( &c, 4 );
         }
         friend Stream & operator<<( Stream & s, uint64 & c ) {
             nvStaticCheck(sizeof(uint64) == 8);
             return s.byteOrderSerialize( &c, 8 );
         }
         friend Stream & operator<<( Stream & s, int64 & c ) {
             nvStaticCheck(sizeof(int64) == 8);
             return s.byteOrderSerialize( &c, 8 );
         }
         friend Stream & operator<<( Stream & s, float & c ) {
             nvStaticCheck(sizeof(float) == 4);
             return s.byteOrderSerialize( &c, 4 );
         }
         friend Stream & operator<<( Stream & s, double & c ) {
             nvStaticCheck(sizeof(double) == 8);
             return s.byteOrderSerialize( &c, 8 );
         }
 
     protected:
 
         /// Serialize in the stream byte order: one serialize() call when it
         /// matches the system order, otherwise one call per byte from the last
         /// byte down to the first.
         Stream & byteOrderSerialize( void * v, uint len ) {
             if( m_byteOrder == getSystemByteOrder() ) {
                 serialize( v, len );
                 return *this;
             }
 
             uint8 * const bytes = (uint8 *)v;
             uint remaining = len;
             while( remaining > 0 ) {
                 remaining--;
                 serialize( bytes + remaining, 1 );
             }
             return *this;
         }
 
 
     private:
 
         ByteOrder m_byteOrder;
 
     };
 
 } // nv namespace
 
 #endif // NV_CORE_STREAM_H
Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/TextWriter.cpp
===================================================================
--- ps/trunk/libraries/source/nvtt/src/src/nvcore/TextWriter.cpp	(revision 23379)
+++ ps/trunk/libraries/source/nvtt/src/src/nvcore/TextWriter.cpp	(revision 23380)
@@ -1,45 +1,45 @@
-// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+// This code is in the public domain -- Ignacio CastaÃ±o <castano@gmail.com>
 
 #include "TextWriter.h"
 
 using namespace nv;
 
 
 /// Constructor. Stores the target stream and creates the scratch StringBuilder
 /// with a size hint of 1024. The stream must be non-null and in saving mode.
 TextWriter::TextWriter(Stream * s) : 
     s(s), 
     str(1024)
 {
     nvCheck(s != NULL);
     nvCheck(s->isSaving());
 }
 
 // Passes strLen(str) characters of str to the stream (no terminator).
 // The parameter shadows the scratch member `str`. The const_cast is needed
 // because Stream::serialize() takes a non-const pointer.
 void TextWriter::writeString(const char * str)
 {
     nvDebugCheck(s != NULL);
     s->serialize(const_cast<char *>(str), strLen(str));
 }
 
 // Passes exactly len characters of str to the stream. The const_cast is needed
 // because Stream::serialize() takes a non-const pointer.
 void TextWriter::writeString(const char * str, uint len)
 {
     nvDebugCheck(s != NULL);
     s->serialize(const_cast<char *>(str), len);
 }
 
 // printf-style output: formats into the scratch StringBuilder, then hands the
 // formatted text to the stream.
 void TextWriter::format(const char * format, ...)
 {
     va_list args;
     va_start(args, format);
     str.formatList(format, args);
     va_end(args);
 
     writeString(str.str(), str.length());
 }
 
 // va_list variant of format(): formats into the scratch StringBuilder, then
 // hands the formatted text to the stream.
 // Formatting is done on a private copy of the argument list so the caller's
 // va_list is left untouched and can be reused.
 void TextWriter::formatList(const char * format, va_list arg)
 {
     va_list tmp;
     va_copy(tmp, arg);
     str.formatList(format, tmp);
     va_end(tmp);
 
     writeString(str.str(), str.length());
 }
Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/TextWriter.h
===================================================================
--- ps/trunk/libraries/source/nvtt/src/src/nvcore/TextWriter.h	(revision 23379)
+++ ps/trunk/libraries/source/nvtt/src/src/nvcore/TextWriter.h	(revision 23380)
@@ -1,62 +1,62 @@
-// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+// This code is in the public domain -- Ignacio CastaÃ±o <castano@gmail.com>
 
 #pragma once
 #ifndef NVCORE_TEXTWRITER_H
 #define NVCORE_TEXTWRITER_H
 
 #include "nvcore.h"
 #include "Stream.h"
 #include "StrLib.h"
 
 namespace nv
 {
 
     /// Text writer: writes raw and printf-formatted text to a Stream.
     class NVCORE_CLASS TextWriter
     {
     public:
 
         TextWriter(Stream * s);
 
         void writeString(const char * str);
         void writeString(const char * str, uint len);
         // The format attribute counts the implicit `this` as argument 1, hence (printf, 2, 3).
         void format(const char * format, ...) __attribute__((format (printf, 2, 3)));
         void formatList(const char * format, va_list arg);
 
     private:
 
         // Destination stream; held as a plain pointer.
         Stream * s;
 
         // Temporary string.
         StringBuilder str;
 
     };
 
 
     // Writes a signed integer in decimal ("%d") and returns the writer for chaining.
     inline TextWriter & operator<<( TextWriter & tw, int i)
     {
         tw.format("%d", i);
         return tw;
     }
 
     // Writes an unsigned integer in decimal ("%u") and returns the writer for chaining.
     inline TextWriter & operator<<( TextWriter & tw, uint i)
     {
         tw.format("%u", i);
         return tw;
     }
 
     // Writes a float with the default "%f" formatting and returns the writer for chaining.
     inline TextWriter & operator<<( TextWriter & tw, float f)
     {
         tw.format("%f", f);
         return tw;
     }
 
     // Writes a C string verbatim (no formatting is applied) and returns the writer for chaining.
     inline TextWriter & operator<<( TextWriter & tw, const char * str)
     {
         tw.writeString(str);
         return tw;
     }
 
 } // nv namespace
 
 #endif // NVCORE_TEXTWRITER_H
Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/Utils.h
===================================================================
--- ps/trunk/libraries/source/nvtt/src/src/nvcore/Utils.h	(revision 23379)
+++ ps/trunk/libraries/source/nvtt/src/src/nvcore/Utils.h	(revision 23380)
@@ -1,282 +1,282 @@
-// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+// This code is in the public domain -- Ignacio CastaÃ±o <castano@gmail.com>
 
 #pragma once
 #ifndef NV_CORE_UTILS_H
 #define NV_CORE_UTILS_H
 
 #include "Debug.h" // nvDebugCheck
 
 #include <new> // for placement new
 
 
 // Just in case. Grrr.
 #undef min
 #undef max
 
 // Numeric limits. The *_MIN values of the wider signed types are written as
 // (-MAX-1) so the literal itself stays inside the type's range.
 #define NV_INT8_MIN    (-128)
 #define NV_INT8_MAX    127
 #define NV_UINT8_MAX    255
 #define NV_INT16_MIN    (-32767-1)
 #define NV_INT16_MAX    32767
 #define NV_UINT16_MAX   0xffff
 #define NV_INT32_MIN    (-2147483647-1)
 #define NV_INT32_MAX    2147483647
 #define NV_UINT32_MAX   0xffffffff
 #define NV_INT64_MAX    POSH_I64(9223372036854775807)
 #define NV_INT64_MIN    (-POSH_I64(9223372036854775807)-1)
 #define NV_UINT64_MAX   POSH_U64(0xffffffffffffffff)
 
 #define NV_HALF_MAX     65504.0F
 #define NV_FLOAT_MAX    3.402823466e+38F
 
 // Largest integer such that it and all smaller integers can be stored in a 32bit float.
 // That is 2^24: a float has a 24-bit significand, so 16777217 is the first
 // integer that does not survive the round trip.
 #define NV_INTEGER_TO_FLOAT_MAX  16777216
 
 
 namespace nv
 {
     // Less error prone than casting. From CB:
     // http://cbloomrants.blogspot.com/2011/06/06-17-11-c-casting-is-devil.html
 
     // These intentionally look like casts.
 
     // uint32 casts: the generic version converts implicitly; the specializations
     // debug-check that the value fits before narrowing or dropping the sign.
     // uint32, uint16 and uint8 sources always fit and use the generic version.
     template <typename T> inline uint32 U32(T x) { return x; }
     template <> inline uint32 U32<uint64>(uint64 x) { nvDebugCheck(x <= NV_UINT32_MAX); return static_cast<uint32>(x); }
     template <> inline uint32 U32<int64>(int64 x) { nvDebugCheck(x >= 0 && x <= NV_UINT32_MAX); return static_cast<uint32>(x); }
     template <> inline uint32 U32<int32>(int32 x) { nvDebugCheck(x >= 0); return static_cast<uint32>(x); }
     template <> inline uint32 U32<int16>(int16 x) { nvDebugCheck(x >= 0); return static_cast<uint32>(x); }
     template <> inline uint32 U32<int8>(int8 x) { nvDebugCheck(x >= 0); return static_cast<uint32>(x); }
 
     // int32 casts: the generic version converts implicitly; the specializations
     // debug-check that the value lies in [NV_INT32_MIN, NV_INT32_MAX] before narrowing.
     template <typename T> inline int32 I32(T x) { return x; }
     template <> inline int32 I32<uint64>(uint64 x) { nvDebugCheck(x <= NV_INT32_MAX); return (int32)x; }
     // The upper bound is the signed maximum: values above NV_INT32_MAX do not fit in an int32.
     template <> inline int32 I32<int64>(int64 x) { nvDebugCheck(x >= NV_INT32_MIN && x <= NV_INT32_MAX); return (int32)x; }
     template <> inline int32 I32<uint32>(uint32 x) { nvDebugCheck(x <= NV_INT32_MAX); return (int32)x; }
     //template <> inline int32 I32<int32>(int32 x) { return x; }
     //template <> inline int32 I32<uint16>(uint16 x) { return x; }
     //template <> inline int32 I32<int16>(int16 x) { return x; }
     //template <> inline int32 I32<uint8>(uint8 x) { return x; }
     //template <> inline int32 I32<int8>(int8 x) { return x; }
 
     // uint16 casts: the generic version converts implicitly; the specializations
     // debug-check that the value fits before narrowing or dropping the sign.
     // uint16 and uint8 sources always fit and use the generic version.
     template <typename T> inline uint16 U16(T x) { return x; }
     template <> inline uint16 U16<uint64>(uint64 x) { nvDebugCheck(x <= NV_UINT16_MAX); return static_cast<uint16>(x); }
     template <> inline uint16 U16<int64>(int64 x) { nvDebugCheck(x >= 0 && x <= NV_UINT16_MAX); return static_cast<uint16>(x); }
     template <> inline uint16 U16<uint32>(uint32 x) { nvDebugCheck(x <= NV_UINT16_MAX); return static_cast<uint16>(x); }
     template <> inline uint16 U16<int32>(int32 x) { nvDebugCheck(x >= 0 && x <= NV_UINT16_MAX); return static_cast<uint16>(x); }
     template <> inline uint16 U16<int16>(int16 x) { nvDebugCheck(x >= 0); return static_cast<uint16>(x); }
     template <> inline uint16 U16<int8>(int8 x) { nvDebugCheck(x >= 0); return static_cast<uint16>(x); }
 
     // int16 casts: the generic version converts implicitly; the specializations
     // debug-check that the value lies in [NV_INT16_MIN, NV_INT16_MAX] before narrowing.
     template <typename T> inline int16 I16(T x) { return x; }
     template <> inline int16 I16<uint64>(uint64 x) { nvDebugCheck(x <= NV_INT16_MAX); return (int16)x; }
     // Signed sources are bounded by the signed maximum: values above NV_INT16_MAX do not fit.
     template <> inline int16 I16<int64>(int64 x) { nvDebugCheck(x >= NV_INT16_MIN && x <= NV_INT16_MAX); return (int16)x; }
     template <> inline int16 I16<uint32>(uint32 x) { nvDebugCheck(x <= NV_INT16_MAX); return (int16)x; }
     template <> inline int16 I16<int32>(int32 x) { nvDebugCheck(x >= NV_INT16_MIN && x <= NV_INT16_MAX); return (int16)x; }
     template <> inline int16 I16<uint16>(uint16 x) { nvDebugCheck(x <= NV_INT16_MAX); return (int16)x; }
     //template <> inline int16 I16<int16>(int16 x) { return x; }
     //template <> inline int16 I16<uint8>(uint8 x) { return x; }
     //template <> inline int16 I16<int8>(int8 x) { return x; }
 
     // uint8 casts: the generic version converts implicitly; the specializations
     // debug-check that the value fits before narrowing or dropping the sign.
     // uint8 sources always fit and use the generic version.
     template <typename T> inline uint8 U8(T x) { return x; }
     template <> inline uint8 U8<uint64>(uint64 x) { nvDebugCheck(x <= NV_UINT8_MAX); return static_cast<uint8>(x); }
     template <> inline uint8 U8<int64>(int64 x) { nvDebugCheck(x >= 0 && x <= NV_UINT8_MAX); return static_cast<uint8>(x); }
     template <> inline uint8 U8<uint32>(uint32 x) { nvDebugCheck(x <= NV_UINT8_MAX); return static_cast<uint8>(x); }
     template <> inline uint8 U8<int32>(int32 x) { nvDebugCheck(x >= 0 && x <= NV_UINT8_MAX); return static_cast<uint8>(x); }
     template <> inline uint8 U8<uint16>(uint16 x) { nvDebugCheck(x <= NV_UINT8_MAX); return static_cast<uint8>(x); }
     template <> inline uint8 U8<int16>(int16 x) { nvDebugCheck(x >= 0 && x <= NV_UINT8_MAX); return static_cast<uint8>(x); }
     template <> inline uint8 U8<int8>(int8 x) { nvDebugCheck(x >= 0); return static_cast<uint8>(x); }
     //template <> inline uint8 U8<float>(int8 x) { nvDebugCheck(x >= 0.0f && x <= 255.0f); return (uint8)x; }
 
     // int8 casts: the generic version converts implicitly; the specializations
     // debug-check that the value lies in [NV_INT8_MIN, NV_INT8_MAX] before narrowing.
     template <typename T> inline int8 I8(T x) { return x; }
     template <> inline int8 I8<uint64>(uint64 x) { nvDebugCheck(x <= NV_INT8_MAX); return (int8)x; }
     // Signed sources are bounded by the signed maximum: values above NV_INT8_MAX do not fit.
     template <> inline int8 I8<int64>(int64 x) { nvDebugCheck(x >= NV_INT8_MIN && x <= NV_INT8_MAX); return (int8)x; }
     template <> inline int8 I8<uint32>(uint32 x) { nvDebugCheck(x <= NV_INT8_MAX); return (int8)x; }
     template <> inline int8 I8<int32>(int32 x) { nvDebugCheck(x >= NV_INT8_MIN && x <= NV_INT8_MAX); return (int8)x; }
     template <> inline int8 I8<uint16>(uint16 x) { nvDebugCheck(x <= NV_INT8_MAX); return (int8)x; }
     template <> inline int8 I8<int16>(int16 x) { nvDebugCheck(x >= NV_INT8_MIN && x <= NV_INT8_MAX); return (int8)x; }
     template <> inline int8 I8<uint8>(uint8 x) { nvDebugCheck(x <= NV_INT8_MAX); return (int8)x; }
     //template <> inline int8 I8<int8>(int8 x) { return x; }
 
     // float casts: the generic version converts implicitly; the 32 and 64 bit
     // integer specializations debug-check the value against NV_INTEGER_TO_FLOAT_MAX
     // before converting.
     template <typename T> inline float F32(T x) { return x; }
     template <> inline float F32<uint64>(uint64 x) { nvDebugCheck(x <= NV_INTEGER_TO_FLOAT_MAX); return static_cast<float>(x); }
     template <> inline float F32<int64>(int64 x) { nvDebugCheck(x >= -NV_INTEGER_TO_FLOAT_MAX && x <= NV_INTEGER_TO_FLOAT_MAX); return static_cast<float>(x); }
     template <> inline float F32<uint32>(uint32 x) { nvDebugCheck(x <= NV_INTEGER_TO_FLOAT_MAX); return static_cast<float>(x); }
     template <> inline float F32<int32>(int32 x) { nvDebugCheck(x >= -NV_INTEGER_TO_FLOAT_MAX && x <= NV_INTEGER_TO_FLOAT_MAX); return static_cast<float>(x); }
     // The compiler should not complain about these conversions:
     //template <> inline float F32<uint16>(uint16 x) { nvDebugCheck(return (float)x; }
     //template <> inline float F32<int16>(int16 x) { nvDebugCheck(return (float)x; }
     //template <> inline float F32<uint8>(uint8 x) { nvDebugCheck(return (float)x; }
     //template <> inline float F32<int8>(int8 x) { nvDebugCheck(return (float)x; }
 
 
     /// Exchange the values of a and b using one copy construction and two assignments.
     template <typename T> 
     inline void swap(T & a, T & b)
     {
         T saved(a);     // keep a's value while a is overwritten
         a = b; 
         b = saved;
     }
 
     /// Return the larger of a and b, by value. b is returned unless it compares
     /// less than a, so equal arguments and a NaN first argument both yield b.
     template <typename T> 
     //inline const T & max(const T & a, const T & b)
     inline T max(const T & a, const T & b)
     {
         if (b < a) {
             return a;
         }
         return b;
     }
 
 	/// Return the largest of the four arguments, found by taking the maximum of
 	/// each pair and then the maximum of those two results.
 	template <typename T> 
 	//inline const T & max4(const T & a, const T & b, const T & c)
 	inline T max4(const T & a, const T & b, const T & c, const T & d)
 	{
 		const T firstPair = max(a, b);
 		const T secondPair = max(c, d);
 		return max(firstPair, secondPair);
 	}
 
     /// Return the largest of the three arguments: a compared against the maximum of b and c.
     template <typename T> 
     //inline const T & max3(const T & a, const T & b, const T & c)
     inline T max3(const T & a, const T & b, const T & c)
     {
         const T tail = max(b, c);
         return max(a, tail);
     }
 
     /// Return the smaller of a and b, by value. b is returned unless a compares
     /// less than b, so equal arguments yield b.
     template <typename T> 
     //inline const T & min(const T & a, const T & b)
     inline T min(const T & a, const T & b)
     {
         if (a < b) {
             return a;
         }
         return b;
     }
 
     /// Return the smallest of the three arguments: a compared against the minimum of b and c.
     template <typename T> 
     //inline const T & min3(const T & a, const T & b, const T & c)
     inline T min3(const T & a, const T & b, const T & c)
     {
         const T tail = min(b, c);
         return min(a, tail);
     }
 
     /// Clamp x to the range [a, b]: x is first raised to at least a, then
     /// lowered to at most b.
     template <typename T> 
     //inline const T & clamp(const T & x, const T & a, const T & b)
     inline T clamp(const T & x, const T & a, const T & b)
     {
         const T raised = max(x, a);
         return min(raised, b);
     }
 
     /** Return the smallest power of two that is not less than x.
     * @see http://graphics.stanford.edu/~seander/bithacks.html
     * @warning Behaviour for 0 is undefined.
     * @note isPowerOfTwo(x) == true -> nextPowerOfTwo(x) == x
     * @note nextPowerOfTwo(x) = 2 << log2(x-1)
     */
     inline uint nextPowerOfTwo( uint x )
     {
         nvDebugCheck( x != 0 );
 
         // Copy the highest set bit of (x - 1) into every lower bit position,
         // then add one. On modern CPUs this is supposed to be as fast as using
         // the bsr instruction.
         uint bits = x - 1;
         bits |= bits >> 1;
         bits |= bits >> 2;
         bits |= bits >> 4;
         bits |= bits >> 8;
         bits |= bits >> 16;
         return bits + 1;
     }
 
     /// Return true if @a n is a power of two.
     /// Zero is not a power of two; without the explicit test it would pass the
     /// bit check below, because 0 & (0 - 1) is 0.
     inline bool isPowerOfTwo( uint n )
     {
         return n != 0 && (n & (n-1)) == 0;
     }
 
 
     // @@ Move this to utils?
     /// Call delete on every element of a container of pointers. The container is
     /// walked with its own start()/isDone()/advance() protocol and is left
     /// holding the deleted pointers; it is not cleared.
     template <typename T>
     void deleteAll(T & container)
     {
         typename T::PseudoIndex cursor = container.start();
         while (!container.isDone(cursor))
         {
             delete container[cursor];
             container.advance(cursor);
         }
     }
 
 
 
     // @@ Specialize these methods for numeric, pointer, and pod types.
 
     template <typename T>
     void construct_range(T * restrict ptr, uint new_size, uint old_size) {
         for (uint i = old_size; i < new_size; i++) {
             new(ptr+i) T; // placement new
         }
     }
 
     template <typename T>
     void construct_range(T * restrict ptr, uint new_size, uint old_size, const T & elem) {
         for (uint i = old_size; i < new_size; i++) {
             new(ptr+i) T(elem); // placement new
         }
     }
 
     template <typename T>
     void construct_range(T * restrict ptr, uint new_size, uint old_size, const T * src) {
         for (uint i = old_size; i < new_size; i++) {
             new(ptr+i) T(src[i]); // placement new
         }
     }
 
     template <typename T>
     void destroy_range(T * restrict ptr, uint new_size, uint old_size) {
         for (uint i = new_size; i < old_size; i++) {
             (ptr+i)->~T(); // Explicit call to the destructor
         }
     }
 
     template <typename T>
     void fill(T * restrict dst, uint count, const T & value) {
         for (uint i = 0; i < count; i++) {
             dst[i] = value;
         }
     }
 
     template <typename T>
     void copy_range(T * restrict dst, const T * restrict src, uint count) {
         for (uint i = 0; i < count; i++) {
             dst[i] = src[i];
         }
     }
 
     template <typename T>
     bool find(const T & element, const T * restrict ptr, uint begin, uint end, uint * index) {
         for (uint i = begin; i < end; i++) {
             if (ptr[i] == element) {
                 if (index != NULL) *index = i;
                 return true;
             }
         }
         return false;
     }
 
 } // nv namespace
 
 #endif // NV_CORE_UTILS_H
Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/nvcore.h
===================================================================
--- ps/trunk/libraries/source/nvtt/src/src/nvcore/nvcore.h	(revision 23379)
+++ ps/trunk/libraries/source/nvtt/src/src/nvcore/nvcore.h	(revision 23380)
@@ -1,307 +1,307 @@
-// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+// This code is in the public domain -- Ignacio CastaÃ±o <castano@gmail.com>
 
 #pragma once
 #ifndef NV_CORE_H
 #define NV_CORE_H
 
 // Function linkage
 // NVCORE_API decorates functions and NVCORE_CLASS decorates classes:
 //  - NVCORE_SHARED with NVCORE_EXPORTS defined: DLL_EXPORT / DLL_EXPORT_CLASS.
 //  - NVCORE_SHARED without NVCORE_EXPORTS:      DLL_IMPORT for both.
 //  - NVCORE_SHARED false:                       both expand to nothing.
 #if NVCORE_SHARED
 #ifdef NVCORE_EXPORTS
 #define NVCORE_API DLL_EXPORT
 #define NVCORE_CLASS DLL_EXPORT_CLASS
 #else
 #define NVCORE_API DLL_IMPORT
 #define NVCORE_CLASS DLL_IMPORT
 #endif
 #else // NVCORE_SHARED
 #define NVCORE_API
 #define NVCORE_CLASS
 #endif // NVCORE_SHARED
 
 
 // Platform definitions
 #include <posh.h>
 
 // OS:
 // NV_OS_WIN32
 // NV_OS_WIN64
 // NV_OS_MINGW
 // NV_OS_CYGWIN
 // NV_OS_LINUX
 // NV_OS_FREEBSD
 // NV_OS_NETBSD
 // NV_OS_OPENBSD
 // NV_OS_UNIX
 // NV_OS_DARWIN
 // NV_OS_XBOX
 // NV_OS_ORBIS
 // NV_OS_IOS
 //
 // The chain below maps the first matching POSH_OS_* macro to one or more
 // NV_OS_* macros (e.g. Linux, the BSDs, OSX and iOS also define NV_OS_UNIX,
 // MinGW and Win64 also define NV_OS_WIN32). Compilation stops with an error
 // when no POSH_OS_* macro matches.
 
 #define NV_OS_STRING POSH_OS_STRING
 
 #if defined POSH_OS_LINUX
 #   define NV_OS_LINUX 1
 #   define NV_OS_UNIX 1
 #elif defined POSH_OS_ORBIS
 #   define NV_OS_ORBIS 1
 #elif defined POSH_OS_FREEBSD
 #   define NV_OS_FREEBSD 1
 #   define NV_OS_UNIX 1
 #elif defined POSH_OS_NETBSD
 #   define NV_OS_NETBSD 1
 #   define NV_OS_UNIX 1
 #elif defined POSH_OS_OPENBSD
 #   define NV_OS_OPENBSD 1
 #   define NV_OS_UNIX 1
 #elif defined POSH_OS_CYGWIN32
 #   define NV_OS_CYGWIN 1
 #elif defined POSH_OS_MINGW
 #   define NV_OS_MINGW 1
 #   define NV_OS_WIN32 1
 #elif defined POSH_OS_OSX
 #   define NV_OS_DARWIN 1
 #   define NV_OS_UNIX 1
 #elif defined POSH_OS_IOS
 #   define NV_OS_DARWIN 1 //ACS should we keep this on IOS?
 #   define NV_OS_UNIX 1
 #   define NV_OS_IOS 1
 #elif defined POSH_OS_UNIX
 #   define NV_OS_UNIX 1
 #elif defined POSH_OS_WIN64
 #   define NV_OS_WIN32 1
 #   define NV_OS_WIN64 1
 #elif defined POSH_OS_WIN32
 #   define NV_OS_WIN32 1
 #elif defined POSH_OS_XBOX
 #   define NV_OS_XBOX 1
 #else
 #   error "Unsupported OS"
 #endif
 
 
 // Threading:
 // some platforms don't implement __thread or similar for thread-local-storage
 // NV_OS_USE_PTHREAD is 1 on Unix-like platforms and Orbis, 0 elsewhere.
 // NV_OS_HAS_TLS_QUALIFIER is 0 only on Darwin/iOS and 1 on every other platform.
 #if NV_OS_UNIX || NV_OS_ORBIS || NV_OS_IOS //ACStodoIOS darwin instead of ios?
 #   define NV_OS_USE_PTHREAD 1
 #   if NV_OS_DARWIN || NV_OS_IOS
 #       define NV_OS_HAS_TLS_QUALIFIER 0
 #   else
 #       define NV_OS_HAS_TLS_QUALIFIER 1
 #   endif
 #else
 #   define NV_OS_USE_PTHREAD 0
 #   define NV_OS_HAS_TLS_QUALIFIER 1
 #endif
 
 
 // CPUs:
 // NV_CPU_X86
 // NV_CPU_X86_64
 // NV_CPU_PPC
 // NV_CPU_ARM
 // NV_CPU_AARCH64
 //
 // Exactly one NV_CPU_* macro is defined from the first matching POSH_CPU_*
 // macro. Note that NV_CPU_X86 is NOT defined on x86-64 (see the commented-out
 // line below), so code targeting both must test both macros.
 
 #define NV_CPU_STRING   POSH_CPU_STRING
 
 #if defined POSH_CPU_X86_64
 //#   define NV_CPU_X86 1
 #   define NV_CPU_X86_64 1
 #elif defined POSH_CPU_X86
 #   define NV_CPU_X86 1
 #elif defined POSH_CPU_PPC
 #   define NV_CPU_PPC 1
 #elif defined POSH_CPU_STRONGARM
 #   define NV_CPU_ARM 1
 #elif defined POSH_CPU_AARCH64
 #   define NV_CPU_AARCH64 1
 #else
 #   error "Unsupported CPU"
 #endif
 
 
 // Compiler:
 // NV_CC_GNUC
 // NV_CC_MSVC
 // NV_CC_CLANG
 //
 // Clang defines both NV_CC_CLANG and NV_CC_GNUC, so NV_CC_GNUC alone does not
 // tell GCC and Clang apart. NV_CC_STRING names the detected compiler.
 
 #if defined POSH_COMPILER_CLANG
 #   define NV_CC_CLANG  1
 #   define NV_CC_GNUC   1    // Clang is compatible with GCC.
 #   define NV_CC_STRING "clang"
 #elif defined POSH_COMPILER_GCC
 #   define NV_CC_GNUC   1
 #   define NV_CC_STRING "gcc"
 #elif defined POSH_COMPILER_MSVC
 #   define NV_CC_MSVC   1
 #   define NV_CC_STRING "msvc"
 #else
 #   error "Unsupported compiler"
 #endif
 
 // NV_CC_CPP11: non-zero when the C++11 features this library relies on
 // (deleted functions, rvalue references, static_assert) are considered available.
 // NOTE(review): the GCC test only looks at the compiler version, not at the
 // language standard selected on the command line - confirm this is intended.
 #if NV_CC_MSVC
 #define NV_CC_CPP11 (__cplusplus > 199711L || _MSC_VER >= 1800) // Visual Studio 2013 has all the features we use, but doesn't advertise full C++11 support yet.
 #else
 // @@ IC: This works in CLANG, about GCC?
 // @@ ES: Doesn't work in gcc. These 3 features are available in GCC >= 4.4.
 #ifdef __clang__
 #define NV_CC_CPP11 (__has_feature(cxx_deleted_functions) && __has_feature(cxx_rvalue_references) && __has_feature(cxx_static_assert))
 #elif defined __GNUC__ 
 #define NV_CC_CPP11 ( __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4))
 #endif
 #endif
 
 // Endianness:
 // Forwarded unchanged from the corresponding POSH_* macros.
 #define NV_LITTLE_ENDIAN    POSH_LITTLE_ENDIAN
 #define NV_BIG_ENDIAN       POSH_BIG_ENDIAN
 #define NV_ENDIAN_STRING    POSH_ENDIAN_STRING
 
 
 // Define the right printf prefix for size_t arguments:
 // With 64-bit pointers the 64-bit integer prefix is used; otherwise the prefix
 // is empty, so "%" NV_SIZET_PRINTF_PREFIX "u" degrades to plain "%u".
 #if POSH_64BIT_POINTER
 #  define NV_SIZET_PRINTF_PREFIX POSH_I64_PRINTF_PREFIX
 #else
 #  define NV_SIZET_PRINTF_PREFIX
 #endif
 
 
 // cmake config
 #include "nvconfig.h"
 
 
 // Type definitions:
 // Fixed-width integer aliases built on the posh_* types. Their sizes are
 // verified at compile time by the NV_COMPILER_CHECK lines further down.
 typedef posh_u8_t   uint8;
 typedef posh_i8_t   int8;
 
 typedef posh_u16_t  uint16;
 typedef posh_i16_t  int16;
 
 typedef posh_u32_t  uint32;
 typedef posh_i32_t  int32;
 
 typedef posh_u64_t  uint64;
 typedef posh_i64_t  int64;
 
 // Aliases
 // 'uint' is always the 32-bit unsigned type.
 typedef uint32      uint;
 
 
 // Version string:
 // A single string literal of the form "<os>/<compiler>/<cpu>/<endian>-endian - <date>-<time>",
 // built by adjacent-literal concatenation. __DATE__ and __TIME__ are those of
 // the translation unit that expands the macro.
 #define NV_VERSION_STRING \
     NV_OS_STRING "/" NV_CC_STRING "/" NV_CPU_STRING"/" \
     NV_ENDIAN_STRING"-endian - " __DATE__ "-" __TIME__
 
 
 // Disable copy constructor and assignment operator.
 // Usage: place NV_FORBID_COPY(ClassName); inside the class body.
 // With C++11 the two members are declared deleted. Otherwise they are declared
 // (never defined) in a private section - note that this variant leaves the
 // access level at 'private' for whatever follows the macro in the class.
 #if NV_CC_CPP11
 #define NV_FORBID_COPY(C) \
     C( const C & ) = delete; \
     C &operator=( const C & ) = delete
 #else
 #define NV_FORBID_COPY(C) \
     private: \
     C( const C & ); \
     C &operator=( const C & )
 #endif
 
 // Disable dynamic allocation on the heap. 
 // See Prohibiting Heap-Based Objects in More Effective C++.
 // Usage: place NV_FORBID_HEAPALLOC(); inside the class body. It declares private
 // operator new / operator new[] members, and leaves the access level at
 // 'private' for whatever follows the macro in the class.
 #define NV_FORBID_HEAPALLOC() \
     private: \
     void *operator new(size_t size); \
     void *operator new[](size_t size)
     //static void *operator new(size_t size); \
     //static void *operator new[](size_t size);
 
 // String concatenation macros.
 // NV_STRING_JOIN2/3 paste their arguments into one token; NV_STRING turns its
 // argument into a string literal. Each goes through a second helper macro so
 // that macro arguments such as __LINE__ are expanded before being pasted or
 // stringized.
 #define NV_STRING_JOIN2(arg1, arg2) NV_DO_STRING_JOIN2(arg1, arg2)
 #define NV_DO_STRING_JOIN2(arg1, arg2) arg1 ## arg2
 #define NV_STRING_JOIN3(arg1, arg2, arg3) NV_DO_STRING_JOIN3(arg1, arg2, arg3)
 #define NV_DO_STRING_JOIN3(arg1, arg2, arg3) arg1 ## arg2 ## arg3
 #define NV_STRING2(x) #x
 #define NV_STRING(x) NV_STRING2(x)
 
 // Wrappers for statement-like macros: the macro body goes between
 // NV_MULTI_LINE_MACRO_BEGIN and NV_MULTI_LINE_MACRO_END, producing a
 // do { ... } while(false) statement that needs a trailing semicolon at the
 // call site. The MSVC variant additionally disables warning 4127 around the
 // 'while(false)' and restores the previous warning state afterwards.
 #if NV_CC_MSVC
 #define NV_MULTI_LINE_MACRO_BEGIN do {  
 #define NV_MULTI_LINE_MACRO_END \
     __pragma(warning(push)) \
     __pragma(warning(disable:4127)) \
     } while(false) \
     __pragma(warning(pop))  
 #else
 #define NV_MULTI_LINE_MACRO_BEGIN do {
 #define NV_MULTI_LINE_MACRO_END } while(false)
 #endif
 
 // Compile-time assertion: nvStaticCheck(x) fails to compile when x is false.
 // With C++11 it maps to static_assert. The fallback declares an array typedef
 // whose size is 1 when x holds and -1 (ill-formed) when it does not; a plain
 // [(x)] would yield a zero-length array, which some compilers accept as an
 // extension and therefore would not reliably reject a false condition.
 #if NV_CC_CPP11
 #define nvStaticCheck(x) static_assert((x), "Static assert "#x" failed")
 #else
 #define nvStaticCheck(x) typedef char NV_STRING_JOIN2(__static_assert_,__LINE__)[(x) ? 1 : -1]
 #endif
 #define NV_COMPILER_CHECK(x) nvStaticCheck(x)   // I like this name best.
 
 // Make sure type definitions are fine.
 NV_COMPILER_CHECK(sizeof(int8) == 1);
 NV_COMPILER_CHECK(sizeof(uint8) == 1);
 NV_COMPILER_CHECK(sizeof(int16) == 2);
 NV_COMPILER_CHECK(sizeof(uint16) == 2);
 NV_COMPILER_CHECK(sizeof(int32) == 4);
 NV_COMPILER_CHECK(sizeof(uint32) == 4);
 NV_COMPILER_CHECK(sizeof(int64) == 8);
 NV_COMPILER_CHECK(sizeof(uint64) == 8);
 
 
 // Number of elements of a true array. The result is meaningless when x is a
 // pointer, since sizeof(x) is then the size of the pointer itself.
 #define NV_ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0]))
 
 // NV_MESSAGE(x): the argument of a '#pragma message'. The '#if 0' keeps the real
 // definitions disabled, so NV_MESSAGE currently expands to nothing.
 #if 0 // Disabled in The Witness.
 #if NV_CC_MSVC
 #define NV_MESSAGE(x) message(__FILE__ "(" NV_STRING(__LINE__) ") : " x)
 #else
 #define NV_MESSAGE(x) message(x)
 #endif
 #else
 #define NV_MESSAGE(x) 
 #endif
 
 
 // Startup initialization macro.
 // Expands to a struct in an anonymous namespace whose constructor executes
 // some_code, plus one static instance of it, so some_code runs when that
 // instance is constructed. The struct and instance names embed __LINE__, so two
 // uses on the same source line of one translation unit would collide.
 #define NV_AT_STARTUP(some_code) \
     namespace { \
         static struct NV_STRING_JOIN2(AtStartup_, __LINE__) { \
             NV_STRING_JOIN2(AtStartup_, __LINE__)() { some_code; } \
         } \
         NV_STRING_JOIN3(AtStartup_, __LINE__, Instance); \
     }
 
 // Tell the compiler that the parameter is intentionally unused, to suppress
 // compiler warnings. A cast to void is used instead of a self-assignment so the
 // macro also works for const objects and for types that cannot be assigned.
 #define NV_UNUSED(a) ((void)(a))
 
 // Null index. @@ Move this somewhere else... it's only used by nvmesh.
 //const unsigned int NIL = unsigned int(~0);
 //#define NIL uint(~0)
 
 // Null pointer.
 // Only defined here when no previously included header provided it.
 #ifndef NULL
 #define NULL 0
 #endif
 
 // Platform includes
 // Pulls in the compiler/OS specific definitions header. The BSDs share the
 // Darwin header. Cygwin, and any other unlisted compiler/OS combination, is
 // rejected with an #error.
 #if NV_CC_MSVC
 #   if NV_OS_WIN32
 #       include "DefsVcWin32.h"
 #   elif NV_OS_XBOX
 #       include "DefsVcXBox.h"
 #   else
 #       error "MSVC: Platform not supported"
 #   endif
 #elif NV_CC_GNUC
 #   if NV_OS_LINUX
 #       include "DefsGnucLinux.h"
 #   elif NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_NETBSD || NV_OS_OPENBSD
 #       include "DefsGnucDarwin.h"
 #   elif NV_OS_MINGW
 #       include "DefsGnucWin32.h"
 #   elif NV_OS_CYGWIN
 #       error "GCC: Cygwin not supported"
 #   else
 #       error "GCC: Platform not supported"
 #   endif
 #endif
 
 #endif // NV_CORE_H
Index: ps/trunk/libraries/source/nvtt/src/src/nvimage/KtxFile.cpp
===================================================================
--- ps/trunk/libraries/source/nvtt/src/src/nvimage/KtxFile.cpp	(revision 23379)
+++ ps/trunk/libraries/source/nvtt/src/src/nvimage/KtxFile.cpp	(revision 23380)
@@ -1,83 +1,83 @@
-// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+// This code is in the public domain -- Ignacio CastaÃ±o <castano@gmail.com>
 
 #include "KtxFile.h"
 
 using namespace nv;
 
 // 12-byte identifier that KtxHeader's constructor copies into its 'identifier'
 // field. It contains the ASCII text "KTX 11" framed by non-text bytes.
 static const uint8 fileIdentifier[12] = {
     0xAB, 0x4B, 0x54, 0x58,     // 0xAB 'K' 'T' 'X'
     0x20, 0x31, 0x31, 0xBB,     // ' '  '1' '1' 0xBB
     0x0D, 0x0A, 0x1A, 0x0A      // '\r' '\n' 0x1A '\n'
 };
 
 
 // Builds a header describing an empty RGBA image: file identifier and
 // endianness marker set, one face, type size 1, every other field zero.
 KtxHeader::KtxHeader() {
     memcpy(identifier, fileIdentifier, 12);
     endianness = 0x04030201;
 
     // Pixel format description.
     glType = glFormat = 0;
     glTypeSize = 1;
     glInternalFormat = glBaseInternalFormat = KTX_RGBA;
 
     // Image dimensions and layout.
     pixelWidth = pixelHeight = pixelDepth = 0;
     numberOfArrayElements = 0;
     numberOfFaces = 1;
     numberOfMipmapLevels = 0;
 
     // No key/value metadata yet.
     bytesOfKeyValueData = 0;
 }
 
 
 // Serializes every field of a KtxHeader through the stream, in declaration order.
 // Defined inside namespace nv so that it is the operator declared in KtxFile.h
 // rather than an unrelated global overload. The parameter is a KtxHeader (the
 // fields used below are KtxHeader's) and the endianness field is spelled as in
 // the struct.
 namespace nv {
 
 Stream & operator<< (Stream & s, KtxHeader & header) {
     s.serialize(header.identifier, 12);
     s << header.endianness << header.glType << header.glTypeSize << header.glFormat << header.glInternalFormat << header.glBaseInternalFormat;
     s << header.pixelWidth << header.pixelHeight << header.pixelDepth;
     s << header.numberOfArrayElements << header.numberOfFaces << header.numberOfMipmapLevels;
     s << header.bytesOfKeyValueData;
     return s;
 }
 
 } // nv namespace
 
 
 // Nothing to do explicitly: the members construct themselves.
 KtxFile::KtxFile()
 {
 }
 
 // Nothing to do explicitly: the members destroy themselves.
 KtxFile::~KtxFile()
 {
 }
 
 void KtxFile::addKeyValue(const char * key, const char * value) {
     keyArray.append(key);
     valueArray.append(value);
     bytesOfKeyValueData += strlen(key) + 1 + strlen(value) + 1;
 }
 
 
 // Serializes a KtxFile: the header first, then (when saving) each key/value
 // pair as a 32-bit byte count followed by the NUL-terminated key and value.
 // Loading of key/value pairs is not implemented yet.
 // Defined inside namespace nv to match the declaration in KtxFile.h. It reads
 // KtxFile's private members, so it must be declared a friend of KtxFile.
 // NOTE(review): no padding is written after a pair, although the layout comment
 // in KtxFile.h lists a valuePadding field - confirm against the specification.
 namespace nv {
 
 Stream & operator<< (Stream & s, KtxFile & file) {
     s << file.header;
 
     if (s.isSaving()) {
 
         const uint keyValueCount = file.keyArray.count();
         for (uint i = 0; i < keyValueCount; i++) {
             const String & key = file.keyArray[i];
             const String & value = file.valueArray[i];
             uint keySize = key.length() + 1;
             uint valueSize = value.length() + 1;
             uint keyValueSize = keySize + valueSize;
 
             s << keyValueSize;
 
             s.serialize(key.str(), keySize);
             s.serialize(value.str(), valueSize);
         }
     }
     else {
         // @@ Read key value pairs.
     }
 
     return s;
 }
 
 } // nv namespace
 
 
 
Index: ps/trunk/libraries/source/nvtt/src/src/nvimage/KtxFile.h
===================================================================
--- ps/trunk/libraries/source/nvtt/src/src/nvimage/KtxFile.h	(revision 23379)
+++ ps/trunk/libraries/source/nvtt/src/src/nvimage/KtxFile.h	(revision 23380)
@@ -1,102 +1,102 @@
-// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+// This code is in the public domain -- Ignacio CastaÃ±o <castano@gmail.com>
 
 #pragma once
 #ifndef NV_IMAGE_KTXFILE_H
 #define NV_IMAGE_KTXFILE_H
 
 #include "nvimage.h"
 #include "nvcore/StrLib.h"
 
 // KTX File format specification:
 // http://www.khronos.org/opengles/sdk/tools/KTX/file_format_spec/#key
 
 namespace nv
 {
     class Stream;
 
     // GL types (Table 3.2)
     // Namespace-scope const objects must be initialized; the values are the
     // OpenGL enumerants of the same name (GL_* -> KTX_*).
     const uint KTX_UNSIGNED_BYTE = 0x1401;          // GL_UNSIGNED_BYTE
     const uint KTX_UNSIGNED_SHORT_5_6_5 = 0x8363;   // GL_UNSIGNED_SHORT_5_6_5
     // ...
 
     // GL formats (Table 3.3)
     // ...
 
     // GL internal formats (Table 3.12, 3.13)
     // ...
 
     // GL base internal format. (Table 3.11)
     const uint KTX_RGB = 0x1907;                    // GL_RGB
     const uint KTX_RGBA = 0x1908;                   // GL_RGBA
     const uint KTX_ALPHA = 0x1906;                  // GL_ALPHA
     // ...
 
 
     // Fixed-size header of a KTX file. The fields are serialized in exactly this
     // order by the KtxHeader stream operator, so do not reorder them.
     struct KtxHeader {
         uint8 identifier[12];           // File identifier bytes.
         uint32 endianness;              // Set to 0x04030201 by the constructor.
         uint32 glType;
         uint32 glTypeSize;
         uint32 glFormat;
         uint32 glInternalFormat;
         uint32 glBaseInternalFormat;
         uint32 pixelWidth;
         uint32 pixelHeight;
         uint32 pixelDepth;
         uint32 numberOfArrayElements;
         uint32 numberOfFaces;
         uint32 numberOfMipmapLevels;
         uint32 bytesOfKeyValueData;     // Size of the key/value block that follows the header.
 
         // Initializes the header with default values.
         KtxHeader();
 
     };
 
     // Serializes a KtxHeader field by field (this file declares KtxHeader, not DDSHeader).
     NVIMAGE_API Stream & operator<< (Stream & s, KtxHeader & header);
 
 
     // In-memory representation of a KTX file: the header plus its key/value
     // metadata, kept as two parallel arrays (keyArray[i] pairs with valueArray[i]).
     struct KtxFile {
         KtxFile();
         ~KtxFile();
 
         // Appends one key/value metadata pair.
         void addKeyValue(const char * key, const char * value);
 
         // The stream operator serializes the private members below.
         friend NVIMAGE_API Stream & operator<< (Stream & s, KtxFile & file);
 
     private:
         KtxHeader header;
 
         Array<String> keyArray;
         Array<String> valueArray;
 
     };
 
     // Serializes the header and the key/value pairs of a KtxFile.
     NVIMAGE_API Stream & operator<< (Stream & s, KtxFile & file);
 
 
     /*
     for each keyValuePair that fits in bytesOfKeyValueData
         UInt32   keyAndValueByteSize
         Byte     keyAndValue[keyAndValueByteSize]
         Byte     valuePadding[3 - ((keyAndValueByteSize + 3) % 4)]
     end
 
     for each mipmap_level in numberOfMipmapLevels*
         UInt32 imageSize;
         for each array_element in numberOfArrayElements*
            for each face in numberOfFaces
                for each z_slice in pixelDepth*
                    for each row or row_of_blocks in pixelHeight*
                        for each pixel or block_of_pixels in pixelWidth
                            Byte data[format-specific-number-of-bytes]**
                        end
                    end
                end
                Byte cubePadding[0-3]
            end
         end
         Byte mipPadding[3 - ((imageSize + 3) % 4)]
     end
     */
 
 } // nv namespace
 
 #endif // NV_IMAGE_KTXFILE_H
Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/Half.cpp
===================================================================
--- ps/trunk/libraries/source/nvtt/src/src/nvmath/Half.cpp	(revision 23379)
+++ ps/trunk/libraries/source/nvtt/src/src/nvmath/Half.cpp	(revision 23380)
@@ -1,787 +1,787 @@
 // Branch-free implementation of half-precision (16 bit) floating point
 // Copyright 2006 Mike Acton <macton@gmail.com>
 // 
 // Permission is hereby granted, free of charge, to any person obtaining a 
 // copy of this software and associated documentation files (the "Software"),
 // to deal in the Software without restriction, including without limitation
 // the rights to use, copy, modify, merge, publish, distribute, sublicense, 
 // and/or sell copies of the Software, and to permit persons to whom the 
 // Software is furnished to do so, subject to the following conditions:
 // 
 // The above copyright notice and this permission notice shall be included 
 // in all copies or substantial portions of the Software.
 // 
 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 // THE SOFTWARE
 //
 // Half-precision floating point format
 // ------------------------------------
 //
 //   | Field    | Last | First | Note
 //   |----------|------|-------|----------
 //   | Sign     | 15   | 15    |
 //   | Exponent | 14   | 10    | Bias = 15
 //   | Mantissa | 9    | 0     |
 //
 // Compiling
 // ---------
 //
 //  Preferred compile flags for GCC: 
 //     -O3 -fstrict-aliasing -std=c99 -pedantic -Wall -Wstrict-aliasing
 //
 //     This file is a C99 source file, intended to be compiled with a C99 
 //     compliant compiler. However, for the moment it remains compatible
 //     with C++98. Therefore if you are using a compiler that poorly implements
 //     C standards (e.g. MSVC), it may be compiled as C++. This is not
 //     guaranteed for future versions. 
 //
 // Features
 // --------
 //
 //  * QNaN + <x>  = QNaN
 //  * <x>  + +INF = +INF
 //  * <x>  - -INF = -INF
 //  * INF  - INF  = SNaN
 //  * Denormalized values
 //  * Difference of ZEROs is always +ZERO
 //  * Sum round with guard + round + sticky bit (grs)
 //  * And of course... no branching
 // 
 // Precision of Sum
 // ----------------
 //
 //  (SUM)        uint16 z = half_add( x, y );
 //  (DIFFERENCE) uint16 z = half_add( x, -y );
 //
 //     Will have exactly (0 ulps difference) the same result as:
 //     (For 32 bit IEEE 754 floating point and same rounding mode)
 //
 //     union FLOAT_32
 //     {
 //       float    f32;
 //       uint32 u32;
 //     };
 //
 //     union FLOAT_32 fx = { .u32 = half_to_float( x ) };
 //     union FLOAT_32 fy = { .u32 = half_to_float( y ) };
 //     union FLOAT_32 fz = { .f32 = fx.f32 + fy.f32    };
 //     uint16       z  = float_to_half( fz );
 //
 
 #include "Half.h"
 #include <stdio.h>
 
 
 // Load immediate: hands back its argument unchanged.
 static inline uint32 _uint32_li( uint32 a )
 {
     const uint32 value = a;
     return value;
 }
 
 // Decrement: a - 1 with unsigned wrap-around (0 becomes the all-ones value).
 static inline uint32 _uint32_dec( uint32 a )
 {
     const uint32 decremented = a - 1;
     return decremented;
 }
 
 // Increment: a + 1 with unsigned wrap-around.
 static inline uint32 _uint32_inc( uint32 a )
 {
     const uint32 incremented = a + 1;
     return incremented;
 }
 
 // Complement: flips every bit of a.
 static inline uint32 _uint32_not( uint32 a )
 {
     const uint32 flipped = ~a;
     return flipped;
 }
 
 // Negate: two's complement negation of a, i.e. (0 - a) with unsigned wrap-around.
 static inline uint32 _uint32_neg( uint32 a )
 {
     // Subtracting from zero yields the same value as unary minus on an unsigned
     // operand, but does not trigger MSVC warning C4146. This removes the need for
     // the compiler-specific warning pragmas, which were unknown to other
     // compilers and reset the warning to its default instead of restoring it.
     return ((uint32)0 - a);
 }
 
 // Extend sign: shifts a, reinterpreted as signed, right by 31 bits so the
 // top bit is replicated across the result.
 static inline uint32 _uint32_ext( uint32 a )
 {
     const int32 as_signed = (int32)a;
     return as_signed >> 31;
 }
 
 // And: bitwise AND of a and b.
 static inline uint32 _uint32_and( uint32 a, uint32 b )
 {
     const uint32 common_bits = a & b;
     return common_bits;
 }
 
 // And with Complement: keeps the bits of a that are NOT set in b.
 static inline uint32 _uint32_andc( uint32 a, uint32 b )
 {
     const uint32 keep_mask = ~b;
     return a & keep_mask;
 }
 
 // Or: bitwise OR of a and b.
 static inline uint32 _uint32_or( uint32 a, uint32 b )
 {
     const uint32 merged_bits = a | b;
     return merged_bits;
 }
 
 // Shift Right Logical: a shifted right by sa bits, zero-filled from the top.
 static inline uint32 _uint32_srl( uint32 a, int sa )
 {
     const uint32 shifted = a >> sa;
     return shifted;
 }
 
 // Shift Left Logical: a shifted left by sa bits, zero-filled from the bottom.
 static inline uint32 _uint32_sll( uint32 a, int sa )
 {
     const uint32 shifted = a << sa;
     return shifted;
 }
 
 // Add: a + b with unsigned wrap-around.
 static inline uint32 _uint32_add( uint32 a, uint32 b )
 {
     const uint32 sum = a + b;
     return sum;
 }
 
 // Subtract: a - b with unsigned wrap-around.
 static inline uint32 _uint32_sub( uint32 a, uint32 b )
 {
     const uint32 difference = a - b;
     return difference;
 }
 
 // Select on Sign bit: yields a when the most significant bit of test is set,
 // b otherwise, without branching (the sign bit is spread into a full mask).
 static inline uint32 _uint32_sels( uint32 test, uint32 a, uint32 b )
 {
     const uint32 mask = _uint32_ext( test );
 
     return _uint32_or( _uint32_and( a, mask ), _uint32_andc( b, mask ) );
 }
 
 // Load Immediate: hands back its argument unchanged.
 static inline uint16 _uint16_li( uint16 a )
 {
     const uint16 value = a;
     return value;
 }
 
 // Extend sign: shifts a, reinterpreted as signed 16-bit, right by 15 bits so
 // the top bit is replicated across the result.
 static inline uint16 _uint16_ext( uint16 a )
 {
     const int16 as_signed = (int16)a;
     return as_signed >> 15;
 }
 
 // Negate: two's complement negation of a, truncated to 16 bits.
 static inline uint16 _uint16_neg( uint16 a )
 {
     const uint16 negated = -a;
     return negated;
 }
 
 // Complement: flips every bit of a (result truncated to 16 bits).
 static inline uint16 _uint16_not( uint16 a )
 {
     const uint16 flipped = ~a;
     return flipped;
 }
 
 // Decrement: a - 1, truncated to 16 bits (0 becomes 0xffff).
 static inline uint16 _uint16_dec( uint16 a )
 {
     const uint16 decremented = a - 1;
     return decremented;
 }
 
 // Shift Left Logical: a shifted left by sa bits, truncated to 16 bits.
 static inline uint16 _uint16_sll( uint16 a, int sa )
 {
     const uint16 shifted = a << sa;
     return shifted;
 }
 
 // Shift Right Logical: a shifted right by sa bits, zero-filled from the top.
 static inline uint16 _uint16_srl( uint16 a, int sa )
 {
     const uint16 shifted = a >> sa;
     return shifted;
 }
 
 // Add: a + b, truncated to 16 bits.
 static inline uint16 _uint16_add( uint16 a, uint16 b )
 {
     const uint16 sum = a + b;
     return sum;
 }
 
 // Subtract: a - b, truncated to 16 bits.
 static inline uint16 _uint16_sub( uint16 a, uint16 b )
 {
     const uint16 difference = a - b;
     return difference;
 }
 
 // And: bitwise AND of a and b.
 static inline uint16 _uint16_and( uint16 a, uint16 b )
 {
     const uint16 common_bits = a & b;
     return common_bits;
 }
 
 // Or: bitwise OR of a and b.
 static inline uint16 _uint16_or( uint16 a, uint16 b )
 {
     const uint16 merged_bits = a | b;
     return merged_bits;
 }
 
 // Exclusive Or: bitwise XOR of a and b.
 static inline uint16 _uint16_xor( uint16 a, uint16 b )
 {
     const uint16 differing_bits = a ^ b;
     return differing_bits;
 }
 
 // And with Complement: keeps the bits of a that are NOT set in b.
 static inline uint16 _uint16_andc( uint16 a, uint16 b )
 {
     const uint16 kept_bits = a & ~b;
     return kept_bits;
 }
 
 // And then Shift Right Logical: masks a with b, then shifts the result right by sa bits.
 static inline uint16 _uint16_andsrl( uint16 a, uint16 b, int sa )
 {
     const uint16 shifted = (a & b) >> sa;
     return shifted;
 }
 
 // Shift Right Logical then Mask: shifts a right by sa bits, then keeps only the bits in mask.
 static inline uint16 _uint16_srlm( uint16 a, int sa, uint16 mask )
 {
     const uint16 masked = (a >> sa) & mask;
     return masked;
 }
 
 // Add then Mask: adds a and b, then keeps only the bits in mask.
 static inline uint16 _uint16_addm( uint16 a, uint16 b, uint16 mask )
 {
     const uint16 masked = (a + b) & mask;
     return masked;
 }
 
 
 // Select on Sign bit: yields a when the most significant bit of test is set,
 // b otherwise, without branching (the sign bit is spread into a full mask).
 static inline uint16 _uint16_sels( uint16 test, uint16 a, uint16 b )
 {
     const uint16 mask = _uint16_ext( test );
 
     return _uint16_or( _uint16_and( a, mask ), _uint16_andc( b, mask ) );
 }
 
 // Platform intrinsics used by the count-leading-zeros helpers below.
 #if NV_OS_XBOX
 #include <PPCIntrinsics.h>
 #elif NV_CC_MSVC
 
 #include <intrin.h>
 #pragma intrinsic(_BitScanReverse)
 
 // Number of leading zero bits of x, derived from the index of its highest set
 // bit as reported by _BitScanReverse.
 // NOTE(review): 'index' is not initialized and _BitScanReverse presumably leaves
 // it unset when x == 0, so the result is only meaningful for non-zero x. The
 // caller visible in this file (_uint32_cntlz) replaces the result by 32 for zero.
 uint32 _uint32_nlz( uint32 x ) {
     unsigned long index;
     _BitScanReverse(&index, x);
     return 31 - index;
 }
 #endif
 
 
 // Count Leading Zeros
 // Returns the number of zero bits above the highest set bit of x, and 32 when
 // x is 0. All variants are branch-free.
 static inline uint32 _uint32_cntlz( uint32 x )
 {
 #if NV_CC_GNUC
     /* GCC and Clang (nvcore.h defines NV_CC_GNUC for both; NV_CC_GCC does not exist). */
     /* On PowerPC, this will map to insn: cntlzw */
     /* On x86, this will map to insn: bsr/lzcnt  */
     /* (x | -x) has its top bit set exactly when x is non-zero. */
     const uint32 is_x_nez_msb = _uint32_or( x, _uint32_neg( x ) );
     /* __builtin_clz is undefined for 0; forcing bit 0 on keeps the call defined */
     /* and does not change the count for any non-zero x.                         */
     const uint32 nlz          = (uint32)__builtin_clz( x | 1 );
     const uint32 result       = _uint32_sels( is_x_nez_msb, nlz, 0x00000020 );
     return (result);
 #elif NV_OS_XBOX
     // Xbox PPC has this as an intrinsic.
     return _CountLeadingZeros(x);
 #elif NV_CC_MSVC
     // (x | -x) has its top bit set exactly when x is non-zero; the value computed
     // by _uint32_nlz is discarded in favour of 32 when x is zero.
     const uint32 is_x_nez_msb = _uint32_or( x, _uint32_neg( x ) );
     const uint32 nlz          = _uint32_nlz( x );
     const uint32 result       = _uint32_sels( is_x_nez_msb, nlz, 0x00000020 );
     return (result);
 #else
     // Portable fallback: smear the highest set bit into every lower position
     // (x0..x9), invert so that only the leading zeros remain set (xA), then
     // count those bits with a parallel population count (xB..x19).
     const uint32 x0  = _uint32_srl(  x,  1 );
     const uint32 x1  = _uint32_or(   x,  x0 );
     const uint32 x2  = _uint32_srl(  x1, 2 );
     const uint32 x3  = _uint32_or(   x1, x2 );
     const uint32 x4  = _uint32_srl(  x3, 4 );
     const uint32 x5  = _uint32_or(   x3, x4 );
     const uint32 x6  = _uint32_srl(  x5, 8 );
     const uint32 x7  = _uint32_or(   x5, x6 );
     const uint32 x8  = _uint32_srl(  x7, 16 );
     const uint32 x9  = _uint32_or(   x7, x8 );
     const uint32 xA  = _uint32_not(  x9 );
     const uint32 xB  = _uint32_srl(  xA, 1 );
     const uint32 xC  = _uint32_and(  xB, 0x55555555 );
     const uint32 xD  = _uint32_sub(  xA, xC );
     const uint32 xE  = _uint32_and(  xD, 0x33333333 );
     const uint32 xF  = _uint32_srl(  xD, 2 );
     const uint32 x10 = _uint32_and(  xF, 0x33333333 );
     const uint32 x11 = _uint32_add(  xE, x10 );
     const uint32 x12 = _uint32_srl(  x11, 4 );
     const uint32 x13 = _uint32_add(  x11, x12 );
     const uint32 x14 = _uint32_and(  x13, 0x0f0f0f0f );
     const uint32 x15 = _uint32_srl(  x14, 8 );
     const uint32 x16 = _uint32_add(  x14, x15 );
     const uint32 x17 = _uint32_srl(  x16, 16 );
     const uint32 x18 = _uint32_add(  x16, x17 );
     const uint32 x19 = _uint32_and(  x18, 0x0000003f );
     return ( x19 );
 #endif
 }
 
 // Count Leading Zeros
 // Returns the number of zero bits above the highest set bit of the 16-bit
 // value x, and 16 when x is 0.
 static inline uint16 _uint16_cntlz( uint16 x )
 {
 #ifdef __GNUC__
     /* Count on the zero-extended 32-bit value, then drop the 16 extra leading zeros. */
     uint16 nlz32 = (uint16)_uint32_cntlz( (uint32)x );
     uint32 nlz   = _uint32_sub( nlz32, 16 );
     return (nlz);
 #elif NV_OS_XBOX
     // Same approach using the Xbox intrinsic. The platform macro is NV_OS_XBOX,
     // as used by _uint32_cntlz; _NV_OS_XBOX_ is never defined.
     uint16 nlz32 = (uint16)_CountLeadingZeros( (uint32)x );
     return _uint32_sub( nlz32, 16);
 #else
     // Portable fallback: smear the highest set bit into every lower position
     // (x0..x7), invert so that only the leading zeros remain set (x8), then
     // count those bits with a parallel population count (x9..x11).
     const uint16 x0  = _uint16_srl(  x,  1 );
     const uint16 x1  = _uint16_or(   x,  x0 );
     const uint16 x2  = _uint16_srl(  x1, 2 );
     const uint16 x3  = _uint16_or(   x1, x2 );
     const uint16 x4  = _uint16_srl(  x3, 4 );
     const uint16 x5  = _uint16_or(   x3, x4 );
     const uint16 x6  = _uint16_srl(  x5, 8 );
     const uint16 x7  = _uint16_or(   x5, x6 );
     const uint16 x8  = _uint16_not(  x7 );
     const uint16 x9  = _uint16_srlm( x8, 1, 0x5555 );
     const uint16 xA  = _uint16_sub(  x8, x9 );
     const uint16 xB  = _uint16_and(  xA, 0x3333 );
     const uint16 xC  = _uint16_srlm( xA, 2, 0x3333 );
     const uint16 xD  = _uint16_add(  xB, xC );
     const uint16 xE  = _uint16_srl(  xD, 4 );
     const uint16 xF  = _uint16_addm( xD, xE, 0x0f0f );
     const uint16 x10 = _uint16_srl(  xF, 8 );
     const uint16 x11 = _uint16_addm( xF, x10, 0x001f );
     return ( x11 );
 #endif
 }
 
 // Converts the bit pattern of a 32-bit float (f) into the bit pattern of a
 // 16-bit half, without branching.
 //
 // Structure: the sign, exponent and mantissa fields of f are extracted, every
 // candidate exponent+mantissa result (normal, mantissa-overflow, NaN, infinity,
 // denormal, signalling NaN) is computed unconditionally, and a chain of
 // _uint32_sels calls picks one of them. Each is_*_msb value encodes its
 // condition in the most significant bit only. Later selects in the chain take
 // precedence over earlier ones. The sign bit is OR-ed in at the end.
 //
 // NOTE(review): f_m_denorm_sa is (1 - f_e_half_bias) and is used as a shift
 // count; when the exponent field of f is far below 0x70 it exceeds 31 (it is
 // 113 for an exponent field of 0), and shifting a 32-bit value by 32 or more
 // bits is undefined in C++. The denormal result is the one selected for such
 // inputs - confirm the outcome on every supported target.
 uint16
 nv::half_from_float( uint32 f )
 {
     // Constants: float (f_*) and half (h_*) field masks, bit positions and biases.
     const uint32 one                        = _uint32_li( 0x00000001 );
     const uint32 f_s_mask                   = _uint32_li( 0x80000000 );
     const uint32 f_e_mask                   = _uint32_li( 0x7f800000 );
     const uint32 f_m_mask                   = _uint32_li( 0x007fffff );
     const uint32 f_m_hidden_bit             = _uint32_li( 0x00800000 );
     const uint32 f_m_round_bit              = _uint32_li( 0x00001000 );
     const uint32 f_snan_mask                = _uint32_li( 0x7fc00000 );
     const uint32 f_e_pos                    = _uint32_li( 0x00000017 );
     const uint32 h_e_pos                    = _uint32_li( 0x0000000a );
     const uint32 h_e_mask                   = _uint32_li( 0x00007c00 );
     const uint32 h_snan_mask                = _uint32_li( 0x00007e00 );
     const uint32 h_e_mask_value             = _uint32_li( 0x0000001f );
     const uint32 f_h_s_pos_offset           = _uint32_li( 0x00000010 );
     const uint32 f_h_bias_offset            = _uint32_li( 0x00000070 );
     const uint32 f_h_m_pos_offset           = _uint32_li( 0x0000000d );
     const uint32 h_nan_min                  = _uint32_li( 0x00007c01 );
     const uint32 f_h_e_biased_flag          = _uint32_li( 0x0000008f );
     // Field extraction and the candidate results.
     const uint32 f_s                        = _uint32_and( f,               f_s_mask         );
     const uint32 f_e                        = _uint32_and( f,               f_e_mask         );
     const uint16 h_s                        = _uint32_srl( f_s,             f_h_s_pos_offset );
     const uint32 f_m                        = _uint32_and( f,               f_m_mask         );
     const uint16 f_e_amount                 = _uint32_srl( f_e,             f_e_pos          );
     const uint32 f_e_half_bias              = _uint32_sub( f_e_amount,      f_h_bias_offset  );
     const uint32 f_snan                     = _uint32_and( f,               f_snan_mask      );
     const uint32 f_m_round_mask             = _uint32_and( f_m,             f_m_round_bit    );
     const uint32 f_m_round_offset           = _uint32_sll( f_m_round_mask,  one              );
     const uint32 f_m_rounded                = _uint32_add( f_m,             f_m_round_offset );
     const uint32 f_m_denorm_sa              = _uint32_sub( one,             f_e_half_bias    );
     const uint32 f_m_with_hidden            = _uint32_or(  f_m_rounded,     f_m_hidden_bit   );
     const uint32 f_m_denorm                 = _uint32_srl( f_m_with_hidden, f_m_denorm_sa    );
     const uint32 h_m_denorm                 = _uint32_srl( f_m_denorm,      f_h_m_pos_offset );
     const uint32 f_m_rounded_overflow       = _uint32_and( f_m_rounded,     f_m_hidden_bit   );
     const uint32 m_nan                      = _uint32_srl( f_m,             f_h_m_pos_offset );
     const uint32 h_em_nan                   = _uint32_or(  h_e_mask,        m_nan            );
     const uint32 h_e_norm_overflow_offset   = _uint32_inc( f_e_half_bias );
     const uint32 h_e_norm_overflow          = _uint32_sll( h_e_norm_overflow_offset, h_e_pos          );
     const uint32 h_e_norm                   = _uint32_sll( f_e_half_bias,            h_e_pos          );
     const uint32 h_m_norm                   = _uint32_srl( f_m_rounded,              f_h_m_pos_offset );
     const uint32 h_em_norm                  = _uint32_or(  h_e_norm,                 h_m_norm         );
     // Conditions, each encoded in the most significant bit.
     const uint32 is_h_ndenorm_msb           = _uint32_sub( f_h_bias_offset,   f_e_amount    );
     const uint32 is_f_e_flagged_msb         = _uint32_sub( f_h_e_biased_flag, f_e_half_bias );
     const uint32 is_h_denorm_msb            = _uint32_not( is_h_ndenorm_msb );
     const uint32 is_f_m_eqz_msb             = _uint32_dec( f_m   );
     const uint32 is_h_nan_eqz_msb           = _uint32_dec( m_nan );
     const uint32 is_f_inf_msb               = _uint32_and( is_f_e_flagged_msb, is_f_m_eqz_msb   );
     const uint32 is_f_nan_underflow_msb     = _uint32_and( is_f_e_flagged_msb, is_h_nan_eqz_msb );
     const uint32 is_e_overflow_msb          = _uint32_sub( h_e_mask_value,     f_e_half_bias    );
     const uint32 is_h_inf_msb               = _uint32_or(  is_e_overflow_msb,  is_f_inf_msb     );
     const uint32 is_f_nsnan_msb             = _uint32_sub( f_snan,             f_snan_mask      );
     const uint32 is_m_norm_overflow_msb     = _uint32_neg( f_m_rounded_overflow );
     const uint32 is_f_snan_msb              = _uint32_not( is_f_nsnan_msb );
     // Selection chain: each line overrides the previous result when its condition holds.
     const uint32 h_em_overflow_result       = _uint32_sels( is_m_norm_overflow_msb, h_e_norm_overflow, h_em_norm                 );
     const uint32 h_em_nan_result            = _uint32_sels( is_f_e_flagged_msb,     h_em_nan,          h_em_overflow_result      );
     const uint32 h_em_nan_underflow_result  = _uint32_sels( is_f_nan_underflow_msb, h_nan_min,         h_em_nan_result           );
     const uint32 h_em_inf_result            = _uint32_sels( is_h_inf_msb,           h_e_mask,          h_em_nan_underflow_result );
     const uint32 h_em_denorm_result         = _uint32_sels( is_h_denorm_msb,        h_m_denorm,        h_em_inf_result           );
     const uint32 h_em_snan_result           = _uint32_sels( is_f_snan_msb,          h_snan_mask,       h_em_denorm_result        );
     const uint32 h_result                   = _uint32_or( h_s, h_em_snan_result );
 
     return (uint16)(h_result);
 }
 
 // Converts the bit pattern of a 16-bit half (h) into the bit pattern of a
 // 32-bit float, without branching.
 //
 // Structure: the sign, exponent and mantissa fields of h are extracted, the
 // candidate results for the normal, denormal, infinity and NaN cases are
 // computed unconditionally, and a chain of _uint32_sels calls picks one of
 // them (zero is handled by masking the normal result). Each is_*_msb value
 // encodes its condition in the most significant bit only. Later selects in the
 // chain take precedence over earlier ones. The sign bit is OR-ed in at the end.
 uint32 
 nv::half_to_float( uint16 h )
 {
     // Constants: half (h_*) and float (f_*) field masks, bit positions and biases.
     const uint32 h_e_mask              = _uint32_li( 0x00007c00 );
     const uint32 h_m_mask              = _uint32_li( 0x000003ff );
     const uint32 h_s_mask              = _uint32_li( 0x00008000 );
     const uint32 h_f_s_pos_offset      = _uint32_li( 0x00000010 );
     const uint32 h_f_e_pos_offset      = _uint32_li( 0x0000000d );
     const uint32 h_f_bias_offset       = _uint32_li( 0x0001c000 );
     const uint32 f_e_mask              = _uint32_li( 0x7f800000 );
     const uint32 f_m_mask              = _uint32_li( 0x007fffff );
     const uint32 h_f_e_denorm_bias     = _uint32_li( 0x0000007e );
     const uint32 h_f_m_denorm_sa_bias  = _uint32_li( 0x00000008 );
     const uint32 f_e_pos               = _uint32_li( 0x00000017 );
     const uint32 h_e_mask_minus_one    = _uint32_li( 0x00007bff );
     // Field extraction and the normal-number candidate (f_em).
     const uint32 h_e                   = _uint32_and( h, h_e_mask );
     const uint32 h_m                   = _uint32_and( h, h_m_mask );
     const uint32 h_s                   = _uint32_and( h, h_s_mask );
     const uint32 h_e_f_bias            = _uint32_add( h_e, h_f_bias_offset );
     const uint32 h_m_nlz               = _uint32_cntlz( h_m );
     const uint32 f_s                   = _uint32_sll( h_s,        h_f_s_pos_offset );
     const uint32 f_e                   = _uint32_sll( h_e_f_bias, h_f_e_pos_offset );
     const uint32 f_m                   = _uint32_sll( h_m,        h_f_e_pos_offset );
     const uint32 f_em                  = _uint32_or(  f_e,        f_m              );
     // Denormal candidate: the mantissa is renormalized using its leading-zero count.
     const uint32 h_f_m_sa              = _uint32_sub( h_m_nlz,             h_f_m_denorm_sa_bias );
     const uint32 f_e_denorm_unpacked   = _uint32_sub( h_f_e_denorm_bias,   h_f_m_sa             );
     const uint32 h_f_m                 = _uint32_sll( h_m,                 h_f_m_sa             );
     const uint32 f_m_denorm            = _uint32_and( h_f_m,               f_m_mask             );
     const uint32 f_e_denorm            = _uint32_sll( f_e_denorm_unpacked, f_e_pos              );
     const uint32 f_em_denorm           = _uint32_or(  f_e_denorm,          f_m_denorm           );
     const uint32 f_em_nan              = _uint32_or(  f_e_mask,            f_m                  );
     // Conditions, each encoded in the most significant bit.
     const uint32 is_e_eqz_msb          = _uint32_dec(  h_e );
     const uint32 is_m_nez_msb          = _uint32_neg(  h_m );
     const uint32 is_e_flagged_msb      = _uint32_sub(  h_e_mask_minus_one, h_e );
     const uint32 is_zero_msb           = _uint32_andc( is_e_eqz_msb,       is_m_nez_msb );
     const uint32 is_inf_msb            = _uint32_andc( is_e_flagged_msb,   is_m_nez_msb );
     const uint32 is_denorm_msb         = _uint32_and(  is_m_nez_msb,       is_e_eqz_msb );
     const uint32 is_nan_msb            = _uint32_and(  is_e_flagged_msb,   is_m_nez_msb ); 
     const uint32 is_zero               = _uint32_ext(  is_zero_msb );
     // Selection chain: each line overrides the previous result when its condition holds.
     const uint32 f_zero_result         = _uint32_andc( f_em, is_zero );
     const uint32 f_denorm_result       = _uint32_sels( is_denorm_msb, f_em_denorm, f_zero_result );
     const uint32 f_inf_result          = _uint32_sels( is_inf_msb,    f_e_mask,    f_denorm_result );
     const uint32 f_nan_result          = _uint32_sels( is_nan_msb,    f_em_nan,    f_inf_result    );
     const uint32 f_result              = _uint32_or( f_s, f_nan_result );
 
     return (f_result);
 }
 
 
 #if !NV_OS_IOS && (defined(__i386__) || defined(__x86_64__))
 
 #if NV_CC_GNUC
 #if defined(__i386__) || defined(__x86_64__)
 #include <xmmintrin.h>
 #endif
 #endif
 
 #include "nvcore/Memory.h" // NV_ALIGN_16
 
 // Converts four half-precision values to four single-precision floats using
 // SSE2 integer operations. Each 32-bit lane of `h` carries one half in its low
 // 16 bits; the masks below only look at bits 0-15 of every lane.
 static __m128 half_to_float4_SSE2(__m128i h)
 {
 // Declares a 16-byte aligned array holding `val` replicated in four lanes.
 #define SSE_CONST4(name, val) static const NV_ALIGN_16 uint name[4] = { (val), (val), (val), (val) }
     
 // Reinterprets one of the arrays declared with SSE_CONST4 as a __m128i.
 #define CONST(name) *(const __m128i *)&name
 
     // Half layout: bit 15 = sign, bits 10-14 = exponent, bits 0-9 = mantissa.
     SSE_CONST4(mask_nosign,         0x7fff);
     SSE_CONST4(mask_justsign,       0x8000);
     // Half exponent field after it has been moved to the float position (<< 13).
     SSE_CONST4(mask_shifted_exp,    0x7c00 << 13);
     // Exponent re-bias from half (15) to float (127).
     SSE_CONST4(expadjust_normal,    (127 - 15) << 23);
     // Extra bias added for Inf/NaN so the float exponent becomes all ones.
     SSE_CONST4(expadjust_infnan,    (128 - 16) << 23);
     // Denormal path: bump the exponent by one, then subtract the float whose
     // bit pattern is magic_denorm to renormalize the value.
     SSE_CONST4(expadjust_denorm,    1 << 23);
     SSE_CONST4(magic_denorm,        113 << 23);
 
     __m128i mnosign     = CONST(mask_nosign);
     __m128i expmant     = _mm_and_si128(mnosign, h);
     __m128i justsign    = _mm_and_si128(h, CONST(mask_justsign));
     __m128i mshiftexp   = CONST(mask_shifted_exp);
     __m128i eadjust     = CONST(expadjust_normal);
     // Move exponent+mantissa to the float bit positions and re-bias.
     __m128i shifted     = _mm_slli_epi32(expmant, 13);
     __m128i adjusted    = _mm_add_epi32(eadjust, shifted);
     __m128i justexp     = _mm_and_si128(shifted, mshiftexp);
 
     // Per-lane masks: exponent all ones (Inf/NaN) or all zeros (zero/denormal).
     __m128i zero        = _mm_setzero_si128();
     __m128i b_isinfnan  = _mm_cmpeq_epi32(mshiftexp, justexp);
     __m128i b_isdenorm  = _mm_cmpeq_epi32(zero, justexp);
 
     __m128i adj_infnan  = _mm_and_si128(b_isinfnan, CONST(expadjust_infnan));
     __m128i adjusted2   = _mm_add_epi32(adjusted, adj_infnan);
 
     // Denormal result is computed for every lane and selected by mask below.
     __m128i adj_den     = CONST(expadjust_denorm);
     __m128i den1        = _mm_add_epi32(adj_den, adjusted2);
     __m128  den2        = _mm_sub_ps(_mm_castsi128_ps(den1), *(const __m128 *)&magic_denorm);
     // Blend: denormal lanes take den2, all other lanes take adjusted2.
     __m128  adjusted3   = _mm_and_ps(den2, _mm_castsi128_ps(b_isdenorm));
     __m128  adjusted4   = _mm_andnot_ps(_mm_castsi128_ps(b_isdenorm), _mm_castsi128_ps(adjusted2));
     __m128  adjusted5   = _mm_or_ps(adjusted3, adjusted4);
     // Move the sign from bit 15 to bit 31 and merge it in.
     __m128i sign        = _mm_slli_epi32(justsign, 16);
     __m128  final       = _mm_or_ps(adjusted5, _mm_castsi128_ps(sign));
 
     // ~21 SSE2 ops.
     return final;
 
 #undef SSE_CONST4
 #undef CONST
 }
 
 
 void nv::half_to_float_array_SSE2(const uint16 * vin, float * vout, int count) {
     nvDebugCheck((intptr_t(vin) & 15) == 0);
     nvDebugCheck((intptr_t(vout) & 15) == 0);
     nvDebugCheck((count & 7) == 0);
 
     __m128i zero = _mm_setzero_si128();
 
     for (int i = 0; i < count; i += 8)
     {
         __m128i in = _mm_loadu_si128((const __m128i *)(vin + i));
         __m128i a = _mm_unpacklo_epi16(in, zero);
         __m128i b = _mm_unpackhi_epi16(in, zero);
         
         __m128 outa = half_to_float4_SSE2(a);
         _mm_storeu_ps((float *)(vout + i), outa);
         
         __m128 outb = half_to_float4_SSE2(b);
         _mm_storeu_ps((float *)(vout + i + 4), outb);
     }
 }
 
 #endif 
 
 
 // Lookup tables used by nv::fast_half_to_float and filled by nv::half_init_tables.
 // mantissa_table[0] starts out as 0xDEADBEEF and is set to 0 by half_init_tables;
 // fast_half_to_float uses a non-zero value there as its "not initialized yet" flag.
 // @@ These tables could be smaller.
 namespace nv {
     uint32 mantissa_table[2048] = { 0xDEADBEEF };
     uint32 exponent_table[64];
     uint32 offset_table[64];
 }
 
 void nv::half_init_tables()
 {
     // Init mantissa table.
     mantissa_table[0] = 0;
 
     // denormals
     for (int i = 1; i < 1024; i++) {
         uint m = i << 13;
         uint e = 0;
 
         while ((m & 0x00800000) == 0) {
             e -= 0x00800000;
             m <<= 1;
         }
         m &= ~0x00800000;
         e += 0x38800000;
         mantissa_table[i] = m | e;
     }
 
     // normals
     for (int i = 1024; i < 2048; i++) {
         mantissa_table[i] = (i - 1024) << 13;
     }
 
 
     // Init exponent table.
     exponent_table[0] = 0;
 
     for (int i = 1; i < 31; i++) {
         exponent_table[i] = 0x38000000 + (i << 23);
     }
 
     exponent_table[31] = 0x7f800000;
     exponent_table[32] = 0x80000000;
 
     for (int i = 33; i < 63; i++) {
         exponent_table[i] = 0xb8000000 + ((i - 32) << 23);
     }
 
     exponent_table[63] = 0xff800000;
 
 
     // Init offset table.
     offset_table[0] = 0;
 
     for (int i = 1; i < 32; i++) {
         offset_table[i] = 1024;
     }
 
     offset_table[32] = 0;
 
     for (int i = 33; i < 64; i++) {
         offset_table[i] = 1024;
     }
 }
 
 // Table-driven half to float conversion, following:
 // http://www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf
 // Returns the bit pattern of the resulting float.
 uint32 nv::fast_half_to_float(uint16 h)
 {
 	// The tables are built on first use; a non-zero mantissa_table[0] means
 	// half_init_tables has not run yet.
 	if (mantissa_table[0] != 0) {
 		half_init_tables();
 	}
 
 	// Top 6 bits: sign + exponent. Low 10 bits: mantissa.
 	const uint signAndExponent = h >> 10;
 	const uint32 mantissaBits = mantissa_table[offset_table[signAndExponent] + (h & 0x3ff)];
 	return mantissaBits + exponent_table[signAndExponent];
 }
 
 #if 0
 
 // Inaccurate conversion suggested at the ffmpeg mailing list:
 // http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/2009-July/068949.html
 // Disabled alternative (inside #if 0). Negative inputs map to 0 and any
 // exponent >= 15 maps to 0xffff.
 // NOTE(review): the result looks like a 16-bit fixed-point value rather than
 // float bits - confirm against the linked thread before enabling.
 uint32 nv::fast_half_to_float(uint16 v)
 {
     // Sign bit set: clamp to zero.
     if (v & 0x8000) return 0;
     uint exp = v >> 10;
     // Zero exponent: only the top mantissa bit contributes.
     if (!exp) return (v>>9)&1;
     // Exponent at or above the bias: saturate.
     if (exp >= 15) return 0xffff;
     v <<= 6;
     return (v+(1<<16)) >> (15-exp);
 }
 
 #endif
 
 #if 0
 
 // Some more from a gamedev thread:
 // http://www.devmaster.net/forums/showthread.php?t=10924
 
 // I believe it does not handle specials either.
 
 // Mike Acton's code should be fairly easy to vectorize and that would handle all cases too, the table method might still be faster, though.
 
 
 static __declspec(align(16)) unsigned half_sign[4]    = {0x00008000, 0x00008000, 0x00008000, 0x00008000};
 static __declspec(align(16)) unsigned half_exponent[4]    = {0x00007C00, 0x00007C00, 0x00007C00, 0x00007C00};
 static __declspec(align(16)) unsigned half_mantissa[4]    = {0x000003FF, 0x000003FF, 0x000003FF, 0x000003FF};
 static __declspec(align(16)) unsigned half_bias_offset[4] = {0x0001C000, 0x0001C000, 0x0001C000, 0x0001C000};
 
 __asm
 {
     movaps  xmm1, xmm0  // Input in xmm0
     movaps  xmm2, xmm0
 
     andps   xmm0, half_sign
     andps   xmm1, half_exponent
     andps   xmm2, half_mantissa
     paddd   xmm1, half_bias_offset
 
     pslld   xmm0, 16
     pslld   xmm1, 13
     pslld   xmm2, 13
 
     orps    xmm1, xmm2
     orps    xmm0, xmm1  // Result in xmm0
 }
 
 
 #endif
 
 #if 0
 // This version computes the tables at compile time:
 // http://gamedev.stackexchange.com/questions/17326/conversion-of-a-number-from-single-precision-floating-point-representation-to-a
 
 /* This method is faster than the OpenEXR implementation (very often
  * used, eg. in Ogre), with the additional benefit of rounding, inspired
- * by James Tursa’s half-precision code. */
+ * by James Tursa's half-precision code. */
 static inline uint16_t float_to_half_branch(uint32_t x)
 {
     /* Sign moved to the half position; exponent; 10 mantissa bits plus one
      * extra low bit that is used for rounding. */
     const uint16_t sign = (x >> 16) & 0x8000;
     const unsigned int exponent = (x >> 23) & 0xff;
     uint16_t mantissa = (x >> 12) & 0x07ff;
 
     /* Zero, float denormals and anything too small even for a denormal half
      * collapse to signed zero. */
     if (exponent < 103)
         return sign;
 
     /* Overflow, Inf and NaN: exponent field all ones. A NaN (exponent 255
      * with a non-zero mantissa) keeps one mantissa bit set so it stays NaN. */
     if (exponent > 142)
     {
         const bool isNaN = (exponent == 255) && ((x & 0x007fffffu) != 0);
         return sign | 0x7c00u | (isNaN ? 1u : 0u);
     }
 
     /* Small enough to need a denormal half: restore the implicit bit, shift
      * into place and round. Rounding may carry into the exponent field and
      * produce the smallest normal, which is the right result. */
     if (exponent < 113)
     {
         mantissa |= 0x0800u;
         const int truncated = mantissa >> (114 - exponent);
         const int roundBit = (mantissa >> (113 - exponent)) & 1;
         return sign | (truncated + roundBit);
     }
 
     /* Normal range: re-bias the exponent, drop the rounding bit from the
      * mantissa, then add it back as the round-up. A carry out of the mantissa
      * increments the exponent, which is the right result. */
     uint16_t result = sign | ((exponent - 112) << 10) | (mantissa >> 1);
     result += mantissa & 1;
     return result;
 }
 
 /* These macros implement a finite iterator useful to build lookup
  * tables. For instance, S64(0) will call S1(x) for all values of x
  * between 0 and 63.
  * Due to the exponential behaviour of the calls, the stress on the
  * compiler may be important. */
 #define S4(x)    S1((x)),   S1((x)+1),     S1((x)+2),     S1((x)+3)
 #define S16(x)   S4((x)),   S4((x)+4),     S4((x)+8),     S4((x)+12)
 #define S64(x)   S16((x)),  S16((x)+16),   S16((x)+32),   S16((x)+48)
 #define S256(x)  S64((x)),  S64((x)+64),   S64((x)+128),  S64((x)+192)
 #define S1024(x) S256((x)), S256((x)+256), S256((x)+512), S256((x)+768)
 
-/* Lookup table-based algorithm from “Fast Half Float Conversions”
+/* Lookup table-based algorithm from "Fast Half Float Conversions"
  * by Jeroen van der Zijp, November 2008. No rounding is performed,
  * and some NaN values may be incorrectly converted to Inf. */
 static inline uint16_t float_to_half_nobranch(uint32_t x)
 {
     // Indexed by the top 9 bits of the float (sign + 8-bit exponent): the half
     // sign/exponent bits to start from. First 256 entries are for positive
     // inputs, the second 256 repeat them with the half sign bit (0x8000) set.
     static uint16_t const basetable[512] =
     {
 #define S1(i) (((i) < 103) ? 0x0000 : \
                ((i) < 113) ? 0x0400 >> (113 - (i)) : \
                ((i) < 143) ? ((i) - 112) << 10 : 0x7c00)
         S256(0),
 #undef S1
 #define S1(i) (0x8000 | (((i) < 103) ? 0x0000 : \
                          ((i) < 113) ? 0x0400 >> (113 - (i)) : \
                          ((i) < 143) ? ((i) - 112) << 10 : 0x7c00))
         S256(0),
 #undef S1
     };
 
     // Same index: how far to shift the 23-bit float mantissa right. A shift of
     // 24 discards the mantissa entirely; 13 keeps its top 10 bits.
     static uint8_t const shifttable[512] =
     {
 #define S1(i) (((i) < 103) ? 24 : \
                ((i) < 113) ? 126 - (i) : \
                ((i) < 143 || (i) == 255) ? 13 : 24)
         S256(0), S256(0),
 #undef S1
     };
 
     // Combine the table-provided sign/exponent with the shifted mantissa.
     uint16_t bits = basetable[(x >> 23) & 0x1ff];
     bits |= (x & 0x007fffff) >> shifttable[(x >> 23) & 0x1ff];
     return bits;
 }
 #endif
Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/Matrix.inl
===================================================================
--- ps/trunk/libraries/source/nvtt/src/src/nvmath/Matrix.inl	(revision 23379)
+++ ps/trunk/libraries/source/nvtt/src/src/nvmath/Matrix.inl	(revision 23380)
@@ -1,1274 +1,1274 @@
 // This code is in the public domain -- castanyo@yahoo.es
 
 #pragma once
 #ifndef NV_MATH_MATRIX_INL
 #define NV_MATH_MATRIX_INL
 
 #include "Matrix.h"
 
 namespace nv
 {
     // Default constructor: assigns no element.
     inline Matrix3::Matrix3() {}
     
     // Sets all nine elements to f.
     inline Matrix3::Matrix3(float f)
     {
         float * cursor = m_data;
         float * const end = m_data + 9;
         while (cursor != end) {
             *cursor++ = f;
         }
     }
 
     // Builds the identity matrix.
     inline Matrix3::Matrix3(identity_t)
     {
         for (int k = 0; k < 9; k++) {
             // Diagonal elements (i == j) live at 3*i + i = 0, 4, 8.
             m_data[k] = (k % 4 == 0) ? 1.0f : 0.0f;
         }
     }
 
     // Copies all nine elements from m.
     inline Matrix3::Matrix3(const Matrix3 & m)
     {
         const float * src = m.m_data;
         for (int k = 0; k != 9; ++k) {
             m_data[k] = src[k];
         }
     }
     
     // Stores v0, v1 and v2 in consecutive groups of three elements, i.e. as
     // the three columns under the col * 3 + row indexing used by get().
     inline Matrix3::Matrix3(Vector3::Arg v0, Vector3::Arg v1, Vector3::Arg v2)
     {
         m_data[0] = v0.x;
         m_data[1] = v0.y;
         m_data[2] = v0.z;
 
         m_data[3] = v1.x;
         m_data[4] = v1.y;
         m_data[5] = v1.z;
 
         m_data[6] = v2.x;
         m_data[7] = v2.y;
         m_data[8] = v2.z;
     }
 
     // Element access by flat index (0..8); debug builds check the range.
     inline float Matrix3::data(uint idx) const
     {
         nvDebugCheck(idx < 9);
         return m_data[idx];
     }
     // Mutable element access by flat index (0..8).
     inline float & Matrix3::data(uint idx)
     {
         nvDebugCheck(idx < 9);
         return m_data[idx];
     }
     // Element at (row, col); storage is column-major (index = col * 3 + row).
     inline float Matrix3::get(uint row, uint col) const
     {
         nvDebugCheck(row < 3 && col < 3);
         return m_data[col * 3 + row];
     }
     // Same as get(row, col).
     inline float Matrix3::operator()(uint row, uint col) const
     {
         nvDebugCheck(row < 3 && col < 3);
         return m_data[col * 3 + row];
     }
     // Mutable reference to the element at (row, col).
     inline float & Matrix3::operator()(uint row, uint col)
     {
         nvDebugCheck(row < 3 && col < 3);
         return m_data[col * 3 + row];
     }
 
     // Returns row i as a vector.
     inline Vector3 Matrix3::row(uint i) const
     {
         nvDebugCheck(i < 3);
         const float c0 = get(i, 0);
         const float c1 = get(i, 1);
         const float c2 = get(i, 2);
         return Vector3(c0, c1, c2);
     }
     // Returns column i as a vector.
     inline Vector3 Matrix3::column(uint i) const
     {
         nvDebugCheck(i < 3);
         const float r0 = get(0, i);
         const float r1 = get(1, i);
         const float r2 = get(2, i);
         return Vector3(r0, r1, r2);
     }
 
     // Multiplies every element by s.
     inline void Matrix3::operator*=(float s)
     {
         float * const end = m_data + 9;
         for (float * it = m_data; it != end; ++it) {
             *it *= s;
         }
     }
 
     // Divides every element by s (implemented as a multiply by 1/s).
     inline void Matrix3::operator/=(float s)
     {
         const float reciprocal = 1.0f /s;
         float * const end = m_data + 9;
         for (float * it = m_data; it != end; ++it) {
             *it *= reciprocal;
         }
     }
 
     // Element-wise addition of m.
     inline void Matrix3::operator+=(const Matrix3 & m)
     {
         for (int k = 0; k != 9; ++k) {
             m_data[k] += m.m_data[k];
         }
     }
 
     // Element-wise subtraction of m.
     inline void Matrix3::operator-=(const Matrix3 & m)
     {
         for (int k = 0; k != 9; ++k) {
             m_data[k] -= m.m_data[k];
         }
     }
 
     // Element-wise sum a + b.
     inline Matrix3 operator+(const Matrix3 & a, const Matrix3 & b)
     {
         Matrix3 sum(a);
         sum += b;
         return sum;
     }
 
     // Element-wise difference a - b.
     inline Matrix3 operator-(const Matrix3 & a, const Matrix3 & b)
     {
         Matrix3 difference(a);
         difference -= b;
         return difference;
     }
 
     // Matrix scaled by a scalar.
     inline Matrix3 operator*(const Matrix3 & a, float s)
     {
         Matrix3 scaled(a);
         scaled *= s;
         return scaled;
     }
 
     // Scalar times matrix; same result as a * s.
     inline Matrix3 operator*(float s, const Matrix3 & a)
     {
         Matrix3 scaled(a);
         scaled *= s;
         return scaled;
     }
 
     // Matrix divided by a scalar.
     inline Matrix3 operator/(const Matrix3 & a, float s)
     {
         Matrix3 quotient(a);
         quotient /= s;
         return quotient;
     }
 
     // Matrix product a * b.
     inline Matrix3 mul(const Matrix3 & a, const Matrix3 & b)
     {
         Matrix3 product;
 
         for (int row = 0; row < 3; row++) {
             // Row of a, fetched once and reused for each output column.
             const float a0 = a(row,0);
             const float a1 = a(row,1);
             const float a2 = a(row,2);
 
             for (int col = 0; col < 3; col++) {
                 product(row, col) = a0 * b(0,col) + a1 * b(1,col) + a2 * b(2,col);
             }
         }
 
         return product;
     }
 
     // Operator form of mul(a, b).
     inline Matrix3 operator*(const Matrix3 & a, const Matrix3 & b)
     {
         return mul(a, b);
     }
 
     // Returns m * p, treating p as a column vector.
     inline Vector3 transform(const Matrix3 & m, const Vector3 & p)
     {
         const float x = p.x * m(0,0) + p.y * m(0,1) + p.z * m(0,2);
         const float y = p.x * m(1,0) + p.y * m(1,1) + p.z * m(1,2);
         const float z = p.x * m(2,0) + p.y * m(2,1) + p.z * m(2,2);
         return Vector3(x, y, z);
     }
 
     // Uniform scale: multiplies every element by s.
     inline void Matrix3::scale(float s)
     {
         float * const end = m_data + 9;
         for (float * it = m_data; it != end; ++it) {
             *it *= s;
         }
     }
 
     // Per-axis scale: column 0 is multiplied by s.x, column 1 by s.y and
     // column 2 by s.z.
     inline void Matrix3::scale(Vector3::Arg s)
     {
         for (int row = 0; row < 3; row++) {
             m_data[row] *= s.x;
         }
         for (int row = 0; row < 3; row++) {
             m_data[3 + row] *= s.y;
         }
         for (int row = 0; row < 3; row++) {
             m_data[6 + row] *= s.z;
         }
     }
 
     inline float Matrix3::determinant() const
     {
         return 
             get(0,0) * get(1,1) * get(2,2) + 
             get(0,1) * get(1,2) * get(2,0) + 
             get(0,2) * get(1,0) * get(2,1) -
             get(0,2) * get(1,1) * get(2,0) - 
             get(0,1) * get(1,0) * get(2,2) -
             get(0,0) * get(1,2) * get(2,1);
     }
 
     // Inverse using Cramer's rule.
     //
     // Returns adj(m) / det(m). When the determinant is exactly zero the
     // all-zero matrix is returned instead.
     //
     // Storage is column-major (data(3 * col + row) is element (row, col)).
     // Element (row, col) of the adjugate is the cofactor of element (col, row)
     // of m; the previous version stored the cofactors without this transpose
     // and therefore returned the transposed inverse for non-symmetric input.
     inline Matrix3 inverseCramer(const Matrix3 & m)
     {
         const float det = m.determinant();
         if (equal(det, 0.0f, 0.0f)) {
             return Matrix3(0);
         }
 
         Matrix3 r;
 
         // Column 0 of the adjugate: cofactors of row 0 of m.
         r.data(0) =  + m.data(4) * m.data(8) - m.data(5) * m.data(7);
         r.data(1) =  + m.data(2) * m.data(7) - m.data(1) * m.data(8);
         r.data(2) =  + m.data(1) * m.data(5) - m.data(2) * m.data(4);
 
         // Column 1 of the adjugate: cofactors of row 1 of m.
         r.data(3) =  + m.data(5) * m.data(6) - m.data(3) * m.data(8);
         r.data(4) =  + m.data(0) * m.data(8) - m.data(2) * m.data(6);
         r.data(5) =  + m.data(2) * m.data(3) - m.data(0) * m.data(5);
 
         // Column 2 of the adjugate: cofactors of row 2 of m.
         r.data(6) =  + m.data(3) * m.data(7) - m.data(4) * m.data(6);
         r.data(7) =  + m.data(1) * m.data(6) - m.data(0) * m.data(7);
         r.data(8) =  + m.data(0) * m.data(4) - m.data(1) * m.data(3);
 
         r.scale(1.0f / det);
 
         return r;
     }
 
 
 
     // Default constructor: assigns no element.
     inline Matrix::Matrix()
     {
     }
 
     // Sets all sixteen elements to f.
     // (Previously the argument was ignored and the matrix was always filled
     // with 0.0f, unlike Matrix3(float). Callers passing 0.0f are unaffected.)
     inline Matrix::Matrix(float f)
     {
         for(int i = 0; i < 16; i++) {
             m_data[i] = f;
         }
     }
 
     // Builds the 4x4 identity matrix.
     inline Matrix::Matrix(identity_t)
     {
         for (int k = 0; k < 16; k++) {
             // Diagonal elements (i == j) live at 4*i + i = 0, 5, 10, 15.
             m_data[k] = (k % 5 == 0) ? 1.0f : 0.0f;
         }
     }
 
     // Copies all sixteen elements from m.
     inline Matrix::Matrix(const Matrix & m)
     {
         const float * src = m.m_data;
         for (int k = 0; k != 16; ++k) {
             m_data[k] = src[k];
         }
     }
 
     // Embeds a 3x3 matrix in the upper-left corner; the last row and the last
     // column are set to zero.
     // NOTE(review): element (3,3) ends up 0, not 1, so the result is not a
     // homogeneous transform as-is - confirm that callers expect this.
     inline Matrix::Matrix(const Matrix3 & m)
     {
         for (int row = 0; row < 3; row++) {
             for (int col = 0; col < 3; col++) {
                 (*this)(row, col) = m.get(row, col);
             }
         }
         for (int k = 0; k < 4; k++) {
             (*this)(3, k) = 0;
             (*this)(k, 3) = 0;
         }
     }
 
     // Stores v0..v3 in consecutive groups of four elements, i.e. as the four
     // columns under the col * 4 + row indexing used by get().
     inline Matrix::Matrix(Vector4::Arg v0, Vector4::Arg v1, Vector4::Arg v2, Vector4::Arg v3)
     {
         m_data[ 0] = v0.x;
         m_data[ 1] = v0.y;
         m_data[ 2] = v0.z;
         m_data[ 3] = v0.w;
 
         m_data[ 4] = v1.x;
         m_data[ 5] = v1.y;
         m_data[ 6] = v1.z;
         m_data[ 7] = v1.w;
 
         m_data[ 8] = v2.x;
         m_data[ 9] = v2.y;
         m_data[10] = v2.z;
         m_data[11] = v2.w;
 
         m_data[12] = v3.x;
         m_data[13] = v3.y;
         m_data[14] = v3.z;
         m_data[15] = v3.w;
     }
 
     /*inline Matrix::Matrix(const float m[])
     {
         for(int i = 0; i < 16; i++) {
             m_data[i] = m[i];
         }
     }*/
 
 
     // Accessors
 
     // Element access by flat index (0..15); debug builds check the range.
     inline float Matrix::data(uint idx) const
     {
         nvDebugCheck(idx < 16);
         return m_data[idx];
     }
     // Mutable element access by flat index (0..15).
     inline float & Matrix::data(uint idx)
     {
         nvDebugCheck(idx < 16);
         return m_data[idx];
     }
     // Element at (row, col); storage is column-major (index = col * 4 + row).
     inline float Matrix::get(uint row, uint col) const
     {
         nvDebugCheck(row < 4 && col < 4);
         return m_data[col * 4 + row];
     }
     // Same as get(row, col).
     inline float Matrix::operator()(uint row, uint col) const
     {
         nvDebugCheck(row < 4 && col < 4);
         return m_data[col * 4 + row];
     }
     // Mutable reference to the element at (row, col).
     inline float & Matrix::operator()(uint row, uint col)
     {
         nvDebugCheck(row < 4 && col < 4);
         return m_data[col * 4 + row];
     }
 
     // Read-only pointer to the sixteen elements in storage order.
     inline const float * Matrix::ptr() const
     {
         return m_data;
     }
 
     // Returns row i as a vector.
     inline Vector4 Matrix::row(uint i) const
     {
         nvDebugCheck(i < 4);
         const float c0 = get(i, 0);
         const float c1 = get(i, 1);
         const float c2 = get(i, 2);
         const float c3 = get(i, 3);
         return Vector4(c0, c1, c2, c3);
     }
 
     // Returns column i as a vector.
     inline Vector4 Matrix::column(uint i) const
     {
         nvDebugCheck(i < 4);
         const float r0 = get(0, i);
         const float r1 = get(1, i);
         const float r2 = get(2, i);
         const float r3 = get(3, i);
         return Vector4(r0, r1, r2, r3);
     }
 
     // Sets every element to zero.
     inline void Matrix::zero()
     {
         for (int k = 0; k < 16; k++) {
             m_data[k] = 0.0f;
         }
     }
 
     // Resets this matrix to the identity.
     inline void Matrix::identity()
     {
         for (int k = 0; k < 16; k++) {
             // The diagonal sits at flat indices 0, 5, 10 and 15.
             m_data[k] = (k % 5 == 0) ? 1.0f : 0.0f;
         }
     }
 
     // Uniform scale: multiplies all sixteen elements by s.
     inline void Matrix::scale(float s)
     {
         float * const end = m_data + 16;
         for (float * it = m_data; it != end; ++it) {
             *it *= s;
         }
     }
 
     // Per-axis scale: column 0 is multiplied by s.x, column 1 by s.y and
     // column 2 by s.z. Column 3 is left untouched.
     inline void Matrix::scale(Vector3::Arg s)
     {
         for (int row = 0; row < 4; row++) {
             m_data[row] *= s.x;
         }
         for (int row = 0; row < 4; row++) {
             m_data[4 + row] *= s.y;
         }
         for (int row = 0; row < 4; row++) {
             m_data[8 + row] *= s.z;
         }
     }
 
     // Post-multiplies this matrix by a translation of t: only the last column
     // changes, becoming col0 * t.x + col1 * t.y + col2 * t.z + col3.
     inline void Matrix::translate(Vector3::Arg t)
     {
         for (int row = 0; row < 4; row++) {
             m_data[12 + row] = m_data[row] * t.x + m_data[4 + row] * t.y + m_data[8 + row]  * t.z + m_data[12 + row];
         }
     }
 
     Matrix rotation(float theta, float v0, float v1, float v2);
 
     // Apply rotation.
     inline void Matrix::rotate(float theta, float v0, float v1, float v2)
     {
         Matrix R(rotation(theta, v0, v1, v2));
         apply(R);
     }
 
     // Replaces this matrix with (*this) * m. m must not be this matrix itself
     // (checked in debug builds).
     inline void Matrix::apply(Matrix::Arg m)
     {
         nvDebugCheck(this != &m);
 
         for (int row = 0; row < 4; row++) {
             // Snapshot the row before any of its elements is overwritten.
             const float a0 = get(row,0);
             const float a1 = get(row,1);
             const float a2 = get(row,2);
             const float a3 = get(row,3);
 
             for (int col = 0; col < 4; col++) {
                 m_data[4 * col + row] = a0 * m(0,col) + a1 * m(1,col) + a2 * m(2,col) + a3 * m(3,col);
             }
         }
     }
 
     // Returns a matrix that scales by s.x, s.y and s.z along the three axes.
     inline Matrix scale(Vector3::Arg s)
     {
         Matrix result(identity);
         result(0,0) = s.x;
         result(1,1) = s.y;
         result(2,2) = s.z;
         return result;
     }
 
     // Returns a matrix that scales uniformly by s.
     inline Matrix scale(float s)
     {
         Matrix result(identity);
         result(2,2) = s;
         result(1,1) = s;
         result(0,0) = s;
         return result;
     }
 
     // Returns the identity matrix with t stored in the last column.
     inline Matrix translation(Vector3::Arg t)
     {
         Matrix result(identity);
         result(0,3) = t.x;
         result(1,3) = t.y;
         result(2,3) = t.z;
         return result;
     }
 
     // Get rotation matrix.
     //
     // Builds a rotation of theta radians about the axis (v0, v1, v2). The three
     // coordinate axes are special-cased; any other axis is normalized first and
     // goes through the general axis-angle formula. All four branches use the
     // same sign convention (e.g. for the z axis: m(1,0) = -sin, m(0,1) = +sin).
     //
     // Fixes relative to the previous version:
     //  - the general branch wrote m(3,0) and m(3,1) instead of m(0,1) and
     //    m(0,2), leaving the 3x3 block incomplete and polluting the last row;
     //  - the y-axis branch wrote m(1,2) instead of m(0,2);
     //  - the squared axis components were taken before normalization, which
     //    gave a wrong diagonal for non-unit axes.
     // A zero-length axis is still not handled (the normalization divides by 0).
     inline Matrix rotation(float theta, float v0, float v1, float v2)
     {
         float cost = cosf(theta);
         float sint = sinf(theta);
 
         Matrix m(identity);
 
         if( 1 == v0 && 0 == v1 && 0 == v2 ) {
             m(1,1) = cost; m(2,1) = -sint;
             m(1,2) = sint; m(2,2) = cost;
         }
         else if( 0 == v0  && 1 == v1 && 0 == v2 ) {
             m(0,0) = cost; m(2,0) = sint;
             m(0,2) = -sint; m(2,2) = cost;
         }
         else if( 0 == v0 && 0 == v1 && 1 == v2 ) {
             m(0,0) = cost; m(1,0) = -sint;
             m(0,1) = sint; m(1,1) = cost;
         } 
         else {
             // Normalize the axis before deriving anything from it.
             float iscale = 1.0f / sqrtf(v0 * v0 + v1 * v1 + v2 * v2);
             v0 *= iscale;
             v1 *= iscale;
             v2 *= iscale;
 
             float a2, b2, c2;
             a2 = v0 * v0;
             b2 = v1 * v1;
             c2 = v2 * v2;
 
             float abm, acm, bcm;
             float mcos, a_sin, b_sin, c_sin;
             mcos = 1.0f - cost;
             abm = v0 * v1 * mcos;
             acm = v0 * v2 * mcos;
             bcm = v1 * v2 * mcos;
             a_sin = v0 * sint;
             b_sin = v1 * sint;
             c_sin = v2 * sint;
 
             // Column 0.
             m(0,0) = a2 * mcos + cost;
             m(1,0) = abm - c_sin;
             m(2,0) = acm + b_sin;
             // Column 1.
             m(0,1) = abm + c_sin;
             m(1,1) = b2 * mcos + cost;
             m(2,1) = bcm - a_sin;
             // Column 2.
             m(0,2) = acm - b_sin;
             m(1,2) = bcm + a_sin;
             m(2,2) = c2 * mcos + cost;
         }
         return m;
     }
 
     //Matrix rotation(float yaw, float pitch, float roll);
     //Matrix skew(float angle, Vector3::Arg v1, Vector3::Arg v2);
 
     // Returns the perspective projection matrix for the view volume bounded
     // by [xmin, xmax] x [ymin, ymax] on the near plane, with the given near and
     // far distances.
     inline Matrix frustum(float xmin, float xmax, float ymin, float ymax, float zNear, float zFar)
     {
         Matrix proj(0.0f);
 
         const float twoNear   = 2.0f * zNear;
         const float invWidth  = 1.0f / (xmax - xmin);
         const float invHeight = 1.0f / (ymax - ymin);
         const float invDepth  = 1.0f / (zFar - zNear);
 
         // Diagonal scale terms.
         proj(0,0) = twoNear * invWidth;
         proj(1,1) = twoNear * invHeight;
 
         // Third column: off-center terms, depth scale and the -1 in the last row.
         proj(0,2) = (xmax + xmin) * invWidth;
         proj(1,2) = (ymax + ymin) * invHeight;
         proj(2,2) = -(zFar + zNear) * invDepth;
         proj(3,2) = -1.0f;
 
         // Depth offset.
         proj(2,3) = -(zFar * twoNear) * invDepth;
 
         return proj;
     }
 
     // Returns the inverse of frustum(xmin, xmax, ymin, ymax, zNear, zFar),
     // written out in closed form.
     inline Matrix frustumInverse(float xmin, float xmax, float ymin, float ymax, float zNear, float zFar)
     {
         Matrix inv(0.0f);
 
         const float invTwoNear    = 1.0f / (2.0f * zNear);
         const float invTwoNearFar = 1.0f / (2.0f * zNear * zFar);
 
         // Row 0.
         inv(0,0) = (xmax - xmin) * invTwoNear;
         inv(0,3) = (xmax + xmin) * invTwoNear;
 
         // Row 1.
         inv(1,1) = (ymax - ymin) * invTwoNear;
         inv(1,3) = (ymax + ymin) * invTwoNear;
 
         // Row 2.
         inv(2,3) = -1;
 
         // Row 3.
         inv(3,2) = -(zFar - zNear) * invTwoNearFar;
         inv(3,3) = (zFar + zNear) * invTwoNearFar;
 
         return inv;
     }
 
     // Returns a perspective projection matrix with no far distance parameter
     // (the "infinite" frustum variant).
     inline Matrix frustum(float xmin, float xmax, float ymin, float ymax, float zNear)
     {
         Matrix proj(0.0f);
 
         const float twoNear   = 2.0f * zNear;
         const float invWidth  = 1.0f / (xmax - xmin);
         const float invHeight = 1.0f / (ymax - ymin);
         // Scale applied to the depth terms; currently exactly 1.
         const float nudge = 1.0; // 0.999;
 
         // Diagonal scale terms.
         proj(0,0) = twoNear * invWidth;
         proj(1,1) = twoNear * invHeight;
 
         // Third column.
         proj(0,2) = (xmax + xmin) * invWidth;
         proj(1,2) = (ymax + ymin) * invHeight;
         proj(2,2) = -1.0f * nudge;
         proj(3,2) = -1.0f;
 
         // Depth offset.
         proj(2,3) = -twoNear * nudge;
 
         return proj;
     }
 
     // Returns a symmetric perspective projection. The horizontal half-extent on
     // the near plane is zNear * tan(fovy / 2); the vertical half-extent is that
     // value divided by aspect.
     inline Matrix perspective(float fovy, float aspect, float zNear, float zFar)
     {
         const float halfWidth  = zNear * tan(fovy / 2);
         const float halfHeight = halfWidth / aspect;
 
         return frustum(-halfWidth, halfWidth, -halfHeight, halfHeight, zNear, zFar);	
     }
 
     // Returns the inverse of perspective(fovy, aspect, zNear, zFar).
     inline Matrix perspectiveInverse(float fovy, float aspect, float zNear, float zFar)
     {
         const float halfWidth  = zNear * tan(fovy / 2);
         const float halfHeight = halfWidth / aspect;
 
         return frustumInverse(-halfWidth, halfWidth, -halfHeight, halfHeight, zNear, zFar);	
     }
 
     // Returns a symmetric perspective projection without a far distance.
     inline Matrix perspective(float fovy, float aspect, float zNear)
     {
         const float halfWidth  = zNear * tan(fovy / 2);
         const float halfHeight = halfWidth / aspect;
         return frustum( -halfWidth, halfWidth, -halfHeight, halfHeight, zNear );	
     }
 
     // Get matrix determinant.
     // Written out as the fully expanded sum of 24 signed products, each taking
     // four elements of m_data; no intermediate minors are reused.
     inline float Matrix::determinant() const
     {
         return 
             m_data[3] * m_data[6] * m_data[ 9] * m_data[12] - m_data[2] * m_data[7] * m_data[ 9] * m_data[12] - m_data[3] * m_data[5] * m_data[10] * m_data[12] + m_data[1] * m_data[7] * m_data[10] * m_data[12] +
             m_data[2] * m_data[5] * m_data[11] * m_data[12] - m_data[1] * m_data[6] * m_data[11] * m_data[12] - m_data[3] * m_data[6] * m_data[ 8] * m_data[13] + m_data[2] * m_data[7] * m_data[ 8] * m_data[13] +
             m_data[3] * m_data[4] * m_data[10] * m_data[13] - m_data[0] * m_data[7] * m_data[10] * m_data[13] - m_data[2] * m_data[4] * m_data[11] * m_data[13] + m_data[0] * m_data[6] * m_data[11] * m_data[13] +
             m_data[3] * m_data[5] * m_data[ 8] * m_data[14] - m_data[1] * m_data[7] * m_data[ 8] * m_data[14] - m_data[3] * m_data[4] * m_data[ 9] * m_data[14] + m_data[0] * m_data[7] * m_data[ 9] * m_data[14] +
             m_data[1] * m_data[4] * m_data[11] * m_data[14] - m_data[0] * m_data[5] * m_data[11] * m_data[14] - m_data[2] * m_data[5] * m_data[ 8] * m_data[15] + m_data[1] * m_data[6] * m_data[ 8] * m_data[15] +
             m_data[2] * m_data[4] * m_data[ 9] * m_data[15] - m_data[0] * m_data[6] * m_data[ 9] * m_data[15] - m_data[1] * m_data[4] * m_data[10] * m_data[15] + m_data[0] * m_data[5] * m_data[10] * m_data[15];
     }
 
     // Returns the transpose of m.
     inline Matrix transpose(Matrix::Arg m)
     {
         Matrix flipped;
         for (int row = 0; row < 4; row++)
         {
             for (int col = 0; col < 4; col++)
             {
                 flipped(row, col) = m(col, row);
             }
         }
         return flipped;
     }
 
     // Inverse using Cramer's rule.
     // Each r.data(k) below is one element of the adjugate, written as an
     // expanded 3x3 cofactor over m's flat (column-major) indices; the result is
     // then divided by the determinant.
     // Note: unlike the Matrix3 overload there is no check for a zero
     // determinant here, so a singular input divides by zero.
     inline Matrix inverseCramer(Matrix::Arg m)
     {
         Matrix r;
         r.data( 0) = m.data(6)*m.data(11)*m.data(13) - m.data(7)*m.data(10)*m.data(13) + m.data(7)*m.data(9)*m.data(14) - m.data(5)*m.data(11)*m.data(14) - m.data(6)*m.data(9)*m.data(15) + m.data(5)*m.data(10)*m.data(15);
         r.data( 1) = m.data(3)*m.data(10)*m.data(13) - m.data(2)*m.data(11)*m.data(13) - m.data(3)*m.data(9)*m.data(14) + m.data(1)*m.data(11)*m.data(14) + m.data(2)*m.data(9)*m.data(15) - m.data(1)*m.data(10)*m.data(15);
         r.data( 2) = m.data(2)*m.data( 7)*m.data(13) - m.data(3)*m.data( 6)*m.data(13) + m.data(3)*m.data(5)*m.data(14) - m.data(1)*m.data( 7)*m.data(14) - m.data(2)*m.data(5)*m.data(15) + m.data(1)*m.data( 6)*m.data(15);
         r.data( 3) = m.data(3)*m.data( 6)*m.data( 9) - m.data(2)*m.data( 7)*m.data( 9) - m.data(3)*m.data(5)*m.data(10) + m.data(1)*m.data( 7)*m.data(10) + m.data(2)*m.data(5)*m.data(11) - m.data(1)*m.data( 6)*m.data(11);
         r.data( 4) = m.data(7)*m.data(10)*m.data(12) - m.data(6)*m.data(11)*m.data(12) - m.data(7)*m.data(8)*m.data(14) + m.data(4)*m.data(11)*m.data(14) + m.data(6)*m.data(8)*m.data(15) - m.data(4)*m.data(10)*m.data(15);
         r.data( 5) = m.data(2)*m.data(11)*m.data(12) - m.data(3)*m.data(10)*m.data(12) + m.data(3)*m.data(8)*m.data(14) - m.data(0)*m.data(11)*m.data(14) - m.data(2)*m.data(8)*m.data(15) + m.data(0)*m.data(10)*m.data(15);
         r.data( 6) = m.data(3)*m.data( 6)*m.data(12) - m.data(2)*m.data( 7)*m.data(12) - m.data(3)*m.data(4)*m.data(14) + m.data(0)*m.data( 7)*m.data(14) + m.data(2)*m.data(4)*m.data(15) - m.data(0)*m.data( 6)*m.data(15);
         r.data( 7) = m.data(2)*m.data( 7)*m.data( 8) - m.data(3)*m.data( 6)*m.data( 8) + m.data(3)*m.data(4)*m.data(10) - m.data(0)*m.data( 7)*m.data(10) - m.data(2)*m.data(4)*m.data(11) + m.data(0)*m.data( 6)*m.data(11);
         r.data( 8) = m.data(5)*m.data(11)*m.data(12) - m.data(7)*m.data( 9)*m.data(12) + m.data(7)*m.data(8)*m.data(13) - m.data(4)*m.data(11)*m.data(13) - m.data(5)*m.data(8)*m.data(15) + m.data(4)*m.data( 9)*m.data(15);
         r.data( 9) = m.data(3)*m.data( 9)*m.data(12) - m.data(1)*m.data(11)*m.data(12) - m.data(3)*m.data(8)*m.data(13) + m.data(0)*m.data(11)*m.data(13) + m.data(1)*m.data(8)*m.data(15) - m.data(0)*m.data( 9)*m.data(15);
         r.data(10) = m.data(1)*m.data( 7)*m.data(12) - m.data(3)*m.data( 5)*m.data(12) + m.data(3)*m.data(4)*m.data(13) - m.data(0)*m.data( 7)*m.data(13) - m.data(1)*m.data(4)*m.data(15) + m.data(0)*m.data( 5)*m.data(15);
         r.data(11) = m.data(3)*m.data( 5)*m.data( 8) - m.data(1)*m.data( 7)*m.data( 8) - m.data(3)*m.data(4)*m.data( 9) + m.data(0)*m.data( 7)*m.data( 9) + m.data(1)*m.data(4)*m.data(11) - m.data(0)*m.data( 5)*m.data(11);
         r.data(12) = m.data(6)*m.data( 9)*m.data(12) - m.data(5)*m.data(10)*m.data(12) - m.data(6)*m.data(8)*m.data(13) + m.data(4)*m.data(10)*m.data(13) + m.data(5)*m.data(8)*m.data(14) - m.data(4)*m.data( 9)*m.data(14);
         r.data(13) = m.data(1)*m.data(10)*m.data(12) - m.data(2)*m.data( 9)*m.data(12) + m.data(2)*m.data(8)*m.data(13) - m.data(0)*m.data(10)*m.data(13) - m.data(1)*m.data(8)*m.data(14) + m.data(0)*m.data( 9)*m.data(14);
         r.data(14) = m.data(2)*m.data( 5)*m.data(12) - m.data(1)*m.data( 6)*m.data(12) - m.data(2)*m.data(4)*m.data(13) + m.data(0)*m.data( 6)*m.data(13) + m.data(1)*m.data(4)*m.data(14) - m.data(0)*m.data( 5)*m.data(14);
         r.data(15) = m.data(1)*m.data( 6)*m.data( 8) - m.data(2)*m.data( 5)*m.data( 8) + m.data(2)*m.data(4)*m.data( 9) - m.data(0)*m.data( 6)*m.data( 9) - m.data(1)*m.data(4)*m.data(10) + m.data(0)*m.data( 5)*m.data(10);
         r.scale(1.0f / m.determinant());
         return r;
     }
 
     inline Matrix isometryInverse(Matrix::Arg m)
     {
         Matrix r(identity);
 
         // transposed 3x3 upper left matrix
         for (int i = 0; i < 3; i++)
         {
             for (int j = 0; j < 3; j++)
             {
                 r(i, j) = m(j, i);
             }
         }
 
         // translate by the negative offsets
         r.translate(-Vector3(m.data(12), m.data(13), m.data(14)));
 
         return r;
     }
 
     // Transform the given 3d point with the given matrix.
     inline Vector3 transformPoint(Matrix::Arg m, Vector3::Arg p)
     {
         return Vector3(
             p.x * m(0,0) + p.y * m(0,1) + p.z * m(0,2) + m(0,3),
             p.x * m(1,0) + p.y * m(1,1) + p.z * m(1,2) + m(1,3),
             p.x * m(2,0) + p.y * m(2,1) + p.z * m(2,2) + m(2,3));
     }
 
     // Transform the given 3d vector with the given matrix.
     inline Vector3 transformVector(Matrix::Arg m, Vector3::Arg p)
     {
         return Vector3(
             p.x * m(0,0) + p.y * m(0,1) + p.z * m(0,2),
             p.x * m(1,0) + p.y * m(1,1) + p.z * m(1,2),
             p.x * m(2,0) + p.y * m(2,1) + p.z * m(2,2));
     }
 
     // Transform the given 4d vector with the given matrix.
     inline Vector4 transform(Matrix::Arg m, Vector4::Arg p)
     {
         return Vector4(
             p.x * m(0,0) + p.y * m(0,1) + p.z * m(0,2) + p.w * m(0,3),
             p.x * m(1,0) + p.y * m(1,1) + p.z * m(1,2) + p.w * m(1,3),
             p.x * m(2,0) + p.y * m(2,1) + p.z * m(2,2) + p.w * m(2,3),
             p.x * m(3,0) + p.y * m(3,1) + p.z * m(3,2) + p.w * m(3,3));
     }
 
     inline Matrix mul(Matrix::Arg a, Matrix::Arg b)
     {
         // @@ Is this the right order? mul(a, b) = b * a
         Matrix m = a;
         m.apply(b);
         return m;
     }
 
     inline void Matrix::operator+=(const Matrix & m)
     {
         for(int i = 0; i < 16; i++) {
             m_data[i] += m.m_data[i];
         }
     }
 
     inline void Matrix::operator-=(const Matrix & m)
     {
         for(int i = 0; i < 16; i++) {
             m_data[i] -= m.m_data[i];
         }
     }
 
     inline Matrix operator+(const Matrix & a, const Matrix & b)
     {
         Matrix m = a;
         m += b;
         return m;
     }
 
     inline Matrix operator-(const Matrix & a, const Matrix & b)
     {
         Matrix m = a;
         m -= b;
         return m;
     }
 
 
 } // nv namespace
 
 
 #if 0 // old code.
 /** @name Special matrices. */
 //@{
 /** Generate a translation matrix. */
 void TranslationMatrix(const Vec3 & v) {
     data[0] = 1; data[1] = 0; data[2] = 0; data[3] = 0;
     data[4] = 0; data[5] = 1; data[6] = 0; data[7] = 0;
     data[8] = 0; data[9] = 0; data[10] = 1; data[11] = 0;
     data[12] = v.x; data[13] = v.y; data[14] = v.z; data[15] = 1;
 }
 
 /** Rotate theta degrees around v. */
 void RotationMatrix( float theta, float v0, float v1, float v2 ) {
     float cost = cos(theta);
     float sint = sin(theta);
 
     if( 1 == v0 && 0 == v1 && 0 == v2 ) {
         data[0] = 1.0f;	data[1] = 0.0f;	data[2] = 0.0f;	data[3] = 0.0f;
         data[4] = 0.0f;	data[5] = cost;	data[6] = -sint;data[7] = 0.0f;
         data[8] = 0.0f;	data[9] = sint;	data[10] = cost;data[11] = 0.0f;
         data[12] = 0.0f;data[13] = 0.0f;data[14] = 0.0f;data[15] = 1.0f;
     }
     else if( 0 == v0  && 1 == v1 && 0 == v2 ) {
         data[0] = cost;	data[1] = 0.0f;	data[2] = sint;	data[3] = 0.0f;
         data[4] = 0.0f;	data[5] = 1.0f;	data[6] = 0.0f;	data[7] = 0.0f;
         data[8] = -sint;data[9] = 0.0f;data[10] = cost;	data[11] = 0.0f;
         data[12] = 0.0f;data[13] = 0.0f;data[14] = 0.0f;data[15] = 1.0f;
     }
     else if( 0 == v0 && 0 == v1 && 1 == v2 ) {
         data[0] = cost;	data[1] = -sint;data[2] = 0.0f;	data[3] = 0.0f;
         data[4] = sint; data[5] = cost;	data[6] = 0.0f;	data[7] = 0.0f;
         data[8] = 0.0f;	data[9] = 0.0f;	data[10] = 1.0f;data[11] = 0.0f;
         data[12] = 0.0f;data[13] = 0.0f;data[14] = 0.0f;data[15] = 1.0f;
     } 
     else {
         //we need scale a,b,c to unit length.
         float a2, b2, c2;
         a2 = v0 * v0;
         b2 = v1 * v1;
         c2 = v2 * v2;
 
         float iscale = 1.0f / sqrtf(a2 + b2 + c2);
         v0 *= iscale;
         v1 *= iscale;
         v2 *= iscale;
 
         float abm, acm, bcm;
         float mcos, asin, bsin, csin;
         mcos = 1.0f - cost;
         abm = v0 * v1 * mcos;
         acm = v0 * v2 * mcos;
         bcm = v1 * v2 * mcos;
         asin = v0 * sint;
         bsin = v1 * sint;
         csin = v2 * sint;
         data[0] = a2 * mcos + cost;
         data[1] = abm - csin;
         data[2] = acm + bsin;
         data[3] = abm + csin;
         data[4] = 0.0f;
         data[5] = b2 * mcos + cost;
         data[6] = bcm - asin;
         data[7] = acm - bsin;
         data[8] = 0.0f;
         data[9] = bcm + asin;
         data[10] = c2 * mcos + cost;
         data[11] = 0.0f;
         data[12] = 0.0f;
         data[13] = 0.0f;
         data[14] = 0.0f;
         data[15] = 1.0f;
     }
 }
 
 /*
 void SkewMatrix(float angle, const Vec3 & v1, const Vec3 & v2) {
 v1.Normalize();
 v2.Normalize();
 
 Vec3 v3;
 v3.Cross(v1, v2);
 v3.Normalize();
 
 // Get skew factor.
 float costheta = Vec3DotProduct(v1, v2);
 float sintheta = Real.Sqrt(1 - costheta * costheta);
 float skew = tan(Trig.DegreesToRadians(angle) + acos(sintheta)) * sintheta - costheta;
 
 // Build orthonormal matrix.
 v1 = FXVector3.Cross(v3, v2);
 v1.Normalize();
 
 Matrix R = Matrix::Identity;
-R[0, 0] = v3.X; // Not sure this is in the correct order...
+R[0, 0] = v3.X; // Not sure this is in the correct order...
 R[1, 0] = v3.Y;
 R[2, 0] = v3.Z;
 R[0, 1] = v1.X;
 R[1, 1] = v1.Y;
 R[2, 1] = v1.Z;
 R[0, 2] = v2.X;
 R[1, 2] = v2.Y;
 R[2, 2] = v2.Z;
 
 // Build skew matrix.
 Matrix S = Matrix::Identity;
 S[2, 1] = -skew;
 
 // Return skew transform.
 return R * S * R.Transpose;	// Not sure this is in the correct order...
 }
 */
 
 /**
 * Generate rotation matrix for the euler angles. This is the same as computing
 * 3 rotation matrices and multiplying them together in our custom order.
 *
 * @todo Have to recompute this code for our new convention.
 **/
 void RotationMatrix( float yaw, float pitch, float roll ) {
     float sy = sin(yaw+ToRadian(90));
     float cy = cos(yaw+ToRadian(90));
     float sp = sin(pitch-ToRadian(90));
     float cp = cos(pitch-ToRadian(90));
     float sr = sin(roll);
     float cr = cos(roll);
 
     data[0] = cr*cy + sr*sp*sy;
     data[1] = cp*sy;
     data[2] = -sr*cy + cr*sp*sy;
     data[3] = 0;
 
     data[4] = -cr*sy + sr*sp*cy;
     data[5] = cp*cy;
     data[6] = sr*sy + cr*sp*cy;
     data[7] = 0;
 
     data[8] = sr*cp;
     data[9] = -sp;
     data[10] = cr*cp;
     data[11] = 0;
 
     data[12] = 0;
     data[13] = 0;
     data[14] = 0;
     data[15] = 1;
 }
 
 /** Create a frustum matrix with the far plane at the infinity. */
 void Frustum( float xmin, float xmax, float ymin, float ymax, float zNear, float zFar ) {
     float one_deltax, one_deltay, one_deltaz, doubleznear;
 
     doubleznear = 2.0f * zNear;
     one_deltax = 1.0f / (xmax - xmin);
     one_deltay = 1.0f / (ymax - ymin);
     one_deltaz = 1.0f / (zFar - zNear);
 
     data[0] = (float)(doubleznear * one_deltax);
     data[1] = 0.0f;
     data[2] = 0.0f;
     data[3] = 0.0f;
     data[4] = 0.0f;
     data[5] = (float)(doubleznear * one_deltay);
     data[6] = 0.f;
     data[7] = 0.f;
     data[8] = (float)((xmax + xmin) * one_deltax);
     data[9] = (float)((ymax + ymin) * one_deltay);
     data[10] = (float)(-(zFar + zNear) * one_deltaz);
     data[11] = -1.f;
     data[12] = 0.f;
     data[13] = 0.f;
     data[14] = (float)(-(zFar * doubleznear) * one_deltaz);
     data[15] = 0.f;
 }
 
 /** Create a frustum matrix with the far plane at the infinity. */
 void FrustumInf( float xmin, float xmax, float ymin, float ymax, float zNear ) {
     float one_deltax, one_deltay, doubleznear, nudge;
 
     doubleznear = 2.0f * zNear;
     one_deltax = 1.0f / (xmax - xmin);
     one_deltay = 1.0f / (ymax - ymin);
     nudge = 1.0; // 0.999;
 
     data[0] = doubleznear * one_deltax;
     data[1] = 0.0f;
     data[2] = 0.0f;
     data[3] = 0.0f;
 
     data[4] = 0.0f;
     data[5] = doubleznear * one_deltay;
     data[6] = 0.f;
     data[7] = 0.f;
 
     data[8] = (xmax + xmin) * one_deltax;
     data[9] = (ymax + ymin) * one_deltay;
     data[10] = -1.0f * nudge;
     data[11] = -1.0f;
 
     data[12] = 0.f;
     data[13] = 0.f;
     data[14] = -doubleznear * nudge;
     data[15] = 0.f;
 }
 
 /** Create an inverse frustum matrix with the far plane at the infinity. */
 void FrustumInfInv( float left, float right, float bottom, float top, float zNear ) {
     // this matrix is wrong (not tested floatly) I think it should be transposed.
     data[0] = (right - left) / (2 * zNear);
     data[1] = 0;
     data[2] = 0;
     data[3] = (right + left) / (2 * zNear);
     data[4] = 0;
     data[5] = (top - bottom) / (2 * zNear);
     data[6] = 0;
     data[7] = (top + bottom) / (2 * zNear);
     data[8] = 0;
     data[9] = 0;
     data[10] = 0;
     data[11] = -1;
     data[12] = 0;
     data[13] = 0;
     data[14] = -1 / (2 * zNear);
     data[15] = 1 / (2 * zNear);
 }
 
 /** Create an homogeneous projection matrix. */
 void Perspective( float fov, float aspect, float zNear, float zFar ) {
     float xmin, xmax, ymin, ymax;
 
     xmax = zNear * tan( fov/2 );
     xmin = -xmax;
 
     ymax = xmax / aspect;
     ymin = -ymax;
 
     Frustum(xmin, xmax, ymin, ymax, zNear, zFar);
 }
 
 /** Create a projection matrix with the far plane at the infinity. */
 void PerspectiveInf( float fov, float aspect, float zNear ) {
     float x = zNear * tan( fov/2 );
     float y = x / aspect;
     FrustumInf( -x, x, -y, y, zNear );
 }
 
 /** Create an inverse projection matrix with far plane at the infinity. */
 void PerspectiveInfInv( float fov, float aspect, float zNear ) {
     float x = zNear * tan( fov/2 );
     float y = x / aspect;
     FrustumInfInv( -x, x, -y, y, zNear );
 }
 
 /** Build bone matrix from quatertion and offset. */
 void BoneMatrix(const Quat & q, const Vec3 & offset) {
     float x2, y2, z2, xx, xy, xz, yy, yz, zz, wx, wy, wz;
 
     // calculate coefficients
     x2 = q.x + q.x;
     y2 = q.y + q.y;
     z2 = q.z + q.z;
 
     xx = q.x * x2;   xy = q.x * y2;   xz = q.x * z2;
     yy = q.y * y2;   yz = q.y * z2;   zz = q.z * z2;
     wx = q.w * x2;   wy = q.w * y2;   wz = q.w * z2;
 
     data[0] = 1.0f - (yy + zz); 	
     data[1] = xy - wz;
     data[2] = xz + wy;		
     data[3] = 0.0f;
 
     data[4] = xy + wz;		
     data[5] = 1.0f - (xx + zz);
     data[6] = yz - wx;		
     data[7] = 0.0f;
 
     data[8] = xz - wy;		
     data[9] = yz + wx;
     data[10] = 1.0f - (xx + yy);		
     data[11] = 0.0f;
 
     data[12] = offset.x;
     data[13] = offset.y;
     data[14] = offset.z;			
     data[15] = 1.0f;
 }
 
 //@}
 
 
 /** @name Transformations: */
 //@{
 
 /** Apply a general scale. */
 void Scale( float x, float y, float z ) {
     data[0] *= x;	data[4] *= y;	data[8]  *= z;
     data[1] *= x;	data[5] *= y;	data[9]  *= z;
     data[2] *= x;	data[6] *= y;	data[10] *= z;
     data[3] *= x;	data[7] *= y;	data[11] *= z;
 }
 
 /** Apply a rotation of theta degrees around the axis v*/
 void Rotate( float theta, const Vec3 & v ) {
     Matrix b;
     b.RotationMatrix( theta, v[0], v[1], v[2] );
     Multiply4x3( b );
 }
 
 /** Apply a rotation of theta degrees around the axis v*/
 void Rotate( float theta, float v0, float v1, float v2 ) {
     Matrix b;
     b.RotationMatrix( theta, v0, v1, v2 );
     Multiply4x3( b );
 }
 
 /**
 * Translate the matrix by t. This is the same as multiplying by a
 * translation matrix with the given offset.
 * this = T * this
 */
 void Translate( const Vec3 &t ) {
     data[12] = data[0] * t.x + data[4] * t.y + data[8]  * t.z + data[12];
     data[13] = data[1] * t.x + data[5] * t.y + data[9]  * t.z + data[13];
     data[14] = data[2] * t.x + data[6] * t.y + data[10] * t.z + data[14];
     data[15] = data[3] * t.x + data[7] * t.y + data[11] * t.z + data[15];
 }
 
 /** 
 * Translate the matrix by x, y, z. This is the same as multiplying by a 
 * translation matrix with the given offsets.
 */
 void Translate( float x, float y, float z ) {
     data[12] = data[0] * x + data[4] * y + data[8]  * z + data[12];
     data[13] = data[1] * x + data[5] * y + data[9]  * z + data[13];
     data[14] = data[2] * x + data[6] * y + data[10] * z + data[14];
     data[15] = data[3] * x + data[7] * y + data[11] * z + data[15];
 }
 
 /** Compute the transposed matrix. */
 void Transpose() {
     piSwap(data[1], data[4]);
     piSwap(data[2], data[8]);
     piSwap(data[6], data[9]);
     piSwap(data[3], data[12]);
     piSwap(data[7], data[13]);
     piSwap(data[11], data[14]);
 }
 
 /** Compute the inverse of a rigid-body/isometry/orthonormal matrix. */
 void IsometryInverse() {
     // transposed 3x3 upper left matrix
     piSwap(data[1], data[4]);
     piSwap(data[2], data[8]);
     piSwap(data[6], data[9]);
 
     // translate by the negative offsets
     Vec3 v(-data[12], -data[13], -data[14]);
     data[12] = data[13] = data[14] = 0;
     Translate(v);
 }
 
 /** Compute the inverse of the affine portion of this matrix. */
 void AffineInverse() {
     data[12] = data[13] = data[14] = 0;
     Transpose();
 }
 //@}
 
 /** @name Matrix operations: */
 //@{
 
 /** Return the determinant of this matrix. */
 float Determinant() const {
     return	data[0] * data[5] * data[10] * data[15] + 
         data[1] * data[6] * data[11] * data[12] +
         data[2] * data[7] * data[ 8] * data[13] +
         data[3] * data[4] * data[ 9] * data[14] -
         data[3] * data[6] * data[ 9] * data[12] -
         data[2] * data[5] * data[ 8] * data[15] -
         data[1] * data[4] * data[11] * data[14] -
         data[0] * data[7] * data[10] * data[12];
 }
 
 
 /** Standard matrix product: this *= B. */
 void Multiply4x4( const Matrix & restrict B ) {
     Multiply4x4(*this, B);
 }
 
 /** Standard matrix product: this = A * B. this != B*/
 void Multiply4x4( const Matrix & A, const Matrix & restrict B ) {
     piDebugCheck(this != &B);
 
     for(int i = 0; i < 4; i++) {
         const float ai0 = A(i,0), ai1 = A(i,1), ai2 = A(i,2), ai3 = A(i,3);
         GetElem(i,0) = ai0 * B(0,0) + ai1 * B(1,0) + ai2 * B(2,0) + ai3 * B(3,0);
         GetElem(i,1) = ai0 * B(0,1) + ai1 * B(1,1) + ai2 * B(2,1) + ai3 * B(3,1);
         GetElem(i,2) = ai0 * B(0,2) + ai1 * B(1,2) + ai2 * B(2,2) + ai3 * B(3,2);
         GetElem(i,3) = ai0 * B(0,3) + ai1 * B(1,3) + ai2 * B(2,3) + ai3 * B(3,3);
     }
 
     /* Unrolled but does not allow this == A
     data[0] = A.data[0] * B.data[0] + A.data[4] * B.data[1] + A.data[8] * B.data[2] + A.data[12] * B.data[3];
     data[1] = A.data[1] * B.data[0] + A.data[5] * B.data[1] + A.data[9] * B.data[2] + A.data[13] * B.data[3];
     data[2] = A.data[2] * B.data[0] + A.data[6] * B.data[1] + A.data[10] * B.data[2] + A.data[14] * B.data[3];
     data[3] = A.data[3] * B.data[0] + A.data[7] * B.data[1] + A.data[11] * B.data[2] + A.data[15] * B.data[3];
     data[4] = A.data[0] * B.data[4] + A.data[4] * B.data[5] + A.data[8] * B.data[6] + A.data[12] * B.data[7];
     data[5] = A.data[1] * B.data[4] + A.data[5] * B.data[5] + A.data[9] * B.data[6] + A.data[13] * B.data[7];
     data[6] = A.data[2] * B.data[4] + A.data[6] * B.data[5] + A.data[10] * B.data[6] + A.data[14] * B.data[7];
     data[7] = A.data[3] * B.data[4] + A.data[7] * B.data[5] + A.data[11] * B.data[6] + A.data[15] * B.data[7];
     data[8] = A.data[0] * B.data[8] + A.data[4] * B.data[9] + A.data[8] * B.data[10] + A.data[12] * B.data[11];
     data[9] = A.data[1] * B.data[8] + A.data[5] * B.data[9] + A.data[9] * B.data[10] + A.data[13] * B.data[11];
     data[10]= A.data[2] * B.data[8] + A.data[6] * B.data[9] + A.data[10] * B.data[10] + A.data[14] * B.data[11];
     data[11]= A.data[3] * B.data[8] + A.data[7] * B.data[9] + A.data[11] * B.data[10] + A.data[15] * B.data[11];
     data[12]= A.data[0] * B.data[12] + A.data[4] * B.data[13] + A.data[8] * B.data[14] + A.data[12] * B.data[15];
     data[13]= A.data[1] * B.data[12] + A.data[5] * B.data[13] + A.data[9] * B.data[14] + A.data[13] * B.data[15];
     data[14]= A.data[2] * B.data[12] + A.data[6] * B.data[13] + A.data[10] * B.data[14] + A.data[14] * B.data[15];
     data[15]= A.data[3] * B.data[12] + A.data[7] * B.data[13] + A.data[11] * B.data[14] + A.data[15] * B.data[15];
     */
 }
 
 /** Standard matrix product: this *= B. */
 void Multiply4x3( const Matrix & restrict B ) {
     Multiply4x3(*this, B);
 }
 
 /** Standard product of matrices, where the last row is [0 0 0 1]. */
 void Multiply4x3( const Matrix & A, const Matrix & restrict B ) {
     piDebugCheck(this != &B);
 
     for(int i = 0; i < 3; i++) {
         const float ai0 = A(i,0), ai1 = A(i,1), ai2 = A(i,2), ai3 = A(i,3);
         GetElem(i,0) = ai0 * B(0,0) + ai1 * B(1,0) + ai2 * B(2,0) + ai3 * B(3,0);
         GetElem(i,1) = ai0 * B(0,1) + ai1 * B(1,1) + ai2 * B(2,1) + ai3 * B(3,1);
         GetElem(i,2) = ai0 * B(0,2) + ai1 * B(1,2) + ai2 * B(2,2) + ai3 * B(3,2);
         GetElem(i,3) = ai0 * B(0,3) + ai1 * B(1,3) + ai2 * B(2,3) + ai3 * B(3,3);
     }
     data[3] = 0.0f; data[7] = 0.0f; data[11] = 0.0f; data[15] = 1.0f;
 
     /* Unrolled but does not allow this == A
     data[0] = a.data[0] * b.data[0] + a.data[4] * b.data[1] + a.data[8] * b.data[2] + a.data[12] * b.data[3];
     data[1] = a.data[1] * b.data[0] + a.data[5] * b.data[1] + a.data[9] * b.data[2] + a.data[13] * b.data[3];
     data[2] = a.data[2] * b.data[0] + a.data[6] * b.data[1] + a.data[10] * b.data[2] + a.data[14] * b.data[3];
     data[3] = 0.0f;
     data[4] = a.data[0] * b.data[4] + a.data[4] * b.data[5] + a.data[8] * b.data[6] + a.data[12] * b.data[7];
     data[5] = a.data[1] * b.data[4] + a.data[5] * b.data[5] + a.data[9] * b.data[6] + a.data[13] * b.data[7];
     data[6] = a.data[2] * b.data[4] + a.data[6] * b.data[5] + a.data[10] * b.data[6] + a.data[14] * b.data[7];
     data[7] = 0.0f;
     data[8] = a.data[0] * b.data[8] + a.data[4] * b.data[9] + a.data[8] * b.data[10] + a.data[12] * b.data[11];
     data[9] = a.data[1] * b.data[8] + a.data[5] * b.data[9] + a.data[9] * b.data[10] + a.data[13] * b.data[11];
     data[10]= a.data[2] * b.data[8] + a.data[6] * b.data[9] + a.data[10] * b.data[10] + a.data[14] * b.data[11];
     data[11]= 0.0f;
     data[12]= a.data[0] * b.data[12] + a.data[4] * b.data[13] + a.data[8] * b.data[14] + a.data[12] * b.data[15];
     data[13]= a.data[1] * b.data[12] + a.data[5] * b.data[13] + a.data[9] * b.data[14] + a.data[13] * b.data[15];
     data[14]= a.data[2] * b.data[12] + a.data[6] * b.data[13] + a.data[10] * b.data[14] + a.data[14] * b.data[15];
     data[15]= 1.0f;
     */
 }
 //@}
 
 
 /** @name Vector operations: */
 //@{
 
 /** Transform 3d vector (w=0). */
 void TransformVec3(const Vec3 & restrict orig, Vec3 * restrict dest) const {
     piDebugCheck(&orig != dest);
     dest->x = orig.x * data[0] + orig.y * data[4] + orig.z * data[8];
     dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9];
     dest->z = orig.x * data[2] + orig.y * data[6] + orig.z * data[10];
 }
 /** Transform 3d vector by the transpose (w=0). */
 void TransformVec3T(const Vec3 & restrict orig, Vec3 * restrict dest) const {
     piDebugCheck(&orig != dest);
     dest->x = orig.x * data[0] + orig.y * data[1] + orig.z * data[2];
     dest->y = orig.x * data[4] + orig.y * data[5] + orig.z * data[6];
     dest->z = orig.x * data[8] + orig.y * data[9] + orig.z * data[10];
 }
 
 /** Transform a 3d homogeneous vector, where the fourth coordinate is assumed to be 1. */
 void TransformPoint(const Vec3 & restrict orig, Vec3 * restrict dest) const {
     piDebugCheck(&orig != dest);
     dest->x = orig.x * data[0] + orig.y * data[4] + orig.z * data[8] + data[12];
     dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9] + data[13];
     dest->z = orig.x * data[2] + orig.y * data[6] + orig.z * data[10] + data[14];
 }
 
 /** Transform a point, normalize it, and return w. */
 float TransformPointAndNormalize(const Vec3 & restrict orig, Vec3 * restrict dest) const {
     piDebugCheck(&orig != dest);
     float w;
     dest->x = orig.x * data[0] + orig.y * data[4] + orig.z * data[8] + data[12];
     dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9] + data[13];
     dest->z = orig.x * data[2] + orig.y * data[6] + orig.z * data[10] + data[14];
     w = 1 / (orig.x * data[3] + orig.y * data[7] + orig.z * data[11] + data[15]);
     *dest *= w;
     return w;
 }
 
 /** Transform a point and return w. */
 float TransformPointReturnW(const Vec3 & restrict orig, Vec3 * restrict dest) const {
     piDebugCheck(&orig != dest);
     dest->x = orig.x * data[0] + orig.y * data[4] + orig.z * data[8] + data[12];
     dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9] + data[13];
     dest->z = orig.x * data[2] + orig.y * data[6] + orig.z * data[10] + data[14];
     return orig.x * data[3] + orig.y * data[7] + orig.z * data[11] + data[15];
 }
 
 /** Transform a normalized 3d point by a 4d matrix and return the resulting 4d vector. */
 void TransformVec4(const Vec3 & orig, Vec4 * dest) const {
     dest->x = orig.x * data[0] + orig.y * data[4] + orig.z * data[8] + data[12];
     dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9] + data[13];
     dest->z = orig.x * data[2] + orig.y * data[6] + orig.z * data[10] + data[14];
     dest->w = orig.x * data[3] + orig.y * data[7] + orig.z * data[11] + data[15];
 }
 //@}
 
 /** @name Matrix analysis. */
 //@{
 
 /** Get the ZYZ euler angles from the matrix. Assumes the matrix is orthonormal. */
 void GetEulerAnglesZYZ(float * s, float * t, float * r) const {
     if( GetElem(2,2) < 1.0f ) {
         if( GetElem(2,2) > -1.0f ) {
             // 	cs*ct*cr-ss*sr 		-ss*ct*cr-cs*sr		st*cr
             //	cs*ct*sr+ss*cr		-ss*ct*sr+cs*cr		st*sr
             //	-cs*st				ss*st				ct
             *s = atan2(GetElem(1,2), -GetElem(0,2));
             *t = acos(GetElem(2,2));
             *r = atan2(GetElem(2,1), GetElem(2,0));		
         }
         else {
             // 	-c(s-r)	 	s(s-r)		0
             //	s(s-r)		c(s-r)		0
             //	0			0			-1
             *s = atan2(GetElem(0, 1), -GetElem(0, 0)); // = s-r
             *t = PI;
             *r = 0;
         }
     }
     else {
         // 	c(s+r)		-s(s+r)		0
         //	s(s+r)		c(s+r)		0
         //	0			0			1
         *s = atan2(GetElem(0, 1), GetElem(0, 0)); // = s+r
         *t = 0;
         *r = 0;
     }
 }
 
 //@}
 
 MATHLIB_API friend PiStream & operator<< ( PiStream & s, Matrix & m );
 
 /** Print to debug output. */
 void Print() const {
     piDebug( "[ %5.2f %5.2f %5.2f %5.2f ]\n", data[0], data[4], data[8], data[12] );
     piDebug( "[ %5.2f %5.2f %5.2f %5.2f ]\n", data[1], data[5], data[9], data[13] );
     piDebug( "[ %5.2f %5.2f %5.2f %5.2f ]\n", data[2], data[6], data[10], data[14] );
     piDebug( "[ %5.2f %5.2f %5.2f %5.2f ]\n", data[3], data[7], data[11], data[15] );
 }
 
 
 public:
 
     float data[16];
 
 };
 #endif
 
 
 #endif // NV_MATH_MATRIX_INL
Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/PackedFloat.cpp
===================================================================
--- ps/trunk/libraries/source/nvtt/src/src/nvmath/PackedFloat.cpp	(revision 23379)
+++ ps/trunk/libraries/source/nvtt/src/src/nvmath/PackedFloat.cpp	(revision 23380)
@@ -1,61 +1,61 @@
-// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+// This code is in the public domain -- Ignacio CastaÃ±o <castano@gmail.com>
 
 #include "PackedFloat.h"
 #include "Vector.inl"
 #include "ftoi.h"
 
 using namespace nv;
 
 Vector3 nv::rgb9e5_to_vector3(FloatRGB9E5 v) {
 }
 
 FloatRGB9E5 nv::vector3_to_rgb9e5(const Vector3 & v) {
 }
 
 
 float nv::float11_to_float32(uint v) {
 }
 
 float nv::float10_to_float32(uint v) {
 }
 
 Vector3 nv::r11g11b10_to_vector3(FloatR11G11B10 v) {
 }
 
 FloatR11G11B10 nv::vector3_to_r11g11b10(const Vector3 & v) {
 }
 
 // These are based on: 
 // http://www.graphics.cornell.edu/~bjw/rgbe/rgbe.c
 // While this may not be the best way to encode/decode RGBE8, I'm not making any changes to maintain compatibility.
 FloatRGBE8 nv::vector3_to_rgbe8(const Vector3 & v) {
 
     float m = max3(v.x, v.y, v.z);
 
     FloatRGBE8 rgbe;
 
     if (m < 1e-32) {
         rgbe.v = 0;
     }
     else {
         int e;
         float scale = frexpf(m, &e) * 256.0f / m;
         rgbe.r = U8(ftoi_round(v.x * scale));
         rgbe.g = U8(ftoi_round(v.y * scale));
         rgbe.b = U8(ftoi_round(v.z * scale));
         rgbe.e = U8(e + 128);
     }
 
     return rgbe;
 }
 
 
 Vector3 nv::rgbe8_to_vector3(FloatRGBE8 v) {
     if (v.e != 0) {
         float scale = ldexpf(1.0f, v.e-(int)(128+8));             // +8 to divide by 256. @@ Shouldn't we divide by 255 instead?
         return scale * Vector3(float(v.r), float(v.g), float(v.b));
     }
     
     return Vector3(0);
 }
 
Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/Plane.h
===================================================================
--- ps/trunk/libraries/source/nvtt/src/src/nvmath/Plane.h	(revision 23379)
+++ ps/trunk/libraries/source/nvtt/src/src/nvmath/Plane.h	(revision 23380)
@@ -1,45 +1,45 @@
-// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+// This code is in the public domain -- Ignacio CastaÃ±o <castano@gmail.com>
 
 #pragma once
 #ifndef NV_MATH_PLANE_H
 #define NV_MATH_PLANE_H
 
 #include "nvmath.h"
 #include "Vector.h"
 
 #if NV_USE_ALTIVEC
 #undef vector
 #endif
 
 namespace nv
 {
     class Matrix;
 
     class NVMATH_CLASS Plane
     {
     public:
         Plane();
         Plane(float x, float y, float z, float w);
         Plane(const Vector4 & v);
         Plane(const Vector3 & v, float d);
         Plane(const Vector3 & normal, const Vector3 & point);
         Plane(const Vector3 & v0, const Vector3 & v1, const Vector3 & v2);
 
         const Plane & operator=(const Plane & v);
 
         Vector3 vector() const;
         float offset() const;
 
         void operator*=(float s);
 
         Vector4 v;
     };
 
     Plane transformPlane(const Matrix &, const Plane &);
 
     Vector3 planeIntersection(const Plane & a, const Plane & b, const Plane & c);
 
 
 } // nv namespace
 
 #endif // NV_MATH_PLANE_H
Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/Plane.inl
===================================================================
--- ps/trunk/libraries/source/nvtt/src/src/nvmath/Plane.inl	(revision 23379)
+++ ps/trunk/libraries/source/nvtt/src/src/nvmath/Plane.inl	(revision 23380)
@@ -1,49 +1,49 @@
-// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+// This code is in the public domain -- Ignacio CastaÃ±o <castano@gmail.com>
 
 #pragma once
 #ifndef NV_MATH_PLANE_INL
 #define NV_MATH_PLANE_INL
 
 #include "Plane.h"
 #include "Vector.inl"
 
 namespace nv
 {
     inline Plane::Plane() {}
     inline Plane::Plane(float x, float y, float z, float w) : v(x, y, z, w) {}
     inline Plane::Plane(const Vector4 & v) : v(v) {}
     inline Plane::Plane(const Vector3 & v, float d) : v(v, d) {}
     inline Plane::Plane(const Vector3 & normal, const Vector3 & point) : v(normal, -dot(normal, point)) {}
     inline Plane::Plane(const Vector3 & v0, const Vector3 & v1, const Vector3 & v2) {
         Vector3 n = cross(v1-v0, v2-v0);
         float d = -dot(n, v0);
         v = Vector4(n, d);
     }
 
     inline const Plane & Plane::operator=(const Plane & p) { v = p.v; return *this; }
 
     inline Vector3 Plane::vector() const { return v.xyz(); }
     inline float Plane::offset() const { return v.w; }
 
     // Normalize plane.
     inline Plane normalize(const Plane & plane, float epsilon = NV_EPSILON)
     {
         const float len = length(plane.vector());
         const float inv = isZero(len, epsilon) ? 0 : 1.0f / len;
         return Plane(plane.v * inv);
     }
 
     // Get the signed distance from the given point to this plane.
     inline float distance(const Plane & plane, const Vector3 & point)
     {
         return dot(plane.vector(), point) + plane.offset();
     }
 
     inline void Plane::operator*=(float s)
     {
         v *= s;
     }
 
 } // nv namespace
 
 #endif // NV_MATH_PLANE_H
Index: ps/trunk/libraries/source/nvtt/src/src/nvthread/ParallelFor.cpp
===================================================================
--- ps/trunk/libraries/source/nvtt/src/src/nvthread/ParallelFor.cpp	(revision 23379)
+++ ps/trunk/libraries/source/nvtt/src/src/nvthread/ParallelFor.cpp	(revision 23380)
@@ -1,61 +1,61 @@
-// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+// This code is in the public domain -- Ignacio CastaÃ±o <castano@gmail.com>
 
 #include "ParallelFor.h"
 #include "Thread.h"
 #include "Atomic.h"
 #include "ThreadPool.h"
 
 #include "nvcore/Utils.h" // toI32
 
 using namespace nv;
 
 #define ENABLE_PARALLEL_FOR 1
 
 static void worker(void * arg, int tid) {
     ParallelFor * owner = (ParallelFor *)arg;
 
     while(true) {
         uint new_idx = atomicFetchAndAdd(&owner->idx, owner->step);
         if (new_idx >= owner->count) {
             break;
         }
 
         const uint count = min(owner->count, new_idx + owner->step);
         for (uint i = new_idx; i < count; i++) {
             owner->task(owner->context, /*tid, */i);
         }
     }
 }
 
 
 ParallelFor::ParallelFor(ForTask * task, void * context) : task(task), context(context) {
 #if ENABLE_PARALLEL_FOR
     pool = ThreadPool::acquire();
 #endif
 }
 
 ParallelFor::~ParallelFor() {
 #if ENABLE_PARALLEL_FOR
     ThreadPool::release(pool);
 #endif
 }
 
 void ParallelFor::run(uint count, uint step/*= 1*/) {
 #if ENABLE_PARALLEL_FOR
     storeRelease(&this->count, count);
     storeRelease(&this->step, step);
 
     // Init atomic counter to zero.
     storeRelease(&idx, 0);
 
     // Start threads.
     pool->run(worker, this);
 
     nvDebugCheck(idx >= count);
 #else
     for (int i = 0; i < toI32(count); i++) {
         task(context, i);
     }
 #endif
 }
 
Index: ps/trunk/libraries/source/nvtt/src/src/nvtt/CubeSurface.cpp
===================================================================
--- ps/trunk/libraries/source/nvtt/src/src/nvtt/CubeSurface.cpp	(revision 23379)
+++ ps/trunk/libraries/source/nvtt/src/src/nvtt/CubeSurface.cpp	(revision 23380)
@@ -1,1042 +1,1042 @@
 // Copyright (c) 2009-2011 Ignacio Castano <castano@gmail.com>
 // 
 // Permission is hereby granted, free of charge, to any person
 // obtaining a copy of this software and associated documentation
 // files (the "Software"), to deal in the Software without
 // restriction, including without limitation the rights to use,
 // copy, modify, merge, publish, distribute, sublicense, and/or sell
 // copies of the Software, and to permit persons to whom the
 // Software is furnished to do so, subject to the following
 // conditions:
 // 
 // The above copyright notice and this permission notice shall be
 // included in all copies or substantial portions of the Software.
 // 
 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 // OTHER DEALINGS IN THE SOFTWARE.
 
 #include "CubeSurface.h"
 #include "Surface.h"
 
 #include "nvimage/DirectDrawSurface.h"
 
 #include "nvmath/Vector.inl"
 
 #include "nvcore/Array.inl"
 #include "nvcore/StrLib.h"
 
 using namespace nv;
 using namespace nvtt;
 
 
 
 // Solid angle of an axis aligned quad from (0,0,1) to (x,y,1)
 // See: http://www.fizzmoll11.com/thesis/ for a derivation of this formula.
 static float areaElement(float x, float y) {
     return atan2(x*y, sqrtf(x*x + y*y + 1));
 }
 
 // Solid angle of a hemicube texel.
 static float solidAngleTerm(uint x, uint y, float inverseEdgeLength) {
     // Transform x,y to [-1, 1] range, offset by 0.5 to point to texel center.
     float u = (float(x) + 0.5f) * (2 * inverseEdgeLength) - 1.0f;
     float v = (float(y) + 0.5f) * (2 * inverseEdgeLength) - 1.0f;
     nvDebugCheck(u >= -1.0f && u <= 1.0f);
     nvDebugCheck(v >= -1.0f && v <= 1.0f);
 
 #if 1
     // Exact solid angle:
     float x0 = u - inverseEdgeLength;
     float y0 = v - inverseEdgeLength;
     float x1 = u + inverseEdgeLength;
     float y1 = v + inverseEdgeLength;
     float solidAngle = areaElement(x0, y0) - areaElement(x0, y1) - areaElement(x1, y0) + areaElement(x1, y1);
     nvDebugCheck(solidAngle > 0.0f);
 
     return solidAngle;
 #else
     // This formula is equivalent, but not as precise.
     float pixel_area = nv::square(2.0f * inverseEdgeLength);
     float dist_square = 1.0f + nv::square(u) + nv::square(v);
     float cos_theta = 1.0f / sqrt(dist_square);
     float cos_theta_d2 = cos_theta / dist_square; // Funny this is just 1/dist^3 or cos(tetha)^3
 
     return pixel_area * cos_theta_d2;
 #endif
 }
 
 
 static Vector3 texelDirection(uint face, uint x, uint y, int edgeLength, EdgeFixup fixupMethod)
 {
     float u, v;
     if (fixupMethod == EdgeFixup_Stretch) {
         // Transform x,y to [-1, 1] range, match up edges exactly.
         u = float(x) * 2.0f / (edgeLength - 1) - 1.0f;
         v = float(y) * 2.0f / (edgeLength - 1) - 1.0f;
     }
     else {
         // Transform x,y to [-1, 1] range, offset by 0.5 to point to texel center.
         u = (float(x) + 0.5f) * (2.0f / edgeLength) - 1.0f;
         v = (float(y) + 0.5f) * (2.0f / edgeLength) - 1.0f;
     }
 
     if (fixupMethod == EdgeFixup_Warp) {
         // Warp texel centers in the proximity of the edges.
         float a = powf(float(edgeLength), 2.0f) / powf(float(edgeLength - 1), 3.0f);
         u = a * powf(u, 3) + u;
         v = a * powf(v, 3) + v;
     }
 
     nvDebugCheck(u >= -1.0f && u <= 1.0f);
     nvDebugCheck(v >= -1.0f && v <= 1.0f);
 
     Vector3 n;
 
     if (face == 0) {
         n.x = 1;
         n.y = -v;
         n.z = -u;
     }
     if (face == 1) {
         n.x = -1;
         n.y = -v;
         n.z = u;
     }
 
     if (face == 2) {
         n.x = u;
         n.y = 1;
         n.z = v;
     }
     if (face == 3) {
         n.x = u;
         n.y = -1;
         n.z = -v;
     }
 
     if (face == 4) {
         n.x = u;
         n.y = -v;
         n.z = 1;
     }
     if (face == 5) {
         n.x = -u;
         n.y = -v;
         n.z = -1;
     }
 
     return normalizeFast(n);
 }
 
 
 TexelTable::TexelTable(uint edgeLength) : size(edgeLength) {
 
     uint hsize = size/2;
 
     // Allocate a small solid angle table that takes into account cube map symmetry.
     solidAngleArray.resize(hsize * hsize);
 
     for (uint y = 0; y < hsize; y++) {
         for (uint x = 0; x < hsize; x++) {
             solidAngleArray[y * hsize + x] = solidAngleTerm(hsize+x, hsize+y, 1.0f/edgeLength);
         }
     }
 
 
     directionArray.resize(size*size*6);
 
     for (uint f = 0; f < 6; f++) {
         for (uint y = 0; y < size; y++) {
             for (uint x = 0; x < size; x++) {
                 directionArray[(f * size + y) * size + x] = texelDirection(f, x, y, edgeLength, EdgeFixup_None);
             }
         }
     }
 }
 
 const Vector3 & TexelTable::direction(uint f, uint x, uint y) const {
     nvDebugCheck(f < 6 && x < size && y < size);
     return directionArray[(f * size + y) * size + x];
 }
 
 float TexelTable::solidAngle(uint f, uint x, uint y) const {
     uint hsize = size/2;
     if (x >= hsize) x -= hsize;
     else if (x < hsize) x = hsize - x - 1;
     if (y >= hsize) y -= hsize;
     else if (y < hsize) y = hsize - y - 1;
 
     return solidAngleArray[y * hsize + x];
 }
 
 
 static const Vector3 faceNormals[6] = {
     Vector3(1, 0, 0),
     Vector3(-1, 0, 0),
     Vector3(0, 1, 0),
     Vector3(0, -1, 0),
     Vector3(0, 0, 1),
     Vector3(0, 0, -1),
 };
 
 static const Vector3 faceU[6] = {
     Vector3(0, 0, -1),
     Vector3(0, 0, 1),
     Vector3(1, 0, 0),
     Vector3(1, 0, 0),
     Vector3(1, 0, 0),
     Vector3(-1, 0, 0),
 };
 
 static const Vector3 faceV[6] = {
     Vector3(0, -1, 0),
     Vector3(0, -1, 0),
     Vector3(0, 0, 1),
     Vector3(0, 0, -1),
     Vector3(0, -1, 0),
     Vector3(0, -1, 0),
 };
 
 
 static Vector2 toPolar(Vector3::Arg v) {
     Vector2 p;
     p.x = atan2(v.x, v.y);  // theta
     p.y = acosf(v.z);       // phi
     return p;
 }
 
 static Vector2 toPlane(float theta, float phi) {
     float x = sin(phi) * cos(theta);
     float y = sin(phi) * sin(theta);
     float z = cos(phi);
 
     Vector2 p;
     p.x = x / fabs(z);
     p.y = y / fabs(z);
     //p.x = tan(phi) * cos(theta);
     //p.y = tan(phi) * sin(theta);
 
     return p;
 }
 
 static Vector2 toPlane(Vector3::Arg v) {
     Vector2 p;
     p.x = v.x / fabs(v.z);
     p.y = v.y / fabs(v.z);
     return p;
 }
 
 
 
 
 
 CubeSurface::CubeSurface() : m(new CubeSurface::Private())
 {
     m->addRef();
 }
 
 CubeSurface::CubeSurface(const CubeSurface & cube) : m(cube.m)
 {
     if (m != NULL) m->addRef();
 }
 
 CubeSurface::~CubeSurface()
 {
     if (m != NULL) m->release();
     m = NULL;
 }
 
 void CubeSurface::operator=(const CubeSurface & cube)
 {
     if (cube.m != NULL) cube.m->addRef();
     if (m != NULL) m->release();
     m = cube.m;
 }
 
 void CubeSurface::detach()
 {
     if (m->refCount() > 1)
     {
         m->release();
         m = new CubeSurface::Private(*m);
         m->addRef();
         nvDebugCheck(m->refCount() == 1);
     }
 }
 
 
 
 bool CubeSurface::isNull() const
 {
     return m->edgeLength == 0;
 }
 
 int CubeSurface::edgeLength() const
 {
     return m->edgeLength;
 }
 
 int CubeSurface::countMipmaps() const
 {
     return nv::countMipmaps(m->edgeLength);
 }
 
 Surface & CubeSurface::face(int f)
 {
     nvDebugCheck(f >= 0 && f < 6);
     return m->face[f];
 }
 
 const Surface & CubeSurface::face(int f) const
 {
     nvDebugCheck(f >= 0 && f < 6);
     return m->face[f];
 }
 
 
 bool CubeSurface::load(const char * fileName, int mipmap)
 {
     if (strEqual(Path::extension(fileName), ".dds")) {
         nv::DirectDrawSurface dds(fileName);
 
         if (!dds.isValid()/* || !dds.isSupported()*/) {
             return false;
         }
 
         if (!dds.isTextureCube()) {
             return false;
         }
 
         // Make sure it's a valid cube.
         if (dds.header.width != dds.header.height) return false;
         //if ((dds.header.caps.caps2 & DDSCAPS2_CUBEMAP_ALL_FACES) != DDSCAPS2_CUBEMAP_ALL_FACES) return false;
 
         if (mipmap < 0) {
             mipmap = dds.mipmapCount() - 1 - mipmap;
         }
         if (mipmap < 0 || mipmap > I32(dds.mipmapCount())) return false;
         
 
         nvtt::InputFormat inputFormat = nvtt::InputFormat_RGBA_16F;
 
         if (dds.header.hasDX10Header()) {
             if (dds.header.header10.dxgiFormat == DXGI_FORMAT_R16G16B16A16_FLOAT) inputFormat = nvtt::InputFormat_RGBA_16F;
             else if (dds.header.header10.dxgiFormat == DXGI_FORMAT_R32G32B32A32_FLOAT) inputFormat = nvtt::InputFormat_RGBA_32F;
             else if (dds.header.header10.dxgiFormat == DXGI_FORMAT_R32_FLOAT) inputFormat = nvtt::InputFormat_R_32F;
             else return false;
         }
         else {
             if ((dds.header.pf.flags & DDPF_FOURCC) != 0) {
                 if (dds.header.pf.fourcc == D3DFMT_A16B16G16R16F) inputFormat = nvtt::InputFormat_RGBA_16F;
                 else if (dds.header.pf.fourcc == D3DFMT_A32B32G32R32F) inputFormat = nvtt::InputFormat_RGBA_32F;
                 else if (dds.header.pf.fourcc == D3DFMT_R32F) inputFormat = nvtt::InputFormat_R_32F;
                 else return false;
             }
             else {
                 if (dds.header.pf.bitcount == 32 /*&& ...*/) inputFormat = nvtt::InputFormat_BGRA_8UB;
                 else return false;  // @@ Do pixel format conversions!
             }
         }
         
         uint edgeLength = dds.surfaceWidth(mipmap);
         uint size = dds.surfaceSize(mipmap);
 
         void * data = malloc(size);
 
         for (int f = 0; f < 6; f++) {
             dds.readSurface(f, mipmap, data, size);
             m->face[f].setImage(inputFormat, edgeLength, edgeLength, 1, data);
         }
 
         m->edgeLength = edgeLength;
 
         free(data);
 
         return true;
     }
 
     return false;
 }
 
 bool CubeSurface::save(const char * fileName) const
 {
     // @@ TODO
     return false;
 }
 
 struct ivec2 {
     uint x;
     uint y;
 };
 //                                                   posx    negx    posy    negy    posz    negz
 static const ivec2 foldOffsetVerticalCross[6]   = { {2, 1}, {0, 1}, {1, 0}, {1, 2}, {1, 1}, {1, 3} };
 static const ivec2 foldOffsetHorizontalCross[6] = { {2, 1}, {0, 1}, {1, 0}, {1, 2}, {1, 1}, {3, 1} };
 static const ivec2 foldOffsetColumn[6]          = { {0, 0}, {0, 1}, {0, 2}, {0, 3}, {0, 4}, {0, 5} };
 static const ivec2 foldOffsetRow[6]             = { {0, 0}, {1, 0}, {2, 0}, {3, 0}, {4, 0}, {5, 0} };
 
 void CubeSurface::fold(const Surface & tex, CubeLayout layout)
 {
     ivec2 const* offsets = 0;
     uint edgeLength;
 
     switch(layout) {
         case CubeLayout_LatitudeLongitude:
         case CubeLayout_VerticalCross:
             edgeLength = tex.height() / 4;
             offsets = foldOffsetVerticalCross;
             break;
         case CubeLayout_HorizontalCross:
             edgeLength = tex.width() / 4;
             offsets = foldOffsetHorizontalCross;
             break;
         case CubeLayout_Column:
             edgeLength = tex.width();
             offsets = foldOffsetColumn;
             break;
         case CubeLayout_Row:
             edgeLength = tex.height();
             offsets = foldOffsetRow;
             break;
     }
 
     m->edgeLength = edgeLength;
     for(uint f = 0; f < 6; f++) {
         uint x = offsets[f].x * edgeLength;
         uint y = offsets[f].y * edgeLength;
         m->face[f] = tex.createSubImage(x, x + edgeLength - 1, y, y + edgeLength - 1, 0, 0);
     }
 
     if(layout == CubeLayout_VerticalCross || layout == CubeLayout_LatitudeLongitude) {
         // Back face needs to be rotated 180 degrees
         m->face[5].flipX();
         m->face[5].flipY();
     }
 }
 
 Surface CubeSurface::unfold(CubeLayout layout) const
 {
     ivec2 const* offsets = 0;
     uint edgeLength = m->edgeLength;
     uint width;
     uint height;
 
     switch(layout) {
         case CubeLayout_LatitudeLongitude:
         case CubeLayout_VerticalCross:
             offsets = foldOffsetVerticalCross;
             width = 3 * edgeLength;
             height = 4 * edgeLength;
             // Back face needs to be rotated 180 degrees
             m->face[5].flipX();
             m->face[5].flipY();
             break;
         case CubeLayout_HorizontalCross:
             offsets = foldOffsetHorizontalCross;
             width = 4 * edgeLength;
             height = 3 * edgeLength;
             break;
         case CubeLayout_Column:
             offsets = foldOffsetColumn;
             width = edgeLength;
             height = 6 * edgeLength;
             break;
         case CubeLayout_Row:
             offsets = foldOffsetRow;
             width = 6 * edgeLength;
             height = edgeLength;
             break;
     }
 
     Surface surface;
     surface.setImage(width, height, 1);
     for(uint f = 0; f < 6; f++) {
         uint x = offsets[f].x * edgeLength;
         uint y = offsets[f].y * edgeLength;
         surface.copy(m->face[f], 0, 0, 0, edgeLength, edgeLength, 1, x, y, 0);
     }
 
     if(layout == CubeLayout_VerticalCross || layout == CubeLayout_LatitudeLongitude) {
         // Undo back face rotation
         m->face[5].flipY();
         m->face[5].flipX();
     }
     return surface;
 }
 
 float CubeSurface::average(int channel) const
 {
     const uint edgeLength = m->edgeLength;
     m->allocateTexelTable();
 
     float total = 0.0f;
     float sum = 0.0f;
 
     for (int f = 0; f < 6; f++) {
         float * c = m->face[f].m->image->channel(channel);
 
          for (uint y = 0; y < edgeLength; y++) {
              for (uint x = 0; x < edgeLength; x++) {
                 float solidAngle = m->texelTable->solidAngle(f, x, y);
 
                 total += solidAngle;
                 sum += c[y * edgeLength + x] * solidAngle;
             }
         }
     }
 
     return sum / total;
 }
 
 void CubeSurface::range(int channel, float * minimum_ptr, float * maximum_ptr) const
 {
     const uint edgeLength = m->edgeLength;
     m->allocateTexelTable();
 
     float minimum = NV_FLOAT_MAX;
     float maximum = 0.0f;
 
     for (int f = 0; f < 6; f++) {
         float * c = m->face[f].m->image->channel(channel);
 
          for (uint y = 0; y < edgeLength; y++) {
              for (uint x = 0; x < edgeLength; x++) {
 
                  minimum = nv::min(minimum, c[y * edgeLength + x]);
                  maximum = nv::max(maximum, c[y * edgeLength + x]);
             }
         }
     }
 
     *minimum_ptr = minimum;
     *maximum_ptr = maximum;
 }
 
 void CubeSurface::clamp(int channel, float low/*= 0.0f*/, float high/*= 1.0f*/) {
     for (int f = 0; f < 6; f++) {
         m->face[f].clamp(channel, low, high);
     }
 }
 
 
 
 #include "nvmath/SphericalHarmonic.h"
 
 CubeSurface CubeSurface::irradianceFilter(int size, EdgeFixup fixupMethod) const
 {
     m->allocateTexelTable();
 
     // Transform this cube to spherical harmonic basis
     Sh2 sh;
 
     // For each texel of the input cube.
     const uint edgeLength = m->edgeLength;
     for (uint f = 0; f < 6; f++) {
         for (uint y = 0; y < edgeLength; y++) {
             for (uint x = 0; x < edgeLength; x++) {
 
                 Vector3 dir = m->texelTable->direction(f, x, y);
                 float solidAngle = m->texelTable->solidAngle(f, x, y);
 
                 Sh2 shDir;
                 shDir.eval(dir);
 
                 sh.addScaled(sh, solidAngle);
             }
         }
     }
 
 
     // Evaluate spherical harmonic for each output texel.
     CubeSurface output;
     output.m->allocate(size);
 
 
 
 
     // @@ TODO
     return CubeSurface();
 }
 
 
 
 
 // Convolve filter against this cube.
 Vector3 CubeSurface::Private::applyAngularFilter(const Vector3 & filterDir, float coneAngle, float * filterTable, int tableSize)
 {
     const float cosineConeAngle = cos(coneAngle);
     nvDebugCheck(cosineConeAngle >= 0);
 
     Vector3 color(0);
     float sum = 0;
 
     // Things I have tried to speed this up:
     // - Compute accurate bounds assuming cone axis aligned to plane, result was too small elsewhere.
     // - Compute ellipse that results in the cone/plane intersection and compute its bounds. Sometimes intersection is a parabolla, hard to handle that case.
     // - Compute the 6 axis aligned planes that bound the cone, clip faces against planes. Resulting plane equations are way too complex.
 
     // What AMD CubeMapGen does:
     // - Compute conservative bounds on the primary face, wrap around the adjacent faces.
 
 
     // For each texel of the input cube.
     for (uint f = 0; f < 6; f++) {
 
         // Test face cone agains filter cone.
         float cosineFaceAngle = dot(filterDir, faceNormals[f]);
         float faceAngle = acosf(cosineFaceAngle);
 
         if (faceAngle > coneAngle + atanf(sqrtf(2))) {
             // Skip face.
             continue;
         }
 
         const int L = I32(edgeLength-1);
         int x0 = 0, x1 = L;
         int y0 = 0, y1 = L;
 
 #if 0
         float u0 = -1;
         float u1 = 1;
         float v0 = -1;
         float v1 = 1;
 
         // @@ Compute uvs.
 
         // Expand uv coordinates from [-1,1] to [0, edgeLength)
         u0 = (u0 + 1) * edgeLength * 0.5f - 0.5f;
         v0 = (v0 + 1) * edgeLength * 0.5f - 0.5f;
         u1 = (u1 + 1) * edgeLength * 0.5f - 0.5f;
         v1 = (v1 + 1) * edgeLength * 0.5f - 0.5f;
         nvDebugCheck(u0 >= -0.5f && u0 <= edgeLength - 0.5f);
         nvDebugCheck(v0 >= -0.5f && v0 <= edgeLength - 0.5f);
         nvDebugCheck(u1 >= -0.5f && u1 <= edgeLength - 0.5f);
         nvDebugCheck(v1 >= -0.5f && v1 <= edgeLength - 0.5f);
 
         x0 = clamp(ifloor(u0), 0, L);
         y0 = clamp(ifloor(v0), 0, L);
         x1 = clamp(iceil(u1), 0, L);
         y1 = clamp(iceil(v1), 0, L);
 #endif
 
         nvDebugCheck(x1 >= x0);
         nvDebugCheck(y1 >= y0);
 
         if (x1 == x0 || y1 == y0) {
             // Skip this face.
             continue;
         }
 
 
         const Surface & inputFace = face[f];
         const FloatImage * inputImage = inputFace.m->image;
 
         for (int y = y0; y <= y1; y++) {
             bool inside = false;
             for (int x = x0; x <= x1; x++) {
 
                 Vector3 dir = texelTable->direction(f, x, y);
                 float cosineAngle = dot(dir, filterDir);
 
                 if (cosineAngle > cosineConeAngle) {
                     float solidAngle = texelTable->solidAngle(f, x, y);
                     //float scale = powf(saturate(cosineAngle), cosinePower);
                     
                     int idx = int(saturate(cosineAngle) * (tableSize - 1));
                     float scale = filterTable[idx]; // @@ Do bilinear interpolation?
 
                     float contribution = solidAngle * scale;
 
                     sum += contribution;
                     color.x += contribution * inputImage->pixel(0, x, y, 0);
                     color.y += contribution * inputImage->pixel(1, x, y, 0);
                     color.z += contribution * inputImage->pixel(2, x, y, 0);
 
                     inside = true;
                 }
                 else if (inside) {
                     // Filter scale is monotonic, if we have been inside once and we just exit, then we can skip the rest of the row.
                     // We could do the same thing for the columns and skip entire rows.
                     break;
                 }
             }
         }
     }
 
     color *= (1.0f / sum);
 
     return color;
 }
 
 // We want to find the alpha such that:
 // cos(alpha)^cosinePower = epsilon
 // That's: acos(epsilon^(1/cosinePower))
 
 // We can cull texels in two different ways:
 // - culling faces that do not touch the cone.
 // - computing one rectangle per face, find intersection between cone and face.
 // -
 
 // Other speedups:
 // - parallelize. Done.
 // - use ISPC?
 
 
 // Convolve filter against this cube.
 Vector3 CubeSurface::Private::applyCosinePowerFilter(const Vector3 & filterDir, float coneAngle, float cosinePower)
 {
     const float cosineConeAngle = cos(coneAngle);
     nvDebugCheck(cosineConeAngle >= 0);
 
     Vector3 color(0);
     float sum = 0;
 
     // Things I have tried to speed this up:
     // - Compute accurate bounds assuming cone axis aligned to plane, result was too small elsewhere.
     // - Compute ellipse that results in the cone/plane intersection and compute its bounds. Sometimes intersection is a parabolla, hard to handle that case.
     // - Compute the 6 axis aligned planes that bound the cone, clip faces against planes. Resulting plane equations are way too complex.
 
     // What AMD CubeMapGen does:
     // - Compute conservative bounds on the primary face, wrap around the adjacent faces.
 
 
     // For each texel of the input cube.
     for (uint f = 0; f < 6; f++) {
 
         // Test face cone agains filter cone.
         float cosineFaceAngle = dot(filterDir, faceNormals[f]);
         float faceAngle = acosf(cosineFaceAngle);
 
         if (faceAngle > coneAngle + atanf(sqrtf(2))) {
             // Skip face.
             continue;
         }
 
         const int L = I32(edgeLength-1);
         int x0 = 0, x1 = L;
         int y0 = 0, y1 = L;
 
 #if 0
         float u0 = -1;
         float u1 = 1;
         float v0 = -1;
         float v1 = 1;
 
         // @@ Compute uvs.
 
         // Expand uv coordinates from [-1,1] to [0, edgeLength)
         u0 = (u0 + 1) * edgeLength * 0.5f - 0.5f;
         v0 = (v0 + 1) * edgeLength * 0.5f - 0.5f;
         u1 = (u1 + 1) * edgeLength * 0.5f - 0.5f;
         v1 = (v1 + 1) * edgeLength * 0.5f - 0.5f;
         nvDebugCheck(u0 >= -0.5f && u0 <= edgeLength - 0.5f);
         nvDebugCheck(v0 >= -0.5f && v0 <= edgeLength - 0.5f);
         nvDebugCheck(u1 >= -0.5f && u1 <= edgeLength - 0.5f);
         nvDebugCheck(v1 >= -0.5f && v1 <= edgeLength - 0.5f);
 
         x0 = clamp(ifloor(u0), 0, L);
         y0 = clamp(ifloor(v0), 0, L);
         x1 = clamp(iceil(u1), 0, L);
         y1 = clamp(iceil(v1), 0, L);
 #endif
 
         nvDebugCheck(x1 >= x0);
         nvDebugCheck(y1 >= y0);
 
         if (x1 == x0 || y1 == y0) {
             // Skip this face.
             continue;
         }
 
 
         const Surface & inputFace = face[f];
         const FloatImage * inputImage = inputFace.m->image;
 
         for (int y = y0; y <= y1; y++) {
             bool inside = false;
             for (int x = x0; x <= x1; x++) {
 
                 Vector3 dir = texelTable->direction(f, x, y);
                 float cosineAngle = dot(dir, filterDir);
 
                 if (cosineAngle > cosineConeAngle) {
                     float solidAngle = texelTable->solidAngle(f, x, y);
                     float scale = powf(saturate(cosineAngle), cosinePower);
                     float contribution = solidAngle * scale;
 
                     sum += contribution;
                     color.x += contribution * inputImage->pixel(0, x, y, 0);
                     color.y += contribution * inputImage->pixel(1, x, y, 0);
                     color.z += contribution * inputImage->pixel(2, x, y, 0);
 
                     inside = true;
                 }
                 else if (inside) {
                     // Filter scale is monotonic, if we have been inside once and we just exit, then we can skip the rest of the row.
                     // We could do the same thing for the columns and skip entire rows.
                     break;
                 }
             }
         }
     }
 
     color *= (1.0f / sum);
 
     return color;
 }
 
 #include "nvthread/ParallelFor.h"
 
 struct ApplyAngularFilterContext {
     CubeSurface::Private * inputCube;
     CubeSurface::Private * filteredCube;
     float coneAngle;
     float * filterTable;
     int tableSize;
     EdgeFixup fixupMethod;
 };
 
 void ApplyAngularFilterTask(void * context, int id)
 {
     ApplyAngularFilterContext * ctx = (ApplyAngularFilterContext *)context;
 
     int size = ctx->filteredCube->edgeLength;
 
     int f = id / (size * size);
     int idx = id % (size * size);
     int y = idx / size;
     int x = idx % size;
 
     nvtt::Surface & filteredFace = ctx->filteredCube->face[f];
     FloatImage * filteredImage = filteredFace.m->image;
 
     const Vector3 filterDir = texelDirection(f, x, y, size, ctx->fixupMethod);
 
     // Convolve filter against cube.
     Vector3 color = ctx->inputCube->applyAngularFilter(filterDir, ctx->coneAngle, ctx->filterTable, ctx->tableSize);
 
     filteredImage->pixel(0, idx) = color.x;
     filteredImage->pixel(1, idx) = color.y;
     filteredImage->pixel(2, idx) = color.z;
 }
 
 
 CubeSurface CubeSurface::cosinePowerFilter(int size, float cosinePower, EdgeFixup fixupMethod) const
 {
     // Allocate output cube.
     CubeSurface filteredCube;
     filteredCube.m->allocate(size);
 
     // Texel table is stored along with the surface so that it's compute only once.
     m->allocateTexelTable();
 
     const float threshold = 0.001f;
     const float coneAngle = acosf(powf(threshold, 1.0f/cosinePower));
 
 
     // For each texel of the output cube.
     /*for (uint f = 0; f < 6; f++) {
         nvtt::Surface filteredFace = filteredCube.m->face[f];
         FloatImage * filteredImage = filteredFace.m->image;
 
         for (uint y = 0; y < uint(size); y++) {
             for (uint x = 0; x < uint(size); x++) {
 
                 const Vector3 filterDir = texelDirection(f, x, y, size, fixupMethod);
 
                 // Convolve filter against cube.
                 Vector3 color = m->applyCosinePowerFilter(filterDir, coneAngle, cosinePower);
 
                 filteredImage->pixel(0, x, y, 0) = color.x;
                 filteredImage->pixel(1, x, y, 0) = color.y;
                 filteredImage->pixel(2, x, y, 0) = color.z;
             }
         }
     }*/
 
     ApplyAngularFilterContext context;
     context.inputCube = m;
     context.filteredCube = filteredCube.m;
     context.coneAngle = coneAngle;
     context.fixupMethod = fixupMethod;
 
     context.tableSize = 512;
     context.filterTable = new float[context.tableSize];
 
     // @@ Instead of looking up table between [0 - 1] we should probably use [cos(coneAngle), 1]
 
     for (int i = 0; i < context.tableSize; i++) {
         float f = float(i) / (context.tableSize - 1);
         context.filterTable[i] = powf(f, cosinePower);
     }
     
 
     nv::ParallelFor parallelFor(ApplyAngularFilterTask, &context);
     parallelFor.run(6 * size * size);
 
     // @@ Implement edge averaging.
     if (fixupMethod == EdgeFixup_Average) {
         for (uint f = 0; f < 6; f++) {
             nvtt::Surface filteredFace = filteredCube.m->face[f];
             FloatImage * filteredImage = filteredFace.m->image;
 
             // For each component.
             for (uint c = 0; c < 3; c++) {
                 // @@ For each corner, sample the two adjacent faces.
                 filteredImage->pixel(c, 0, 0, 0);
                 filteredImage->pixel(c, size-1, 0, 0);
                 filteredImage->pixel(c, 0, size-1, 0);
                 filteredImage->pixel(c, size-1, size-1, 0);
 
                 // @@ For each edge, sample the adjacent face.
 
             }
         }
     }
 
     return filteredCube;
 }
 
 
 // Sample cubemap in the given direction.
 Vector3 CubeSurface::Private::sample(const Vector3 & dir)
 {
     int f = -1;
     if (fabs(dir.x) > fabs(dir.y) && fabs(dir.x) > fabs(dir.z)) {
         if (dir.x > 0) f = 0;
         else f = 1;
     }
     else if (fabs(dir.y) > fabs(dir.z)) {
         if (dir.y > 0) f = 2;
         else f = 3;
     }
     else {
         if (dir.z > 0) f = 4;
         else f = 5;
     }
     nvDebugCheck(f != -1);
 
     // uv coordinates corresponding to filterDir.
     float u = dot(dir, faceU[f]);
     float v = dot(dir, faceV[f]);
 
     FloatImage * img = face[f].m->image;
 
     Vector3 color;
     color.x = img->sampleLinearClamp(0, u, v);
     color.y = img->sampleLinearClamp(1, u, v);
     color.z = img->sampleLinearClamp(2, u, v);
 
     return color;
 }
 
 // @@ Not tested!
 CubeSurface CubeSurface::fastResample(int size, EdgeFixup fixupMethod) const
 {
     // Allocate output cube.
     CubeSurface resampledCube;
     resampledCube.m->allocate(size);
 
     // For each texel of the output cube.
     for (uint f = 0; f < 6; f++) {
         nvtt::Surface resampledFace = resampledCube.m->face[f];
         FloatImage * resampledImage = resampledFace.m->image;
 
         for (uint y = 0; y < uint(size); y++) {
             for (uint x = 0; x < uint(size); x++) {
 
                 const Vector3 filterDir = texelDirection(f, x, y, size, fixupMethod);
 
                 Vector3 color = m->sample(filterDir);
 
                 resampledImage->pixel(0, x, y, 0) = color.x;
                 resampledImage->pixel(1, x, y, 0) = color.y;
                 resampledImage->pixel(2, x, y, 0) = color.z;
             }
         }
     }
 
     // @@ Implement edge averaging. Share this code with cosinePowerFilter
     if (fixupMethod == EdgeFixup_Average) {
     }
 
     return resampledCube;
 }
 
 
 void CubeSurface::toLinear(float gamma)
 {
     if (isNull()) return;
 
     detach();
 
     for (int i = 0; i < 6; i++) {
         m->face[i].toLinear(gamma);
     }
 }
 
 void CubeSurface::toGamma(float gamma)
 {
     if (isNull()) return;
 
     detach();
 
     for (int i = 0; i < 6; i++) {
         m->face[i].toGamma(gamma);
     }
 }
 
 
 #if 0
 // @@ Provide solar azimuth.
 #include "ArHoseSkyModel.h"
 void CubeSurface::sky(float turbidity, float albedo[3], float solarElevation) {
 
     ArHosekSkyModelState * skymodel_state[3];
 
     for (int i = 0; i < num_channels; i++) {
         skymodel_state[i] = arhosekskymodelstate_alloc_init(turbidity, albedo[i], solarElevation);
     }
 
     // 700 nm (red), 546.1 nm (green) and 435.8 nm (blue).
     float channel_center[3] = {
-        700,    // Red 620–740,
-        546.1,  // Green 520–570,
-        435.8,  // Blue 450–490,
+        700,    // Red 620-740,
+        546.1,  // Green 520-570,
+        435.8,  // Blue 450-490,
     };
 
     // @@ For each pixel:
     // What's the channel center for the RGB model?
     double  skydome_result[3];
     for (unsigned int i = 0; i < num_channels; i++) {
         skydome_result[i] = arhosekskymodel_radiance(skymodel_state[i], theta, gamma, channel_center[i]);
     }
 
     for (int i = 0; i < num_channels; i++) {
         arhosek_skymodelstate_free(skymodel_state[i]);
     }
 
     /*
     ArHosekXYZSkyModelState * skymodel_state[3];
 
     for (int i = 0; i < num_channels; i++) {
         skymodel_state[i] = arhosek_xyz_skymodelstate_alloc_init(turbidity, albedo[i], solarElevation);
     }
 
     // @@ For each pixel.
     double  skydome_result[3];
     for (unsigned int i = 0; i < num_channels; i++) {
         skydome_result[i] = arhosek_xyz_skymodel_radiance(skymodel_state[i], theta, gamma, i);
     }
 
     for (int i = 0; i < num_channels; i++) {
         arhosek_xyz_skymodelstate_free(skymodel_state[i]);
     }
     */
 }
-#endif
\ No newline at end of file
+#endif