Index: ps/trunk/binaries/data/mods/public/gui/credits/texts/programming.json
===================================================================
--- ps/trunk/binaries/data/mods/public/gui/credits/texts/programming.json	(revision 27269)
+++ ps/trunk/binaries/data/mods/public/gui/credits/texts/programming.json	(revision 27270)
@@ -1,310 +1,311 @@
 {
 	"Title": "Programming",
 	"Content": [
 		{
 			"Title": "Programming managers",
 			"List": [
 				{ "nick": "Acumen", "name": "Stuart Walpole" },
 				{ "nick": "Dak Lozar", "name": "Dave Loeser" },
 				{ "nick": "h20", "name": "Daniel Wilhelm" },
 				{ "nick": "Janwas", "name": "Jan Wassenberg" },
 				{ "nick": "Raj", "name": "Raj Sharma" }
 			]
 		},
 		{
 			"Subtitle": "Special thanks to",
 			"List": [
 				{ "nick": "leper", "name": "Georg Kilzer" },
 				{ "nick": "Ykkrosh", "name": "Philip Taylor" }
 			]
 		},
 		{
 			"List": [
 				{ "nick": "01d55" },
 				{ "nick": "aBothe", "name": "Alexander Bothe" },
 				{ "nick": "Acumen", "name": "Stuart Walpole" },
 				{ "nick": "adrian", "name": "Adrian Boguszewszki" },
 				{ "name": "Adrian Fatol" },
 				{ "nick": "AI-Amsterdam" },
 				{ "nick": "Alan", "name": "Alan Kemp" },
 				{ "nick": "Alex", "name": "Alexander Yakobovich" },
 				{ "nick": "alpha123", "name": "Peter P. Cannici" },
 				{ "nick": "alre" },
 				{ "nick": "Ampaex", "name": "Antonio Vazquez" },
 				{ "name": "André Puel" },
 				{ "nick": "andy5995", "name": "Andy Alt" },
 				{ "nick": "Angen" },
 				{ "nick": "Arfrever", "name": "Arfrever Frehtes Taifersar Arahesis" },
 				{ "nick": "ArnH", "name": "Arno Hemelhof" },
 				{ "nick": "Aurium", "name": "Aurélio Heckert" },
 				{ "nick": "azayrahmad", "name": "Aziz Rahmad" },
 				{ "nick": "baco", "name": "Dionisio E Alonso" },
 				{ "nick": "badmadblacksad", "name": "Martin F" },
 				{ "nick": "badosu", "name": "Amadeus Folego" },
 				{ "nick": "bb", "name": "Bouke Jansen" },
 				{ "nick": "Bellaz89", "name": "Andrea Bellandi" },
 				{ "nick": "Ben", "name": "Ben Vinegar" },
 				{ "nick": "Bird" },
 				{ "nick": "Blue", "name": "Richard Welsh" },
 				{ "nick": "bmwiedemann" },
 				{ "nick": "boeseRaupe", "name": "Michael Kluge" },
 				{ "nick": "bog_dan_ro", "name": "BogDan Vatra" },
 				{ "nick": "Bonk", "name": "Christopher Ebbert" },
 				{ "nick": "Boudica" },
 				{ "nick": "Caius", "name": "Lars Kemmann" },
 				{ "nick": "Calefaction", "name": "Matt Holmes" },
 				{ "nick": "Calvinh", "name": "Carl-Johan Höiby" },
 				{ "nick": "causative", "name": "Bart Parkis" },
 				{ "name": "Cédric Houbart" },
 				{ "nick": "Ceres" },
 				{ "nick": "Chakakhan", "name": "Kenny Long" },
 				{ "nick": "Clockwork-Muse", "name": "Stephen A. Imhoff" },
 				{ "nick": "cpc", "name": "Clément Pit-Claudel" },
 				{ "nick": "Cracker78", "name": "Chad Heim" },
 				{ "nick": "Crynux", "name": "Stephen J. Fewer" },
 				{ "nick": "cwprogger" },
 				{ "nick": "cygal", "name": "Quentin Pradet" },
 				{ "nick": "Dak Lozar", "name": "Dave Loeser" },
 				{ "nick": "dalerank", "name": "Sergey Kushnirenko" },
 				{ "nick": "dan", "name": "Dan Strandberg" },
 				{ "nick": "DanCar", "name": "Daniel Cardenas" },
 				{ "nick": "danger89", "name": "Melroy van den Berg" },
 				{ "name": "Daniel Trevitz" },
 				{ "nick": "Dariost", "name": "Dario Ostuni" },
 				{ "nick": "Dave", "name": "David Protasowski" },
 				{ "name": "David Marshall" },
 				{ "nick": "dax", "name": "Dacian Fiordean" },
 				{ "nick": "deebee", "name": "Deepak Anthony" },
 				{ "nick": "Deiz" },
 				{ "nick": "Dietger", "name": "Dietger van Antwerpen" },
 				{ "nick": "DigitalSeraphim", "name": "Nick Owens" },
 				{ "nick": "dp304" },
 				{ "nick": "dpiquet", "name": "Damien Piquet" },
 				{ "nick": "dumbo" },
 				{ "nick": "Dunedan", "name": "Daniel Roschka" },
 				{ "nick": "dvangennip", "name": "Doménique" },
 				{ "nick": "DynamoFox" },
 				{ "nick": "Echelon9", "name": "Rhys Kidd" },
 				{ "nick": "echotangoecho" },
 				{ "nick": "edoput", "name": "Edoardo Putti"},
 				{ "nick": "eihrul", "name": "Lee Salzman" },
 				{ "nick": "elexis", "name": "Alexander Heinsius" },
 				{ "nick": "EmjeR", "name": "Matthijs de Rijk" },
 				{ "nick": "EMontana" },
 				{ "nick": "ericb" },
 				{ "nick": "evanssthomas", "name": "Evans Thomas" },
 				{ "nick": "Evulant", "name": "Alexander S." },
 				{ "nick": "fabio", "name": "Fabio Pedretti" },
 				{ "nick": "falsevision", "name": "Mahdi Khodadadifard" },
 				{ "nick": "fatherbushido", "name": "Nicolas Tisserand" },
+				{ "nick": "Fatton", "name": "Alexey Beloyarov" },
 				{ "nick": "fcxSanya", "name": "Alexander Olkhovskiy" },
 				{ "nick": "FeXoR", "name": "Florian Finke" },
 				{ "nick": "Fire Giant", "name": "Malte Schwarzkopf" },
 				{ "name": "Fork AD" },
 				{ "nick": "fpre", "name": "Frederick Stallmeyer" },
 				{ "nick": "Freagarach" },
 				{ "nick": "freenity", "name": "Anton Galitch" },
 				{ "nick": "Gallaecio", "name": "Adrián Chaves" },
 				{ "nick": "gbish (aka Iny)", "name": "Grant Bishop" },
 				{ "nick": "Gee", "name": "Gustav Larsson" },
 				{ "nick": "Gentz", "name": "Hal Gentz" },
 				{ "nick": "gerbilOFdoom" },
 				{ "nick": "godlikeldh" },
 				{ "nick": "greybeard", "name": "Joe Cocovich" },
 				{ "nick": "grillaz" },
 				{ "nick": "Grugnas", "name": "Giuseppe Tranchese" },
 				{ "nick": "gudo" },
 				{ "nick": "Guuts", "name": "Matthew Guttag" },
 				{ "nick": "h20", "name": "Daniel Wilhelm" },
 				{ "nick": "Hannibal_Barca", "name": "Clive Juhász S." },
 				{ "nick": "Haommin" },
 				{ "nick": "happyconcepts", "name": "Ben Bird" },
 				{ "nick": "historic_bruno", "name": "Ben Brian" },
 				{ "nick": "hyiltiz", "name": "Hormet Yiltiz" },
 				{ "nick": "idanwin" },
 				{ "nick": "Imarok", "name": "J. S." },
 				{ "nick": "Inari" },
 				{ "nick": "infyquest", "name": "Vijay Kiran Kamuju" },
 				{ "nick": "irishninja", "name": "Brian Broll" },
 				{ "nick": "IronNerd", "name": "Matthew McMullan" },
 				{ "nick": "Itms", "name": "Nicolas Auvray" },
 				{ "nick": "Jaison", "name": "Marco tom Suden" },
 				{ "nick": "jammus", "name": "James Scott" },
 				{ "nick": "Jammyjamjamman", "name": "James Sherratt" },
 				{ "nick": "Janwas", "name": "Jan Wassenberg" },
 				{ "nick": "javiergodas", "name": "Javier Godas Vieitez" },
 				{ "nick": "JCWasmx86" },
 				{ "nick": "Jgwman" },
 				{ "nick": "JonBaer", "name": "Jon Baer" },
 				{ "nick": "Josh", "name": "Joshua J. Bakita" },
 				{ "nick": "joskar", "name": "Johnny Oskarsson" },
 				{ "nick": "jP_wanN", "name": "Jonas Platte" },
 				{ "nick": "jprahman", "name": "Jason Rahman" },
 				{ "nick": "Jubalbarca", "name": "James Baillie" },
 				{ "nick": "JubJub", "name": "Sebastian Vetter" },
 				{ "nick": "jurgemaister" },
 				{ "nick": "kabzerek", "name": "Grzegorz Kabza" },
 				{ "nick": "Kai", "name": "Kai Chen" },
 				{ "nick": "kalev", "name": "Kalev Lember" },
 				{ "name": "Kareem Ergawy" },
 				{ "nick": "karmux", "name": "Karmo Rosental" },
 				{ "nick": "kevmo", "name": "Kevin Caffrey" },
 				{ "nick": "kezz", "name": "Graeme Kerry" },
 				{ "nick": "kingadami", "name": "Adam Winsor" },
 				{ "nick": "kingbasil", "name": "Giannis Fafalios" },
 				{ "nick": "Krinkle", "name": "Timo Tijhof" },
 				{ "nick": "Kuba386", "name": "Jakub Kośmicki" },
 				{ "nick": "lafferjm", "name": "Justin Lafferty" },
 				{ "nick": "Langbart" },
 				{ "nick": "LeanderH", "name": "Leander Hemelhof" },
 				{ "nick": "leper", "name": "Georg Kilzer" },
 				{ "nick": "Link Mauve", "name": "Emmanuel Gil Peyrot" },
 				{ "nick": "LittleDev" },
 				{ "nick": "livingaftermidnight", "name": "Will Dull" },
 				{ "nick": "lonehawk", "name": "Vignesh Krishnan" },
 				{ "nick": "Louhike" },
 				{ "nick": "lsdh" },
 				{ "nick": "Ludovic", "name": "Ludovic Rousseau" },
 				{ "nick": "luiko", "name": "Luis Carlos Garcia Barajas" },
 				{ "nick": "m0l0t0ph", "name": "Christoph Gielisch" },
 				{ "nick": "madmax", "name": "Abhijit Nandy" },
 				{ "nick": "madpilot", "name": "Guido Falsi" },
 				{ "nick": "mammadori", "name": "Marco Amadori" },
 				{ "nick": "marder", "name": "Stefan R. F." },
 				{ "nick": "markcho" },
 				{ "nick": "MarkT", "name": "Mark Thompson" },
 				{ "nick": "Markus" },
 				{ "nick": "Mate-86", "name": "Mate Kovacs" },
 				{ "nick": "Matei", "name": "Matei Zaharia" },
 				{ "nick": "MatSharrow" },
 				{ "nick": "MattDoerksen", "name": "Matt Doerksen" },
 				{ "nick": "mattlott", "name": "Matt Lott" },
 				{ "nick": "maveric", "name": "Anton Protko" },
 				{ "nick": "Micnasty", "name": "Travis Gorkin" },
 				{ "name": "Mikołaj \"Bajter\" Korcz" },
 				{ "nick": "mimo" },
 				{ "nick": "mk12", "name": "Mitchell Kember" },
 				{ "nick": "mmayfield45", "name": "Michael Mayfield" },
 				{ "nick": "mmoanis", "name": "Mohamed Moanis" },
 				{ "nick": "Molotov", "name": "Dario Alvarez" },
 				{ "nick": "mpmoreti", "name": "Marcos Paulo Moreti" },
 				{ "nick": "mreiland", "name": "Michael Reiland" },
 				{ "nick": "myconid" },
 				{ "nick": "n1xc0d3r", "name": "Luis Guerrero" },
 				{ "nick": "nani", "name": "S. N." },
 				{ "nick": "nd3c3nt", "name": "Gavin Fowler" },
 				{ "nick": "nephele" },
 				{ "nick": "Nescio" },
 				{ "nick": "niektb", "name": "Niek ten Brinke" },
 				{ "nick": "nikagra", "name": "Mikita Hradovich" },
 				{ "nick": "njm" },
 				{ "nick": "NoMonkey", "name": "John Mena" },
 				{ "nick": "norsnor" },
 				{ "nick": "notpete", "name": "Rich Cross" },
 				{ "nick": "Nullus" },
 				{ "nick": "nwtour" },
 				{ "nick": "odoaker", "name": "Ágoston Sipos" },
 				{ "nick": "Offensive ePeen", "name": "Jared Ryan Bills" },
 				{ "nick": "Ols", "name": "Oliver Whiteman" },
 				{ "nick": "olsner", "name": "Simon Brenner" },
 				{ "nick": "OptimusShepard", "name": "Pirmin Stanglmeier" },
 				{ "nick": "otero" },
 				{ "nick": "Palaxin", "name": "David A. Freitag" },
 				{ "name": "Paul Withers" },
 				{ "nick": "paulobezerr", "name": "Paulo George Gomes Bezerra" },
 				{ "nick": "pcpa", "name": "Paulo Andrade" },
 				{ "nick": "Pendingchaos" },
 				{ "nick": "PeteVasi", "name": "Pete Vasiliauskas" },
 				{ "nick": "phosit" },
 				{ "nick": "pilino1234" },
 				{ "nick": "PingvinBetyar", "name": "Schronk Tamás" },
 				{ "nick": "plugwash", "name": "Peter Michael Green" },
 				{ "nick": "Polakrity" },
 				{ "nick": "Poya", "name": "Poya Manouchehri" },
 				{ "nick": "prefect", "name": "Nicolai Hähnle" },
 				{ "nick": "Prodigal Son" },
 				{ "nick": "pstumpf", "name": "Pascal Stumpf" },
 				{ "nick": "pszemsza", "name": "Przemek Szałaj" },
 				{ "nick": "pyrolink", "name": "Andrew Decker" },
 				{ "nick": "quantumstate", "name": "Jonathan Waller" },
 				{ "nick": "QuickShot", "name": "Walter Krawec" },
 				{ "nick": "quonter" },
 				{ "nick": "qwertz" },
 				{ "nick": "Radagast" },
 				{ "nick": "Raj", "name": "Raj Sharma" },
 				{ "nick": "ramtzok1", "name": "Ram" },
 				{ "nick": "rapidelectron", "name": "Christian Weihsbach" },
 				{ "nick": "r-a-sattarov", "name": "Ramil Sattarov" },
 				{ "nick": "RedFox", "name": "Jorma Rebane" },
 				{ "nick": "RefinedCode" },
 				{ "nick": "Riemer" },
 				{ "name": "Rolf Sievers" },
 				{ "nick": "s0600204", "name": "Matthew Norwood" },
 				{ "nick": "sacha_vrand", "name": "Sacha Vrand" },
 				{ "nick": "SafaAlfulaij" },
 				{ "name": "Samuel Guarnieri" },
 				{ "nick": "Samulis", "name": "Sam Gossner" },
 				{ "nick": "Sandarac" },
 				{ "nick": "sanderd17", "name": "Sander Deryckere" },
 				{ "nick": "sathyam", "name": "Sathyam Vellal" },
 				{ "nick": "sbirmi", "name": "Sharad Birmiwal" },
 				{ "nick": "sbte", "name": "Sven Baars" },
 				{ "nick": "scroogie", "name": "André Gemünd" },
 				{ "nick": "scythetwirler", "name": "Casey X." },
 				{ "nick": "sera", "name": "Ralph Sennhauser" },
 				{ "nick": "serveurix" },
 				{ "nick": "Shane", "name": "Shane Grant" },
 				{ "nick": "shh" },
 				{ "nick": "Silk", "name": "Josh Godsiff" },
 				{ "nick": "silure" },
 				{ "nick": "Simikolon", "name": "Yannick & Simon" },
 				{ "nick": "smiley", "name": "M. L." },
 				{ "nick": "Spahbod", "name": "Omid Davoodi" },
 				{ "nick": "Stan", "name": "Stanislas Dolcini" },
 				{ "nick": "Stefan" },
 				{ "nick": "StefanBruens", "name": "Stefan Brüns" },
 				{ "nick": "stilz", "name": "Sławomir Zborowski" },
 				{ "nick": "stwf", "name": "Steven Fuchs" },
 				{ "nick": "svott", "name": "Sven Ott" },
 				{ "nick": "t4nk004" },
 				{ "nick": "tau" },
 				{ "nick": "tbm", "name": "Martin Michlmayr" },
 				{ "nick": "Teiresias" },
 				{ "nick": "temple" },
 				{ "nick": "texane" },
 				{ "nick": "thamlett", "name": "Timothy Hamlett" },
 				{ "nick": "thedrunkyak", "name": "Dan Fuhr" },
 				{ "nick": "Tobbi" },
 				{ "nick": "Toonijn", "name": "Toon Baeyens" },
 				{ "nick": "TrinityDeath", "name": "Jethro Lu" },
 				{ "nick": "triumvir", "name": "Corin Schedler" },
 				{ "nick": "trompetin17", "name": "Juan Guillermo" },
 				{ "nick": "tpearson", "name": "Timothy Pearson" },
 				{ "nick": "user1", "name": "A. C." },
 				{ "nick": "usey11" },
 				{ "nick": "vincent_c", "name": "Vincent Cheng" },
 				{ "nick": "vinhig", "name": "Vincent Higginson" },
 				{ "nick": "vladislavbelov", "name": "Vladislav Belov" },
 				{ "nick": "voroskoi" },
 				{ "nick": "vts", "name": "Jeroen DR" },
 				{ "nick": "wacko", "name": "Andrew Spiering" },
 				{ "nick": "WhiteTreePaladin", "name": "Brian Ashley" },
 				{ "nick": "wowgetoffyourcellphone", "name": "Justus Avramenko" },
 				{ "nick": "wraitii", "name": "Lancelot de Ferrière le Vayer" },
 				{ "nick": "Xentelian", "name": "Mark Strawson" },
 				{ "nick": "Xienen", "name": "Dayle Flowers" },
 				{ "nick": "xone47", "name": "Brent Johnson" },
 				{ "nick": "xtizer", "name": "Matt Green" },
 				{ "nick": "yashi", "name": "Yasushi Shoji" },
 				{ "nick": "Ykkrosh", "name": "Philip Taylor" },
 				{ "nick": "Yves" },
 				{ "nick": "z0rg", "name": "Sébastien Maire" },
 				{ "nick": "Zeusthor", "name": "Jeffrey Tavares" },
 				{ "nick": "zoot" },
 				{ "nick": "zsol", "name": "Zsolt Dollenstein" },
 				{ "nick": "ztamas", "name": "Tamas Zolnai" },
 				{ "nick": "Zyi", "name": "Charles De Meulenaer" }
 			]
 		}
 	]
 }
Index: ps/trunk/libraries/source/nvtt/build.sh
===================================================================
--- ps/trunk/libraries/source/nvtt/build.sh	(revision 27269)
+++ ps/trunk/libraries/source/nvtt/build.sh	(revision 27270)
@@ -1,71 +1,71 @@
 #!/bin/sh
 set -e
-LIB_VERSION="nvtt-2.1.1+wildfiregames.4"
+LIB_VERSION="nvtt-2.1.1+wildfiregames.5"
 JOBS=${JOBS:="-j2"}
 MAKE=${MAKE:="make"}
 LDFLAGS=${LDFLAGS:=""}
 CFLAGS=${CFLAGS:=""}
 CXXFLAGS=${CXXFLAGS:=""}
 CMAKE_FLAGS=${CMAKE_FLAGS:=""}
 
 if [ -e .already-built ] && [ "$(cat .already-built)" = "${LIB_VERSION}" ]
 then
   echo "NVTT is already up to date."
   exit
 fi
 
 echo "Building NVTT..."
 echo
 
 rm -f .already-built
 rm -f lib/*.a
 rm -rf src/build/
 mkdir -p src/build/
 cd src/build/
 
 if [ "$(uname -s)" = "Darwin" ]; then
   # Could use CMAKE_OSX_DEPLOYMENT_TARGET and CMAKE_OSX_SYSROOT
   # but they're not as flexible for cross-compiling
   # Disable png support (avoids some conflicts with MacPorts)
   cmake .. \
     -DCMAKE_LINK_FLAGS="$LDFLAGS" \
     -DCMAKE_C_FLAGS="$CFLAGS" \
     -DCMAKE_CXX_FLAGS="$CXXFLAGS" \
     -DCMAKE_BUILD_TYPE=Release \
     $CMAKE_FLAGS \
     -DBINDIR=bin \
     -DLIBDIR=lib \
     -DPNG=0 \
     -G "Unix Makefiles"
 else
   cmake .. \
     -DCMAKE_LINK_FLAGS="$LDFLAGS" \
     -DCMAKE_C_FLAGS="$CFLAGS" \
     -DCMAKE_CXX_FLAGS="$CXXFLAGS" \
     -DCMAKE_BUILD_TYPE=Release \
     -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
     $CMAKE_FLAGS \
     -DNVTT_SHARED=1 \
     -DOpenGL_GL_PREFERENCE=GLVND \
     -DBINDIR=bin \
     -DLIBDIR=lib \
     -G "Unix Makefiles"
 fi
 
 ("${MAKE}" clean && "${MAKE}" nvtt "${JOBS}") || die "NVTT build failed"
 cd ../../
 mkdir -p lib/
 LIB_PREFIX=lib
 
 if [ "$(uname -s)" = "Darwin" ]; then
   LIB_EXTN=a
   cp src/build/src/bc*/"${LIB_PREFIX}"bc*."${LIB_EXTN}" lib/
   cp src/build/src/nvtt/squish/"${LIB_PREFIX}"squish."${LIB_EXTN}" lib/
 else
   LIB_EXTN=so
   cp src/build/src/nv*/"${LIB_PREFIX}"nv*."${LIB_EXTN}" ../../../binaries/system/
 fi
 
 cp src/build/src/nv*/"${LIB_PREFIX}"nv*."${LIB_EXTN}" lib/
 
 echo "$LIB_VERSION" > .already-built
Index: ps/trunk/libraries/source/nvtt/src/extern/poshlib/posh.h
===================================================================
--- ps/trunk/libraries/source/nvtt/src/extern/poshlib/posh.h	(revision 27269)
+++ ps/trunk/libraries/source/nvtt/src/extern/poshlib/posh.h	(revision 27270)
@@ -1,1034 +1,1050 @@
 /**
 @file posh.h
 @author Brian Hook
 @version 1.3.001
 
 Header file for POSH, the Portable Open Source Harness project.
 
 NOTE: Unlike most header files, this one is designed to be included
 multiple times, which is why it does not have the @#ifndef/@#define
 preamble.
 
 POSH relies on environment specified preprocessor symbols in order
 to infer as much as possible about the target OS/architecture and
 the host compiler capabilities.
 
 NOTE: POSH is simple and focused. It attempts to provide basic
 functionality and information, but it does NOT attempt to emulate
 missing functionality.  I am also not willing to make POSH dirty
 and hackish to support truly ancient and/or outmoded and/or bizarre
 technologies such as non-ANSI compilers, systems with non-IEEE
 floating point formats, segmented 16-bit operating systems, etc.
 
 Please refer to the accompanying HTML documentation or visit
 http://www.poshlib.org for more information on how to use POSH.
 
 LICENSE:
 
 Copyright (c) 2004, Brian Hook
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are
 met:
 
     * Redistributions of source code must retain the above copyright
       notice, this list of conditions and the following disclaimer.
 
     * Redistributions in binary form must reproduce the above
       copyright notice, this list of conditions and the following
       disclaimer in the documentation and/or other materials provided
       with the distribution.
 
     * The names of this package'ss contributors contributors may not
       be used to endorse or promote products derived from this
       software without specific prior written permission.
 
 
 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 REVISION:
 
 I've been lax about revision histories, so this starts at, um, 1.3.001.
 Sorry for any inconveniences.
 
 1.3.001 - 2/23/2006 - Incorporated fix for bug reported by Bill Cary,
                       where I was not detecting Visual Studio
                       compilation on x86-64 systems.  Added check for
                       _M_X64 which should fix that.
 
 */
 /*
 I have yet to find an authoritative reference on preprocessor
 symbols, but so far this is what I've gleaned:
 
 GNU GCC/G++:
    - __GNUC__: GNU C version
    - __GNUG__: GNU C++ compiler
    - __sun__ : on Sun platforms
    - __svr4__: on Solaris and other SysV R4 platforms
    - __mips__: on MIPS processor platforms
    - __sparc_v9__: on Sparc 64-bit CPUs
    - __sparcv9: 64-bit Solaris
    - __MIPSEL__: mips processor, compiled for little endian
    - __MIPSEB__: mips processor, compiled for big endian
    - _R5900: MIPS/Sony/Toshiba R5900 (PS2)
    - mc68000: 68K
    - m68000: 68K
    - m68k: 68K
    - __palmos__: PalmOS
 
 Intel C/C++ Compiler:
    - __ECC      : compiler version, IA64 only
    - __EDG__
    - __ELF__
    - __GXX_ABI_VERSION
    - __i386     : IA-32 only
    - __i386__   : IA-32 only
    - i386       : IA-32 only
    - __ia64     : IA-64 only
    - __ia64__   : IA-64 only
    - ia64       : IA-64 only
    - __ICC      : IA-32 only
    - __INTEL_COMPILER : IA-32 or IA-64, newer versions only
 
 Apple's C/C++ Compiler for OS X:
    - __APPLE_CC__
    - __APPLE__
    - __BIG_ENDIAN__
    - __APPLE__
    - __ppc__
    - __MACH__
 
 DJGPP:
    - __MSDOS__
    - __unix__
    - __unix
    - __GNUC__
    - __GO32
    - DJGPP
    - __i386, __i386, i386
 
 Cray's C compiler:
    - _ADDR64: if 64-bit pointers
    - _UNICOS: 
    - __unix:
 
 SGI's CC compiler predefines the following (and more) with -ansi:
    - __sgi
    - __unix
    - __host_mips
    - _SYSTYPE_SVR4
    - __mips
    - _MIPSEB
    - anyone know if there is a predefined symbol for the compiler?!
 
 MinGW:
    - as GnuC but also defines _WIN32, __WIN32, WIN32, _X86_, __i386, __i386__, and several others
    - __MINGW32__
 
 Cygwin:
    - as Gnu C, but also
    - __unix__
    - __CYGWIN32__
 
 Microsoft Visual Studio predefines the following:
    - _MSC_VER
    - _WIN32: on Win32
-   - _M_IX6 (on x86 systems)
+   - _M_IX86 (on x86 systems)
    - _M_X64: on x86-64 systems
    - _M_ALPHA (on DEC AXP systems)
    - _SH3: WinCE, Hitachi SH-3
    - _MIPS: WinCE, MIPS
    - _ARM: WinCE, ARM
 
 Sun's C Compiler:
    - sun and _sun
    - unix and _unix
    - sparc and _sparc (SPARC systems only)
    - i386 and _i386 (x86 systems only)
    - __SVR4 (Solaris only)
    - __sparcv9: 64-bit solaris
    - __SUNPRO_C
    - _LP64: defined in 64-bit LP64 mode, but only if <sys/types.h> is included
 
 Borland C/C++ predefines the following:
    - __BORLANDC__:
 
 DEC/Compaq C/C++ on Alpha:
    - __alpha
    - __arch64__
    - __unix__ (on Tru64 Unix)
    - __osf__
    - __DECC
    - __DECCXX (C++ compilation)
    - __DECC_VER
    - __DECCXX_VER
 
 IBM's AIX compiler:
    - __64BIT__ if 64-bit mode
    - _AIX
    - __IBMC__: C compiler version
    - __IBMCPP__: C++ compiler version
    - _LONG_LONG: compiler allows long long
 
 Watcom:
    - __WATCOMC__
    - __DOS__ : if targeting DOS
    - __386__ : if 32-bit support
-   - __WIN32__ : if targetin 32-bit Windows
+   - __WIN32__ : if targeting 32-bit Windows
 
 HP-UX C/C++ Compiler:
    - __hpux
    - __unix
    - __hppa (on PA-RISC)
    - __LP64__: if compiled in 64-bit mode
 
 Metrowerks:
    - __MWERKS__
    - __powerpc__
    - _powerc
    - __MC68K__
    - macintosh when compiling for MacOS
    - __INTEL__ for x86 targets
    - __POWERPC__
 
 LLVM:
    - __llvm__
    - __clang__
+
+MCST LCC (eLbrus Compiler Collection):
+   - __LCC__
+   - __MCST__
+   - __e2k__: on MCST E2K (Elbrus 2000) processor platforms
+   - __sparc__ and __sparc: on MCST R (SPARC v9) processor platforms
 */
 
 /*
 ** ----------------------------------------------------------------------------
 ** Include <limits.h> optionally
 ** ----------------------------------------------------------------------------
 */
 #ifdef POSH_USE_LIMITS_H
 #  include <limits.h>
 #endif
 
 /*
 ** ----------------------------------------------------------------------------
 ** Determine compilation environment
 ** ----------------------------------------------------------------------------
 */
 #if defined __ECC || defined __ICC || defined __INTEL_COMPILER
 #  define POSH_COMPILER_STRING "Intel C/C++"
 #  define POSH_COMPILER_INTEL 1
 #endif
 
 #if ( defined __host_mips || defined __sgi ) && !defined __GNUC__
 #  define POSH_COMPILER_STRING    "MIPSpro C/C++"
 #  define POSH_COMPILER_MIPSPRO 1 
 #endif
 
 #if defined __hpux && !defined __GNUC__
 #  define POSH_COMPILER_STRING "HP-UX CC"
 #  define POSH_COMPILER_HPCC 1 
 #endif
 
 #if defined __clang__
 #  define POSH_COMPILER_STRING "Clang"
 #  define POSH_COMPILER_CLANG 1
 #endif
 
 #if defined __GNUC__ && !defined __clang__
 #  define POSH_COMPILER_STRING "Gnu GCC"
 #  define POSH_COMPILER_GCC 1
 #endif
 
 #if defined __APPLE_CC__
    /* we don't define the compiler string here, let it be GNU */
 #  define POSH_COMPILER_APPLECC 1
 #endif
 
 #if defined __IBMC__ || defined __IBMCPP__
 #  define POSH_COMPILER_STRING "IBM C/C++"
 #  define POSH_COMPILER_IBM 1
 #endif
 
 #if defined _MSC_VER
 #  define POSH_COMPILER_STRING "Microsoft Visual C++"
 #  define POSH_COMPILER_MSVC 1
 #endif
 
 #if defined __SUNPRO_C
 #  define POSH_COMPILER_STRING "Sun Pro" 
 #  define POSH_COMPILER_SUN 1
 #endif
 
 #if defined __BORLANDC__
 #  define POSH_COMPILER_STRING "Borland C/C++"
 #  define POSH_COMPILER_BORLAND 1
 #endif
 
 #if defined __MWERKS__
 #  define POSH_COMPILER_STRING     "MetroWerks CodeWarrior"
 #  define POSH_COMPILER_METROWERKS 1
 #endif
 
 #if defined __DECC || defined __DECCXX
 #  define POSH_COMPILER_STRING "Compaq/DEC C/C++"
 #  define POSH_COMPILER_DEC 1
 #endif
 
 #if defined __WATCOMC__
 #  define POSH_COMPILER_STRING "Watcom C/C++"
 #  define POSH_COMPILER_WATCOM 1
 #endif
 
+#if defined __LCC__ && defined __MCST__
+   /* MCST LCC: deliberately no POSH_COMPILER_STRING here, the GNU one from above is kept (presumably LCC also defines __GNUC__ - confirm) */
+#  define POSH_COMPILER_MCST_LCC 1
+#endif
+
 #if !defined POSH_COMPILER_STRING
 #  define POSH_COMPILER_STRING "Unknown compiler"
 #endif
 
 /*
 ** ----------------------------------------------------------------------------
 ** Determine target operating system
 ** ----------------------------------------------------------------------------
 */
 #if defined linux || defined __linux__
 #  define POSH_OS_LINUX 1 
 #  define POSH_OS_STRING "Linux"
 #endif
 
 #if defined __FreeBSD__
 #  define POSH_OS_FREEBSD 1 
 #  define POSH_OS_STRING "FreeBSD"
 #endif
 
 #if defined __NetBSD__
 #  define POSH_OS_NETBSD 1
 #  define POSH_OS_STRING "NetBSD"
 #endif
 
 #if defined __OpenBSD__
 #  define POSH_OS_OPENBSD 1
 #  define POSH_OS_STRING "OpenBSD"
 #endif
 
 #if defined __CYGWIN32__
 #  define POSH_OS_CYGWIN32 1
 #  define POSH_OS_STRING "Cygwin"
 #endif
 
 #if defined GEKKO
 #  define POSH_OS_GAMECUBE
 #  define __powerpc__
 #  define POSH_OS_STRING "GameCube"
 #endif
 
 #if defined __MINGW32__
 #  define POSH_OS_MINGW 1
 #  define POSH_OS_STRING "MinGW"
 #endif
 
 #if defined GO32 && defined DJGPP && defined __MSDOS__
 #  define POSH_OS_GO32 1
 #  define POSH_OS_STRING "GO32/MS-DOS"
 #endif
 
 /* NOTE: make sure you use /bt=DOS if compiling for 32-bit DOS,
    otherwise Watcom assumes host=target */
 #if defined __WATCOMC__  && defined __386__ && defined __DOS__
 #  define POSH_OS_DOS32 1
 #  define POSH_OS_STRING "DOS/32-bit"
 #endif
 
 #if defined _UNICOS
 #  define POSH_OS_UNICOS 1
 #  define POSH_OS_STRING "UNICOS"
 #endif
 
 #if ( defined __MWERKS__ && defined __powerc && !defined macintosh ) || defined __APPLE_CC__ || defined macosx
 #  define POSH_OS_OSX 1
 #  define POSH_OS_STRING "MacOS X"
 #endif
 
 #if defined __sun__ || defined sun || defined __sun || defined __solaris__
 #  if defined __SVR4 || defined __svr4__ || defined __solaris__
 #     define POSH_OS_STRING "Solaris"
 #     define POSH_OS_SOLARIS 1
 #  endif
 #  if !defined POSH_OS_STRING
 #     define POSH_OS_STRING "SunOS"
 #     define POSH_OS_SUNOS 1
 #  endif
 #endif
 
 #if defined __sgi__ || defined sgi || defined __sgi
 #  define POSH_OS_IRIX 1
 #  define POSH_OS_STRING "Irix"
 #endif
 
 #if defined __hpux__ || defined __hpux
 #  define POSH_OS_HPUX 1
 #  define POSH_OS_STRING "HP-UX"
 #endif
 
 #if defined _AIX
 #  define POSH_OS_AIX 1
 #  define POSH_OS_STRING "AIX"
 #endif
 
 #if ( defined __alpha && defined __osf__ )
 #  define POSH_OS_TRU64 1
 #  define POSH_OS_STRING "Tru64"
 #endif
 
 #if defined __BEOS__ || defined __beos__
 #  define POSH_OS_BEOS 1
 #  define POSH_OS_STRING "BeOS"
 #endif
 
 #if defined amiga || defined amigados || defined AMIGA || defined _AMIGA
 #  define POSH_OS_AMIGA 1
 #  define POSH_OS_STRING "Amiga"
 #endif
 
 #if defined __unix__
 #  define POSH_OS_UNIX 1 
 #  if !defined POSH_OS_STRING
 #     define POSH_OS_STRING "Unix-like(generic)"
 #  endif
 #endif
 
 #if defined _WIN32_WCE
 #  define POSH_OS_WINCE 1
 #  define POSH_OS_STRING "Windows CE"
 #endif
 
 #if defined _XBOX || defined _XBOX_VER
 #  define POSH_OS_XBOX 1
 #  define POSH_OS_STRING "XBOX"
 #endif
 
 #if defined _WIN32 || defined WIN32 || defined __NT__ || defined __WIN32__
 #  define POSH_OS_WIN32 1
 #  if !defined POSH_OS_XBOX
 #     if defined _WIN64
 #        define POSH_OS_WIN64 1
 #        define POSH_OS_STRING "Win64"
 #     else
 #        if !defined POSH_OS_STRING
 #           define POSH_OS_STRING "Win32"
 #        endif
 #     endif
 #  endif
 #endif
 
 #if defined __palmos__
 #  define POSH_OS_PALM 1
 #  define POSH_OS_STRING "PalmOS"
 #endif
 
 #if defined THINK_C || defined macintosh
 #  define POSH_OS_MACOS 1
 #  define POSH_OS_STRING "MacOS"
 #endif
 
 /*
 ** -----------------------------------------------------------------------------
 ** Determine target CPU
 ** -----------------------------------------------------------------------------
 */
 
 #if defined GEKKO
 #  define POSH_CPU_PPC750 1
 #  define POSH_CPU_STRING "IBM PowerPC 750 (NGC)"
 #endif
 
 #if defined mc68000 || defined m68k || defined __MC68K__ || defined m68000
 #  define POSH_CPU_68K 1
 #  define POSH_CPU_STRING "MC68000"
 #endif
 
 #if defined __PPC__ || defined __POWERPC__  || defined powerpc || defined _POWER || defined __ppc__ || defined __powerpc__ || defined _M_PPC
 #  define POSH_CPU_PPC 1
 #  if !defined POSH_CPU_STRING
 #    if defined __powerpc64__
 #       define POSH_CPU_PPC64 1
 #       define POSH_CPU_STRING "PowerPC64"
 #    else
 #       define POSH_CPU_STRING "PowerPC"
 #    endif
 #  endif
 #endif
 
 #if defined _CRAYT3E || defined _CRAYMPP
 #  define POSH_CPU_CRAYT3E 1 /* target processor is a DEC Alpha 21164 used in a Cray T3E*/
 #  define POSH_CPU_STRING "Cray T3E (Alpha 21164)"
 #endif
 
 #if defined CRAY || defined _CRAY && !defined _CRAYT3E
 #  error Non-AXP Cray systems not supported
 #endif
 
 /* Hitachi SH-3 */
 #if defined( _SH3 )
 #  define POSH_CPU_STRING "Hitachi SH-3"
 #  define POSH_CPU_SH3 1
 #endif
 
 /* Hitachi SH-4 (also sets the SH-3 flag) */
 #if defined( __SH4__ ) || defined( __sh4__ )
 #  define POSH_CPU_STRING "Hitachi SH-4"
 #  define POSH_CPU_SH4 1
 #  define POSH_CPU_SH3 1
 #endif
 
 /* SPARC, with a separate flag and name for the 64-bit variant */
 #if defined( __sparc ) || defined( __sparc__ )
 #  define POSH_CPU_SPARC 1
 #  if defined( __sparc_v9__ ) || defined( __sparcv9 ) || defined( __arch64__ )
 #    define POSH_CPU_STRING "Sparc/64"
 #    define POSH_CPU_SPARC64 1
 #  else
 #    define POSH_CPU_STRING "Sparc/32"
 #  endif
 #endif
 
 /* 32-bit ARM */
 #if defined( _ARM ) || defined( __arm__ ) || defined( ARM )
 #  define POSH_CPU_STRING "ARM"
 #  define POSH_CPU_STRONGARM 1
 #endif
 
 /* 64-bit ARM */
 #if defined( __aarch64__ )
 #  define POSH_CPU_STRING "ARM64"
 #  define POSH_CPU_AARCH64 1
 #endif
 
 /* MIPS; _R5900 selects the PS2-specific name */
 #if defined( _MIPS ) || defined( __MIPS__ ) || defined( __mips__ ) || defined( mips )
 #  if !defined( _R5900 )
 #    define POSH_CPU_STRING "MIPS"
 #  else
 #    define POSH_CPU_STRING "MIPS R5900 (PS2)"
 #  endif
 #  define POSH_CPU_MIPS 1
 #endif
 
 /* Itanium */
 #if defined( __ia64__ ) || defined( _M_IA64 ) || defined( __ia64 )
 #  define POSH_CPU_STRING "IA64"
 #  define POSH_CPU_IA64 1
 #endif
 
 /* x86, 32-bit and 64-bit; POSH_CPU_X86 is set for both */
 #if defined( __X86__ ) || defined( __i386__ ) || defined( i386 ) || defined( _M_IX86 ) || defined( __386__ ) || defined( __x86_64__ ) || defined( _M_X64 )
 #  define POSH_CPU_X86 1
 #  if defined( _M_X64 ) || defined( __x86_64__ )
 #    define POSH_CPU_X86_64 1
 #  endif
 #  if !defined( POSH_CPU_X86_64 )
 #    define POSH_CPU_STRING "Intel 386+"
 #  else
 #    define POSH_CPU_STRING "AMD x86-64"
 #  endif
 #endif
 
 /* DEC Alpha */
 #if defined( __alpha__ ) || defined( _M_ALPHA ) || defined( alpha ) || defined( __alpha )
 #  define POSH_CPU_STRING "AXP"
 #  define POSH_CPU_AXP 1
 #endif
 
 /* HP PA-RISC */
 #if defined( hppa ) || defined( __hppa )
 #  define POSH_CPU_STRING "PA-RISC"
 #  define POSH_CPU_HPPA 1
 #endif
 
+#if defined __e2k__
+#  define POSH_CPU_E2K 1
+#  define POSH_CPU_STRING "MCST E2K"
+#endif
+
 /* No CPU check has defined POSH_CPU_STRING: stop the build with an explicit
    message instead of continuing with an unknown target. */
 #if !defined POSH_CPU_STRING
 #  error POSH cannot determine target CPU
 #  define POSH_CPU_STRING "Unknown" /* this is here for Doxygen's benefit */
 #endif
 
 /*
 ** -----------------------------------------------------------------------------
 ** Attempt to autodetect building for embedded on Sony PS2
 ** -----------------------------------------------------------------------------
 */
 #ifndef POSH_OS_STRING
 /* No OS name has been set: treat the target as embedded.  The flag itself is
    left out when FORCE_DOXYGEN is defined. */
 #  ifndef FORCE_DOXYGEN
 #    define POSH_OS_EMBEDDED 1
 #  endif
 /* _R5900 selects the PS2 name; anything else is reported as unknown. */
 #  ifndef _R5900
 #    define POSH_OS_STRING "Embedded/Unknown"
 #  else
 #    define POSH_OS_STRING "Sony PS2(embedded)"
 #  endif
 #endif
 
 /*
 ** ---------------------------------------------------------------------------
 ** Handle cdecl, stdcall, fastcall, etc.
 ** ---------------------------------------------------------------------------
 */
 #if !defined( POSH_CPU_X86 ) || defined( POSH_CPU_X86_64 )
 /* Anything that is not 32-bit x86: the keywords expand to nothing. */
 #  define POSH_CDECL
 #  define POSH_STDCALL
 #  define POSH_FASTCALL
 #elif defined( __GNUC__ )
 /* 32-bit x86 with GCC-style attributes. */
 #  define POSH_CDECL    __attribute__((cdecl))
 #  define POSH_STDCALL  __attribute__((stdcall))
 #  define POSH_FASTCALL __attribute__((fastcall))
 #elif defined( _MSC_VER ) || defined( __WATCOMC__ ) || defined( __BORLANDC__ ) || defined( __MWERKS__ )
 /* 32-bit x86 with keyword-style calling conventions. */
 #  define POSH_CDECL    __cdecl
 #  define POSH_STDCALL  __stdcall
 #  define POSH_FASTCALL __fastcall
 #endif
 
 /*
 ** ---------------------------------------------------------------------------
 ** Define POSH_IMPORTEXPORT signature based on POSH_DLL and POSH_BUILDING_LIB
 ** ---------------------------------------------------------------------------
 */
 
 /*
 ** We undefine this so that multiple inclusions will work
 */
 #if defined POSH_IMPORTEXPORT
 #  undef POSH_IMPORTEXPORT
 #endif
 
 /* Only a DLL build (POSH_DLL) on Win32 gets an explicit decoration here.
    Within each compiler section POSH_BUILDING_LIB selects the exporting form
    and its absence selects the importing form. */
 #if defined POSH_DLL
 #   if defined POSH_OS_WIN32
 #      if defined _MSC_VER 
 #         if ( _MSC_VER >= 800 )
 #            if defined POSH_BUILDING_LIB
 #               define POSH_IMPORTEXPORT __declspec( dllexport )
 #            else
 #               define POSH_IMPORTEXPORT __declspec( dllimport )
 #            endif
 #         else
             /* _MSC_VER below 800: __export when building, nothing when importing */
 #            if defined POSH_BUILDING_LIB
 #               define POSH_IMPORTEXPORT __export
 #            else
 #               define POSH_IMPORTEXPORT 
 #            endif
 #         endif
 #      endif  /* defined _MSC_VER */
 #      if defined __BORLANDC__
 #         if ( __BORLANDC__ >= 0x500 )
 #            if defined POSH_BUILDING_LIB 
 #               define POSH_IMPORTEXPORT __declspec( dllexport )
 #            else
 #               define POSH_IMPORTEXPORT __declspec( dllimport )
 #            endif
 #         else
             /* __BORLANDC__ below 0x500: same fallback as the old MSVC branch */
 #            if defined POSH_BUILDING_LIB
 #               define POSH_IMPORTEXPORT __export
 #            else
 #               define POSH_IMPORTEXPORT 
 #            endif
 #         endif
 #      endif /* defined __BORLANDC__ */
        /* for all other compilers, we're just making a blanket assumption */
 #      if defined __GNUC__ || defined __WATCOMC__ || defined __MWERKS__
 #         if defined POSH_BUILDING_LIB
 #            define POSH_IMPORTEXPORT __declspec( dllexport )
 #         else
 #            define POSH_IMPORTEXPORT __declspec( dllimport )
 #         endif
 #      endif /* all other compilers */
        /* none of the compiler sections above matched */
 #      if !defined POSH_IMPORTEXPORT
 #         error Building DLLs not supported on this compiler (poshlib@poshlib.org if you know how)
 #      endif
 #   endif /* defined POSH_OS_WIN32 */
 #endif
 
 /* On pretty much everything else, we can thankfully just ignore this */
 #if !defined POSH_IMPORTEXPORT
 #  define POSH_IMPORTEXPORT
 #endif
 
 #if defined FORCE_DOXYGEN
 #  define POSH_DLL    
 #  define POSH_BUILDING_LIB
 #  undef POSH_DLL
 #  undef POSH_BUILDING_LIB
 #endif
 
 /*
 ** ----------------------------------------------------------------------------
 ** (Re)define POSH_PUBLIC_API export signature 
 ** ----------------------------------------------------------------------------
 */
 /* #undef is a no-op when the macro is not defined, so no guard is needed. */
 #undef POSH_PUBLIC_API
 
 /* MSVC below 800 and Borland below 0x500 take the decoration after the return
    type; every other compiler takes it before the return type. */
 #if ( !defined( _MSC_VER ) || ( _MSC_VER >= 800 ) ) && ( !defined( __BORLANDC__ ) || ( __BORLANDC__ >= 0x500 ) )
 #  define POSH_PUBLIC_API(rtype) extern POSH_IMPORTEXPORT rtype
 #else
 #  define POSH_PUBLIC_API(rtype) extern rtype POSH_IMPORTEXPORT
 #endif
 
 /*
 ** ----------------------------------------------------------------------------
 ** Try to infer endianness.  Basically we just go through the CPUs we know are
 ** little endian, and assume anything that isn't one of those is big endian.
 ** As a sanity check, we also do this with operating systems we know are
 ** little endian, such as Windows.  Some processors are bi-endian, such as 
 ** the MIPS series, so we have to be careful about those.
 ** ----------------------------------------------------------------------------
 */
-#if defined POSH_CPU_X86 || defined POSH_CPU_AXP || defined POSH_CPU_STRONGARM || defined POSH_CPU_AARCH64 || defined POSH_OS_WIN32 || defined POSH_OS_WINCE || defined __MIPSEL__ || defined __ORDER_LITTLE_ENDIAN__
+#if defined POSH_CPU_X86 || defined POSH_CPU_AXP || defined POSH_CPU_STRONGARM || defined POSH_CPU_AARCH64 || defined POSH_CPU_E2K || defined POSH_OS_WIN32 || defined POSH_OS_WINCE || defined __MIPSEL__ || defined __ORDER_LITTLE_ENDIAN__
 #  define POSH_ENDIAN_STRING "little"
 #  define POSH_LITTLE_ENDIAN 1
 #else
 #  define POSH_ENDIAN_STRING "big"
 #  define POSH_BIG_ENDIAN 1
 #endif
 
 #if defined FORCE_DOXYGEN
 #  define POSH_LITTLE_ENDIAN
 #endif
 
 /*
 ** ----------------------------------------------------------------------------
 ** Cross-platform compile time assertion macro
 ** ----------------------------------------------------------------------------
 */
 /* Declares a typedef of an int array whose size is 1 when x is true and -1
    when x is false; a negative array size does not compile, so a false
    condition stops the build.  'name' is pasted onto _POSH_dummy_ to form the
    typedef name. */
 #define POSH_COMPILE_TIME_ASSERT(name, x) typedef int _POSH_dummy_ ## name[(x) ? 1 : -1 ]
 
 /*
 ** ----------------------------------------------------------------------------
 ** 64-bit Integer
 **
 ** We don't require 64-bit support, nor do we emulate its functionality, we
 ** simply export it if it's available.  Since we can't count on <limits.h>
 ** for 64-bit support, we ignore the POSH_USE_LIMITS_H directive.
 ** ----------------------------------------------------------------------------
 */
-#if defined ( __LP64__ ) || defined ( __powerpc64__ ) || defined POSH_CPU_SPARC64
+#if defined ( __LP64__ ) || defined ( __powerpc64__ ) || defined POSH_CPU_SPARC64 || defined POSH_CPU_E2K
 #  define POSH_64BIT_INTEGER 1
 typedef long posh_i64_t; 
 typedef unsigned long posh_u64_t;
 #  define POSH_I64( x ) ((posh_i64_t)x)
 #  define POSH_U64( x ) ((posh_u64_t)x)
 #  define POSH_I64_PRINTF_PREFIX "l"
 #elif defined _MSC_VER || defined __BORLANDC__ || defined __WATCOMC__ || ( defined __alpha && defined __DECC )
 #  define POSH_64BIT_INTEGER 1
 typedef __int64 posh_i64_t;
 typedef unsigned __int64 posh_u64_t;
 #  define POSH_I64( x ) ((posh_i64_t)(x##i64))
 #  define POSH_U64( x ) ((posh_u64_t)(x##ui64))
 #  define POSH_I64_PRINTF_PREFIX "I64"
 #elif defined __GNUC__ || defined __MWERKS__ || defined __SUNPRO_C || defined __SUNPRO_CC || defined __APPLE_CC__ || defined POSH_OS_IRIX || defined _LONG_LONG || defined _CRAYC
 #  define POSH_64BIT_INTEGER 1
 typedef long long posh_i64_t;
 typedef unsigned long long posh_u64_t;
 #  define POSH_U64( x ) ((posh_u64_t)(x##LL))
 #  define POSH_I64( x ) ((posh_i64_t)(x##LL))
 #  define POSH_I64_PRINTF_PREFIX "ll"
 #endif
 
 /* hack */
 /*#ifdef __MINGW32__
 #undef POSH_I64
 #undef POSH_U64
 #undef POSH_I64_PRINTF_PREFIX
 #define POSH_I64( x ) ((posh_i64_t)x)
 #define POSH_U64( x ) ((posh_u64_t)x)
 #define POSH_I64_PRINTF_PREFIX "I64"
 #endif*/
 
 #ifdef FORCE_DOXYGEN
 typedef long long posh_i64_t;
 typedef unsigned long posh_u64_t;
 #  define POSH_64BIT_INTEGER
 #  define POSH_I64_PRINTF_PREFIX
 #  define POSH_I64(x)
 #  define POSH_U64(x)
 #endif
 
 /** Minimum value for a 64-bit signed integer */
 #define POSH_I64_MIN  POSH_I64(0x8000000000000000)
 /** Maximum value for a 64-bit signed integer */
 #define POSH_I64_MAX  POSH_I64(0x7FFFFFFFFFFFFFFF)
 /** Minimum value for a 64-bit unsigned integer */
 #define POSH_U64_MIN  POSH_U64(0)
 /** Maximum value for a 64-bit unsigned integer */
 #define POSH_U64_MAX  POSH_U64(0xFFFFFFFFFFFFFFFF)
 
 /* ----------------------------------------------------------------------------
 ** Basic Sized Types
 **
 ** These types are expected to be EXACTLY sized so you can use them for
 ** serialization.
 ** ----------------------------------------------------------------------------
 */
 #define POSH_FALSE 0 
 #define POSH_TRUE  1 
 
 typedef int            posh_bool_t;
 typedef unsigned char  posh_byte_t;
 
 /* NOTE: These assume that CHAR_BIT is 8!! */
 typedef unsigned char  posh_u8_t;
 typedef signed char    posh_i8_t;
 
 /* Exact-size 16-bit and 32-bit typedefs.  With POSH_USE_LIMITS_H the widths
    are checked against the <limits.h> macros; otherwise short is taken as
    16 bits and int (long on Palm OS) as 32 bits. */
 #if defined POSH_USE_LIMITS_H
 /* CHAR_BIT is the <limits.h> macro for the number of bits in a char. */
 #  if CHAR_BIT > 8
 #    error This machine uses 9-bit characters.  This is a warning, you can comment this out now.
 #  endif /* CHAR_BIT > 8 */
 
 /* 16-bit */
 #  if ( USHRT_MAX == 65535 ) 
    typedef unsigned short posh_u16_t;
    typedef short          posh_i16_t;
 #  else
    /* Yes, in theory there could still be a 16-bit character type and shorts are
       32-bits in size...if you find such an architecture, let me know =P */
 #    error No 16-bit type found
 #  endif
 
 /* 32-bit */
 #  if ( INT_MAX == 2147483647 )
   typedef unsigned       posh_u32_t;
   typedef int            posh_i32_t;
 #  elif ( LONG_MAX == 2147483647 )
   typedef unsigned long  posh_u32_t;
   typedef long           posh_i32_t;
 #  else
 #    error No 32-bit type found
 #  endif
 
 #else /* POSH_USE_LIMITS_H */
 
   typedef unsigned short posh_u16_t;
   typedef short          posh_i16_t;
 
 #  if !defined POSH_OS_PALM
   typedef unsigned       posh_u32_t;
   typedef int            posh_i32_t;
 #  else
   typedef unsigned long  posh_u32_t;
   typedef long           posh_i32_t;
 #  endif
 #endif
 
 /** Minimum value for a byte */
 #define POSH_BYTE_MIN    0
 /** Maximum value for an 8-bit unsigned value */
 #define POSH_BYTE_MAX    255
 /** Minimum value for a 16-bit signed value */
 #define POSH_I16_MIN     ( ( posh_i16_t ) 0x8000 )
 /** Maximum value for a 16-bit signed value */
 #define POSH_I16_MAX     ( ( posh_i16_t ) 0x7FFF ) 
 /** Minimum value for a 16-bit unsigned value */
 #define POSH_U16_MIN     0
 /** Maximum value for a 16-bit unsigned value */
 #define POSH_U16_MAX     ( ( posh_u16_t ) 0xFFFF )
 /** Minimum value for a 32-bit signed value */
 #define POSH_I32_MIN     ( ( posh_i32_t ) 0x80000000 )
 /** Maximum value for a 32-bit signed value */
 #define POSH_I32_MAX     ( ( posh_i32_t ) 0x7FFFFFFF )
 /** Minimum value for a 32-bit unsigned value */
 #define POSH_U32_MIN     0
 /** Maximum value for a 32-bit unsigned value */
 #define POSH_U32_MAX     ( ( posh_u32_t ) 0xFFFFFFFF )
 
 /*
 ** ----------------------------------------------------------------------------
 ** Sanity checks on expected sizes
 ** ----------------------------------------------------------------------------
 */
 #if !defined FORCE_DOXYGEN
 
 POSH_COMPILE_TIME_ASSERT(posh_byte_t, sizeof(posh_byte_t) == 1);
 POSH_COMPILE_TIME_ASSERT(posh_u8_t, sizeof(posh_u8_t) == 1);
 POSH_COMPILE_TIME_ASSERT(posh_i8_t, sizeof(posh_i8_t) == 1);
 POSH_COMPILE_TIME_ASSERT(posh_u16_t, sizeof(posh_u16_t) == 2);
 POSH_COMPILE_TIME_ASSERT(posh_i16_t, sizeof(posh_i16_t) == 2);
 POSH_COMPILE_TIME_ASSERT(posh_u32_t, sizeof(posh_u32_t) == 4);
 POSH_COMPILE_TIME_ASSERT(posh_i32_t, sizeof(posh_i32_t) == 4);
 
 #if !defined POSH_NO_FLOAT
    POSH_COMPILE_TIME_ASSERT(posh_testfloat_t, sizeof(float)==4 );
    POSH_COMPILE_TIME_ASSERT(posh_testdouble_t, sizeof(double)==8);
 #endif
 
 #if defined POSH_64BIT_INTEGER
    POSH_COMPILE_TIME_ASSERT(posh_u64_t, sizeof(posh_u64_t) == 8);
    POSH_COMPILE_TIME_ASSERT(posh_i64_t, sizeof(posh_i64_t) == 8);
 #endif
 
 #endif
 
 /*
 ** ----------------------------------------------------------------------------
 ** 64-bit pointer support
 ** ----------------------------------------------------------------------------
 */
 #if defined POSH_CPU_AXP && ( defined POSH_OS_TRU64 || defined POSH_OS_LINUX )
 #  define POSH_64BIT_POINTER 1
 #endif
 
 #if defined POSH_CPU_X86_64 && defined POSH_OS_LINUX
 #  define POSH_64BIT_POINTER 1
 #endif
 
-#if defined POSH_CPU_SPARC64 || defined POSH_OS_WIN64 || defined __64BIT__ || defined __LP64 || defined _LP64 || defined __LP64__ || defined _ADDR64 || defined _CRAYC
+#if defined POSH_CPU_SPARC64 || defined POSH_CPU_E2K || defined POSH_OS_WIN64 || defined __64BIT__ || defined __LP64 || defined _LP64 || defined __LP64__ || defined _ADDR64 || defined _CRAYC
 #   define POSH_64BIT_POINTER 1
 #endif
 
 #if defined POSH_64BIT_POINTER
    POSH_COMPILE_TIME_ASSERT( posh_64bit_pointer, sizeof( void * ) == 8 );
 #elif !defined FORCE_DOXYGEN
 /* if this assertion is hit then you're on a system that either has 64-bit
    addressing and we didn't catch it, or you're on a system with 16-bit
    pointers.  In the latter case, POSH doesn't actually care, we're just
    triggering this assertion to make sure you're aware of the situation,
    so feel free to delete it.
 
    If this assertion is triggered on a known 32 or 64-bit platform, 
    please let us know (poshlib@poshlib.org) */
    POSH_COMPILE_TIME_ASSERT( posh_32bit_pointer, sizeof( void * ) == 4 );
 #endif
 
 #if defined FORCE_DOXYGEN
 #  define POSH_64BIT_POINTER
 #endif
 
 /*
 ** ----------------------------------------------------------------------------
 ** POSH Utility Functions
 **
 ** These are optional POSH utility functions that are not required if you don't
 ** need anything except static checking of your host and target environment.
 ** 
 ** These functions are NOT wrapped with POSH_PUBLIC_API because I didn't want
 ** to enforce their export if your own library is only using them internally.
 ** ----------------------------------------------------------------------------
 */
 #ifdef __cplusplus
 extern "C" {
 #endif
 
 const char *POSH_GetArchString( void );
 
 #if !defined POSH_NO_FLOAT
 
 posh_u32_t  POSH_LittleFloatBits( float f );
 posh_u32_t  POSH_BigFloatBits( float f );
 float       POSH_FloatFromLittleBits( posh_u32_t bits );
 float       POSH_FloatFromBigBits( posh_u32_t bits );
 
 void        POSH_DoubleBits( double d, posh_byte_t dst[ 8 ] );
 double      POSH_DoubleFromBits( const posh_byte_t src[ 8 ] );
 
 /* unimplemented
 float      *POSH_WriteFloatToLittle( void *dst, float f );
 float      *POSH_WriteFloatToBig( void *dst, float f );
 float       POSH_ReadFloatFromLittle( const void *src );
 float       POSH_ReadFloatFromBig( const void *src );
 
 double     *POSH_WriteDoubleToLittle( void *dst, double d );
 double     *POSH_WriteDoubleToBig( void *dst, double d );
 double      POSH_ReadDoubleFromLittle( const void *src );
 double      POSH_ReadDoubleFromBig( const void *src );
 */
 #endif /* !defined POSH_NO_FLOAT */
 
 #if defined FORCE_DOXYGEN
 #  define POSH_NO_FLOAT
 #  undef  POSH_NO_FLOAT
 #endif
 
 extern posh_u16_t  POSH_SwapU16( posh_u16_t u );
 extern posh_i16_t  POSH_SwapI16( posh_i16_t u );
 extern posh_u32_t  POSH_SwapU32( posh_u32_t u );
 extern posh_i32_t  POSH_SwapI32( posh_i32_t u );
 
 #if defined POSH_64BIT_INTEGER
 
 extern posh_u64_t  POSH_SwapU64( posh_u64_t u );
 extern posh_i64_t  POSH_SwapI64( posh_i64_t u );
 
 #endif /*POSH_64BIT_INTEGER */
 
 extern posh_u16_t *POSH_WriteU16ToLittle( void *dst, posh_u16_t value );
 extern posh_i16_t *POSH_WriteI16ToLittle( void *dst, posh_i16_t value );
 extern posh_u32_t *POSH_WriteU32ToLittle( void *dst, posh_u32_t value );
 extern posh_i32_t *POSH_WriteI32ToLittle( void *dst, posh_i32_t value );
 
 extern posh_u16_t *POSH_WriteU16ToBig( void *dst, posh_u16_t value );
 extern posh_i16_t *POSH_WriteI16ToBig( void *dst, posh_i16_t value );
 extern posh_u32_t *POSH_WriteU32ToBig( void *dst, posh_u32_t value );
 extern posh_i32_t *POSH_WriteI32ToBig( void *dst, posh_i32_t value );
 
 extern posh_u16_t  POSH_ReadU16FromLittle( const void *src );
 extern posh_i16_t  POSH_ReadI16FromLittle( const void *src );
 extern posh_u32_t  POSH_ReadU32FromLittle( const void *src );
 extern posh_i32_t  POSH_ReadI32FromLittle( const void *src );
 
 extern posh_u16_t  POSH_ReadU16FromBig( const void *src );
 extern posh_i16_t  POSH_ReadI16FromBig( const void *src );
 extern posh_u32_t  POSH_ReadU32FromBig( const void *src );
 extern posh_i32_t  POSH_ReadI32FromBig( const void *src );
 
 #if defined POSH_64BIT_INTEGER
 extern posh_u64_t *POSH_WriteU64ToLittle( void *dst, posh_u64_t value );
 extern posh_i64_t *POSH_WriteI64ToLittle( void *dst, posh_i64_t value );
 extern posh_u64_t *POSH_WriteU64ToBig( void *dst, posh_u64_t value );
 extern posh_i64_t *POSH_WriteI64ToBig( void *dst, posh_i64_t value );
 
 extern posh_u64_t  POSH_ReadU64FromLittle( const void *src );
 extern posh_i64_t  POSH_ReadI64FromLittle( const void *src );
 extern posh_u64_t  POSH_ReadU64FromBig( const void *src );
 extern posh_i64_t  POSH_ReadI64FromBig( const void *src );
 #endif /* POSH_64BIT_INTEGER */
 
 #if !defined POSH_LITTLE_ENDIAN
 
 /* Host is not little-endian: big-endian values pass through unchanged and
    little-endian values are byte-swapped. */
 #  define POSH_BigI16(x) (x)
 #  define POSH_BigU16(x) (x)
 #  define POSH_BigI32(x) (x)
 #  define POSH_BigU32(x) (x)
 
 #  define POSH_LittleI16(x) POSH_SwapI16(x)
 #  define POSH_LittleU16(x) POSH_SwapU16(x)
 #  define POSH_LittleI32(x) POSH_SwapI32(x)
 #  define POSH_LittleU32(x) POSH_SwapU32(x)
 
 #  if defined POSH_64BIT_INTEGER
 #    define POSH_BigI64(x) (x)
 #    define POSH_BigU64(x) (x)
 #    define POSH_LittleI64(x) POSH_SwapI64(x)
 #    define POSH_LittleU64(x) POSH_SwapU64(x)
 #  endif /* POSH_64BIT_INTEGER */
 
 #else
 
 /* Little-endian host: little-endian values pass through unchanged and
    big-endian values are byte-swapped. */
 #  define POSH_LittleI16(x) (x)
 #  define POSH_LittleU16(x) (x)
 #  define POSH_LittleI32(x) (x)
 #  define POSH_LittleU32(x) (x)
 
 #  define POSH_BigI16(x) POSH_SwapI16(x)
 #  define POSH_BigU16(x) POSH_SwapU16(x)
 #  define POSH_BigI32(x) POSH_SwapI32(x)
 #  define POSH_BigU32(x) POSH_SwapU32(x)
 
 #  if defined POSH_64BIT_INTEGER
 #    define POSH_LittleI64(x) (x)
 #    define POSH_LittleU64(x) (x)
 #    define POSH_BigI64(x) POSH_SwapI64(x)
 #    define POSH_BigU64(x) POSH_SwapU64(x)
 #  endif /* POSH_64BIT_INTEGER */
 
 #endif
 
 #ifdef __cplusplus
 }
 #endif
Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/Debug.cpp
===================================================================
--- ps/trunk/libraries/source/nvtt/src/src/nvcore/Debug.cpp	(revision 27269)
+++ ps/trunk/libraries/source/nvtt/src/src/nvcore/Debug.cpp	(revision 27270)
@@ -1,1281 +1,1284 @@
 // This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
 
 #include "Debug.h"
 #include "Array.inl"
 #include "StrLib.h" // StringBuilder
 
 #include "StdStream.h" // fileOpen
 
 #include <stdlib.h>
 
 // Extern
 #if NV_OS_WIN32 //&& NV_CC_MSVC
 #   define WIN32_LEAN_AND_MEAN
 #   define VC_EXTRALEAN
 #   include <windows.h>
 #   include <direct.h>
 #   if NV_CC_MSVC
 #       include <crtdbg.h>
 #       if _MSC_VER < 1300
 #           define DECLSPEC_DEPRECATED
 // VC6: change this path to your Platform SDK headers
 #           include <dbghelp.h> // must be XP version of file
 //          include "M:\\dev7\\vs\\devtools\\common\\win32sdk\\include\\dbghelp.h"
 #       else
 // VC7: ships with updated headers
 #           include <dbghelp.h>
 #       endif
 #   endif
 #   pragma comment(lib,"dbghelp.lib")
 #endif
 
 #if NV_OS_XBOX
 #    include <Xtl.h>
 #    ifdef _DEBUG
 #        include <xbdm.h>
 #    endif //_DEBUG
 #endif //NV_OS_XBOX
 
 #if !NV_OS_WIN32 && defined(HAVE_SIGNAL_H)
 #   include <signal.h>
 #endif
 
 #if NV_OS_UNIX
 #   include <unistd.h> // getpid
 #endif
 
 #if NV_OS_LINUX && defined(HAVE_EXECINFO_H)
 #   include <execinfo.h> // backtrace
 #   if NV_CC_GNUC // defined(HAVE_CXXABI_H)
 #       include <cxxabi.h>
 #   endif
 #endif
 
 #if NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_NETBSD || NV_OS_OPENBSD
 #   include <sys/types.h>
 #   include <sys/param.h>
 #   include <sys/sysctl.h> // sysctl
 #   if !defined(NV_OS_OPENBSD)
 #       include <sys/ucontext.h>
 #   endif
 #   if defined(HAVE_EXECINFO_H) // only after OSX 10.5
 #       include <execinfo.h> // backtrace
 #       if NV_CC_GNUC // defined(HAVE_CXXABI_H)
 #           include <cxxabi.h>
 #       endif
 #   endif
 #endif
 
 #if NV_OS_ORBIS
 #include <libdbg.h>
 #endif
 
 #define NV_USE_SEPARATE_THREAD 1
 
 
 using namespace nv;
 
 namespace 
 {
 
     static MessageHandler * s_message_handler = NULL;
     static AssertHandler * s_assert_handler = NULL;
 
     static bool s_sig_handler_enabled = false;
     static bool s_interactive = true;
 
 #if NV_OS_WIN32 && NV_CC_MSVC
 
     // Old exception filter.
     static LPTOP_LEVEL_EXCEPTION_FILTER s_old_exception_filter = NULL;
 
 #elif !NV_OS_WIN32 && defined(HAVE_SIGNAL_H)
 
     // Old signal handlers.
     struct sigaction s_old_sigsegv;
     struct sigaction s_old_sigtrap;
     struct sigaction s_old_sigfpe;
     struct sigaction s_old_sigbus;
 
 #endif
 
 
 #if NV_OS_WIN32 && NV_CC_MSVC
 
     // We should try to simplify the top level filter as much as possible.
     // http://www.nynaeve.net/?p=128
 
 #if NV_USE_SEPARATE_THREAD
 
     // The critical section enforcing the requirement that only one exception be
     // handled by a handler at a time.
     static CRITICAL_SECTION s_handler_critical_section;
 
     // Semaphores used to move exception handling between the exception thread
     // and the handler thread.  handler_start_semaphore_ is signalled by the
     // exception thread to wake up the handler thread when an exception occurs.
     // handler_finish_semaphore_ is signalled by the handler thread to wake up
     // the exception thread when handling is complete.
     static HANDLE s_handler_start_semaphore = NULL;
     static HANDLE s_handler_finish_semaphore = NULL;
 
     // The exception handler thread.
     static HANDLE s_handler_thread = NULL;
 
     static DWORD s_requesting_thread_id = 0;
     static EXCEPTION_POINTERS * s_exception_info = NULL;
 
 #endif // NV_USE_SEPARATE_THREAD
 
 
     // State passed to miniDumpWriteDumpCallback: one extra memory region to
     // include in the dump, plus a flag recording that it has been reported.
     struct MinidumpCallbackContext {
         ULONG64 memory_base;    // start address of the region
         ULONG memory_size;      // size of the region in bytes
         bool finished;          // set once the region has been handed to the dump writer
     };
 
     // static
     // Callback given to MiniDumpWriteDump.  Accepts every module and thread,
     // turns off further cancel callbacks, and reports the memory region stored
     // in the MinidumpCallbackContext (passed as 'context') exactly once.
     // Every other callback type is answered with FALSE.
     static BOOL CALLBACK miniDumpWriteDumpCallback(PVOID context, const PMINIDUMP_CALLBACK_INPUT callback_input, PMINIDUMP_CALLBACK_OUTPUT callback_output)
     {
         const ULONG type = callback_input->CallbackType;
 
         // Include all modules and all threads.
         if (type == IncludeModuleCallback || type == ModuleCallback ||
             type == IncludeThreadCallback || type == ThreadCallback)
         {
             return TRUE;
         }
 
         // Stop receiving cancel callbacks.
         if (type == CancelCallback)
         {
             callback_output->CheckCancel = FALSE;
             callback_output->Cancel = FALSE;
             return TRUE;
         }
 
         if (type == MemoryCallback)
         {
             MinidumpCallbackContext* region = reinterpret_cast<MinidumpCallbackContext*>(context);
             if (region->finished) {
                 return FALSE;
             }
 
             // Include the specified memory region, and only once.
             callback_output->MemoryBase = region->memory_base;
             callback_output->MemorySize = region->memory_size;
             region->finished = true;
             return TRUE;
         }
 
         // Ignore other callback types.
         return FALSE;
     }
 
     // Writes a minidump of the current process to the relative path
     // "crash.dmp".  When pExceptionInfo is non-NULL the exception record is
     // attached and up to 256 bytes of memory around the faulting instruction
     // pointer are added through miniDumpWriteDumpCallback.
     // Returns true when the dump was written, false when the file could not
     // be created or MiniDumpWriteDump failed.
     static bool writeMiniDump(EXCEPTION_POINTERS * pExceptionInfo)
     {
         // create the file
         HANDLE hFile = CreateFileA("crash.dmp", GENERIC_WRITE, FILE_SHARE_WRITE, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
         if (hFile == INVALID_HANDLE_VALUE) {
             //nvDebug("*** Failed to create dump file.\n");
             return false;
         }
 
         // MiniDumpWriteDump receives these by pointer, so they must outlive
         // the 'if' block that fills them in.  Declaring them inside that
         // block left pExInfo/pCallback pointing at objects whose lifetime
         // had already ended.
         MINIDUMP_EXCEPTION_INFORMATION ExInfo;
         MINIDUMP_CALLBACK_INFORMATION callback;
         MinidumpCallbackContext context;
 
         MINIDUMP_EXCEPTION_INFORMATION * pExInfo = NULL;
         MINIDUMP_CALLBACK_INFORMATION * pCallback = NULL;
 
         if (pExceptionInfo != NULL) {
             // ThreadId must name the thread that raised the exception.  With
             // the separate handler thread, this function runs on the handler
             // thread, so use the id recorded by the requesting thread.
             DWORD faultingThreadId = ::GetCurrentThreadId();
         #if NV_USE_SEPARATE_THREAD
             if (s_requesting_thread_id != 0) {
                 faultingThreadId = s_requesting_thread_id;
             }
         #endif
             ExInfo.ThreadId = faultingThreadId;
             ExInfo.ExceptionPointers = pExceptionInfo;
             ExInfo.ClientPointers = FALSE; // BOOL: pointers are in this process
             pExInfo = &ExInfo;
 
             // Find a memory region of 256 bytes centered on the
             // faulting instruction pointer.
             const ULONG64 instruction_pointer = 
             #if defined(_M_IX86)
                 pExceptionInfo->ContextRecord->Eip;
             #elif defined(_M_AMD64)
                 pExceptionInfo->ContextRecord->Rip;
             #else
                 #error Unsupported platform
             #endif
 
             MEMORY_BASIC_INFORMATION info;
             
             if (VirtualQuery(reinterpret_cast<LPCVOID>(instruction_pointer), &info, sizeof(MEMORY_BASIC_INFORMATION)) != 0 && info.State == MEM_COMMIT)
             {
                 // Attempt to get 128 bytes before and after the instruction
                 // pointer, but settle for whatever's available up to the
                 // boundaries of the memory region.
                 const ULONG64 kIPMemorySize = 256;
                 context.memory_base = max(reinterpret_cast<ULONG64>(info.BaseAddress), instruction_pointer - (kIPMemorySize / 2));
                 ULONG64 end_of_range = min(instruction_pointer + (kIPMemorySize / 2), reinterpret_cast<ULONG64>(info.BaseAddress) + info.RegionSize);
                 context.memory_size = static_cast<ULONG>(end_of_range - context.memory_base);
                 context.finished = false;
 
                 callback.CallbackRoutine = miniDumpWriteDumpCallback;
                 callback.CallbackParam = reinterpret_cast<void*>(&context);
                 pCallback = &callback;
             }
         }
 
         MINIDUMP_TYPE miniDumpType = (MINIDUMP_TYPE)(MiniDumpNormal|MiniDumpWithHandleData|MiniDumpWithThreadInfo);
 
         // write the dump
         BOOL ok = MiniDumpWriteDump(GetCurrentProcess(), GetCurrentProcessId(), hFile, miniDumpType, pExInfo, NULL, pCallback) != 0;
         CloseHandle(hFile);
 
         if (ok == FALSE) {
             //nvDebug("*** Failed to save dump file.\n");
             return false;
         }
 
         //nvDebug("\nDump file saved.\n");
 
         return true;
     }
 
 #if NV_USE_SEPARATE_THREAD
 
     // Entry point of the handler thread.  Each time the start semaphore is
     // signalled it writes a minidump for s_exception_info and then signals
     // the finish semaphore so that the requesting thread can continue.
     static DWORD WINAPI ExceptionHandlerThreadMain(void* lpParameter) {
         nvDebugCheck(s_handler_start_semaphore != NULL);
         nvDebugCheck(s_handler_finish_semaphore != NULL);
 
         for (;;) {
             const DWORD wait_result = WaitForSingleObject(s_handler_start_semaphore, INFINITE);
             if (wait_result != WAIT_OBJECT_0) {
                 // Not signalled: go back to waiting.
                 continue;
             }
 
             writeMiniDump(s_exception_info);
 
             // Allow the requesting thread to proceed.
             ReleaseSemaphore(s_handler_finish_semaphore, 1, NULL);
         }
 
         // Not reached: the loop above never exits on its own.
         return 0;
     }
 
 #endif // NV_USE_SEPARATE_THREAD
 
     // Reports whether stack traces can be produced; this variant always can.
     static bool hasStackTrace()
     {
         return true;
     }
 
     /*static NV_NOINLINE int backtrace(void * trace[], int maxcount) {
 
         // In Windows XP and Windows Server 2003, the sum of the FramesToSkip and FramesToCapture parameters must be less than 63.
         int xp_maxcount = min(63-1, maxcount);
 
         int count = RtlCaptureStackBackTrace(1, xp_maxcount, trace, NULL);
         nvDebugCheck(count <= maxcount);
 
         return count;
     }*/
 
     // Walks the stack described by 'ctx' with StackWalk64 and stores the
     // program counter of each frame in 'trace'.
     //   ctx      - thread context to start from; StackWalk64 receives it by pointer.
     //   trace    - output array of frame addresses.
     //   maxcount - maximum number of StackWalk64 steps, including skipped ones.
     //   skip     - number of leading frames that are walked but not stored.
     // Returns (steps walked - skip).  This is negative when the walk stops
     // before 'skip' frames have been seen.
     static NV_NOINLINE int backtraceWithSymbols(CONTEXT * ctx, void * trace[], int maxcount, int skip = 0) {
         
         // Init the stack frame for this function
         STACKFRAME64 stackFrame = { 0 };
 
     #if NV_CPU_X86_64
         DWORD dwMachineType = IMAGE_FILE_MACHINE_AMD64;
         stackFrame.AddrPC.Offset = ctx->Rip;
         stackFrame.AddrFrame.Offset = ctx->Rbp;
         stackFrame.AddrStack.Offset = ctx->Rsp;
     #elif NV_CPU_X86
         DWORD dwMachineType = IMAGE_FILE_MACHINE_I386;
         stackFrame.AddrPC.Offset = ctx->Eip;
         stackFrame.AddrFrame.Offset = ctx->Ebp;
         stackFrame.AddrStack.Offset = ctx->Esp;
     #else
         #error "Platform not supported!"
     #endif
         stackFrame.AddrPC.Mode = AddrModeFlat;
         stackFrame.AddrFrame.Mode = AddrModeFlat;
         stackFrame.AddrStack.Mode = AddrModeFlat;
 
         // Walk up the stack
         const HANDLE hThread = GetCurrentThread();
         const HANDLE hProcess = GetCurrentProcess();
         int i;
         for (i = 0; i < maxcount; i++)
         {
             // walking once first makes us skip self
             if (!StackWalk64(dwMachineType, hProcess, hThread, &stackFrame, ctx, NULL, &SymFunctionTableAccess64, &SymGetModuleBase64, NULL)) {
                 break;
             }
 
             /*if (stackFrame.AddrPC.Offset == stackFrame.AddrReturn.Offset || stackFrame.AddrPC.Offset == 0) {
                 break;
             }*/
 
             // Frames before 'skip' are walked but not recorded.
             if (i >= skip) {
                 trace[i - skip] = (PVOID)stackFrame.AddrPC.Offset;
             }
         }
 
         return i - skip;
     }
 
 #pragma warning(push)
 #pragma warning(disable:4748)
     // Captures the current thread's context and walks it with
     // backtraceWithSymbols, skipping one frame.  Stores at most 'maxcount'
     // addresses in 'trace' and returns backtraceWithSymbols' result.
     static NV_NOINLINE int backtrace(void * trace[], int maxcount) {
         CONTEXT ctx = { 0 };
 #if NV_CPU_X86 && !NV_CPU_X86_64
         // 32-bit x86: fill in the control registers by hand.  The call/pop
         // pair reads the current instruction pointer into eax.
         ctx.ContextFlags = CONTEXT_CONTROL;
         _asm {
              call x
           x: pop eax
              mov ctx.Eip, eax
              mov ctx.Ebp, ebp
              mov ctx.Esp, esp
         }
 #else
         RtlCaptureContext(&ctx); // Not implemented correctly in x86.
 #endif
 
         return backtraceWithSymbols(&ctx, trace, maxcount, 1);
     }
 #pragma warning(pop)
 
     // Converts the addresses trace[start .. size-1] into text lines of the
     // form "file(line) : function\n" and appends them to 'lines'.
     // Stops at the first null address, at the first address that has a symbol
     // but no line information, or right after the frame named "WinMain".
     // Addresses without a symbol are skipped.
     static NV_NOINLINE void writeStackTrace(void * trace[], int size, int start, Array<const char *> & lines)
     {
         StringBuilder builder(512);
 
         const HANDLE process = GetCurrentProcess();
 
         for (int frame = start; frame < size; frame++)
         {
             // A null address marks the end of the stack walk.
             const DWORD64 address = (DWORD64)trace[frame];
             if (address == 0) {
                 break;
             }
 
             // Symbol record followed by room for the name.
             #define MAX_STRING_LEN  (512)
             unsigned char symbolStorage[sizeof(IMAGEHLP_SYMBOL64) + MAX_STRING_LEN] = { 0 };
             IMAGEHLP_SYMBOL64 * symbol = (IMAGEHLP_SYMBOL64*)symbolStorage;
             symbol->SizeOfStruct = sizeof(IMAGEHLP_SYMBOL64);
             symbol->MaxNameLength = MAX_STRING_LEN;
 
             DWORD64 symbolDisplacement;
             if (!SymGetSymFromAddr64(process, address, &symbolDisplacement, symbol)) {
                 // No symbol for this address: move on to the next frame.
                 continue;
             }
 
             // Make sure the name is terminated before it is used as a string.
             symbol->Name[MAX_STRING_LEN-1] = 0;
             const char * functionName = symbol->Name;
 
             // Get file/line number
             IMAGEHLP_LINE64 lineInfo = { 0 };
             lineInfo.SizeOfStruct = sizeof(lineInfo);
 
             DWORD lineDisplacement;
             if (!SymGetLineFromAddr64(process, address, &lineDisplacement, &lineInfo)) {
                 // Do not print unknown symbols anymore.
                 break;
             }
 
             const char * fileName = lineInfo.FileName;
             const int lineNumber = lineInfo.LineNumber;
             builder.format("%s(%d) : %s\n", fileName, lineNumber, functionName);
 
             lines.append(builder.release());
 
             // Nothing above WinMain is reported.
             if (functionName != NULL && strcmp(functionName, "WinMain") == 0) {
                 break;
             }
         }
     }
 
 
     // Unhandled-exception filter: writes a mini dump, then either hands control
     // to a debugger or writes a stack trace to "crash.txt" and terminates.
     //
     //  pExceptionInfo - exception record and CPU context supplied by the system.
     //
     // Returns EXCEPTION_CONTINUE_EXECUTION when a debugger was attached, otherwise
     // EXCEPTION_EXECUTE_HANDLER (only reached if TerminateProcess did not end the
     // process).
     static LONG WINAPI handleException(EXCEPTION_POINTERS * pExceptionInfo)
     {
         // Only one thread at a time may use the shared handler state below.
         EnterCriticalSection(&s_handler_critical_section);
 #if NV_USE_SEPARATE_THREAD
         // Publish the request for the handler thread.
         s_requesting_thread_id = GetCurrentThreadId();
         s_exception_info = pExceptionInfo;
 
         // This causes the handler thread to call writeMiniDump.
         ReleaseSemaphore(s_handler_start_semaphore, 1, NULL);
 
         // Wait until WriteMinidumpWithException is done and collect its return value.
         WaitForSingleObject(s_handler_finish_semaphore, INFINITE);
         //bool status = s_handler_return_value;
 
         // Clean up.
         s_requesting_thread_id = 0;
         s_exception_info = NULL;
 #else
         // First of all, write mini dump.
         writeMiniDump(pExceptionInfo);
 #endif
         LeaveCriticalSection(&s_handler_critical_section);
 
         // Logged unconditionally; the result of the dump is not inspected here.
         nvDebug("\nDump file saved.\n");
 
         // Try to attach to debugger.
         if (s_interactive && debug::attachToDebugger()) {
             nvDebugBreak();
             return EXCEPTION_CONTINUE_EXECUTION;
         }
 
         // If that fails, then try to pretty print a stack trace and terminate.
         void * trace[64];
         
         // Walk the stack from the context captured at the point of the exception.
         int size = backtraceWithSymbols(pExceptionInfo->ContextRecord, trace, 64);
 
         // @@ Use win32's CreateFile?
         FILE * fp = fileOpen("crash.txt", "wb");
         if (fp != NULL) {
             Array<const char *> lines;
             writeStackTrace(trace, size, 0, lines);
 
             // Each line was released by writeStackTrace's builder and is freed here.
             for (uint i = 0; i < lines.count(); i++) {
                 fputs(lines[i], fp);
                 delete lines[i];
             }
 
             // @@ Add more info to crash.txt?
 
             fclose(fp);
         }
 
         // This should terminate the process and set the error exit code.
         TerminateProcess(GetCurrentProcess(), EXIT_FAILURE + 2);
 
         return EXCEPTION_EXECUTE_HANDLER;   // Terminate app. In case terminate process did not succeed.
     }
 
     // Pure-virtual-call handler: break into the debugger, then end the process
     // with a dedicated exit code.
     static void handlePureVirtualCall()
     {
         nvDebugBreak();

         const int exitCode = EXIT_FAILURE + 8;
         TerminateProcess(GetCurrentProcess(), exitCode);
     }
 
     static void handleInvalidParameter(const wchar_t * wexpresion, const wchar_t * wfunction, const wchar_t * wfile, unsigned int line, uintptr_t reserved) {
 
         size_t convertedCharCount = 0;
         
         StringBuilder expresion;
         if (wexpresion != NULL) {
             uint size = U32(wcslen(wexpresion) + 1);
             expresion.reserve(size);
             wcstombs_s(&convertedCharCount, expresion.str(), size, wexpresion, _TRUNCATE);
         }
 
         StringBuilder file;
         if (wfile != NULL) {
             uint size = U32(wcslen(wfile) + 1);
             file.reserve(size);
             wcstombs_s(&convertedCharCount, file.str(), size, wfile, _TRUNCATE);
         }
 
         StringBuilder function;
         if (wfunction != NULL) {
             uint size = U32(wcslen(wfunction) + 1);
             function.reserve(size);
             wcstombs_s(&convertedCharCount, function.str(), size, wfunction, _TRUNCATE);
         }
         
         int result = nvAbort(expresion.str(), file.str(), line, function.str());
         if (result == NV_ABORT_DEBUG) {
             nvDebugBreak();
         } 
     }
 
 #elif !NV_OS_WIN32 && defined(HAVE_SIGNAL_H) // NV_OS_LINUX || NV_OS_DARWIN
 
 #if defined(HAVE_EXECINFO_H)
 
     // Reports whether stack traces can be produced; always true in this build
     // configuration.
     static bool hasStackTrace()
     {
         return true;
     }
 
 
     // Turns captured return addresses into printable "  In: ..." lines.
     //
     //  trace - addresses captured with backtrace().
     //  size  - number of entries in 'trace'.
     //  start - index of the first entry to print; earlier entries are skipped.
     //  lines - receives one string per printed frame, obtained from
     //          builder.release(). The callers in this file delete every entry.
     //
     // The last entry of 'trace' is never printed (the loop stops at size-1).
     // Nothing is appended when the symbol table cannot be obtained.
     static void writeStackTrace(void * trace[], int size, int start, Array<const char *> & lines) {
         StringBuilder builder(512);
         char ** string_array = backtrace_symbols(trace, size);

         // backtrace_symbols() reports failure by returning NULL.
         if (string_array == NULL) {
             return;
         }

         for(int i = start; i < size-1; i++ ) {
 #       if NV_CC_GNUC // defined(HAVE_CXXABI_H)
             // @@ Write a better parser for the possible formats.
             // Expected shapes: "module(mangled+offset) ..." or "... mangled + offset".
             char * begin = strchr(string_array[i], '(');
             char * end = strrchr(string_array[i], '+');
             char * module = string_array[i];

             // No parenthesis: cut the text just before the '+' and take the last
             // word as the symbol. The 'end' bound check keeps the write at
             // end-1 inside the string.
             if (begin == 0 && end != 0 && end > string_array[i]) {
                 *(end - 1) = '\0';
                 begin = strrchr(string_array[i], ' ');
                 module = NULL; // Ignore module.
             }

             if (begin != 0 && begin < end) {
                 int stat;
                 // Isolate the mangled name between 'begin' and 'end'.
                 *end = '\0';
                 *begin = '\0';
                 char * name = abi::__cxa_demangle(begin+1, 0, 0, &stat);
                 if (module == NULL) {
                     if (name == NULL || stat != 0) {
                         builder.format("  In: '%s'\n", begin+1);
                     }
                     else {
                         builder.format("  In: '%s'\n", name);
                     }
                 }
                 else {
                     if (name == NULL || stat != 0) {
                         builder.format("  In: [%s] '%s'\n", module, begin+1);
                     }
                     else {
                         builder.format("  In: [%s] '%s'\n", module, name);
                     }
                 }
                 // The demangled name is malloc'ed by __cxa_demangle.
                 free(name);
             }
             else {
                 // Unrecognised format: print the raw symbol text.
                 builder.format("  In: '%s'\n", string_array[i]);
             }
 #       else
             builder.format("  In: '%s'\n", string_array[i]);
 #       endif
             lines.append(builder.release());
         }

         // Only the table itself is freed, not the individual strings.
         free(string_array);
     }
 
     // Prints a captured stack trace through nvDebug.
     //
     //  trace - addresses captured with backtrace().
     //  size  - number of entries in 'trace'.
     //  start - index of the first entry to print; lets the caller hide its own
     //          bookkeeping frames.
     static void printStackTrace(void * trace[], int size, int start=0) {
         nvDebug( "\nDumping stacktrace:\n" );

         Array<const char *> lines;
         // Honour the caller's skip count instead of a hard-coded value.
         writeStackTrace(trace, size, start, lines);

         // Each line is owned by this function once writeStackTrace returns.
         for (uint i = 0; i < lines.count(); i++) {
             nvDebug("%s", lines[i]);
             delete lines[i];
         }

         nvDebug("\n");
     }
 
 #endif // defined(HAVE_EXECINFO_H)
 
     // Reads the saved program counter out of a signal context.
     //
     //  secret - context pointer received by the signal handler; it is
     //           reinterpreted as a ucontext_t.
     //
     // Returns the register field selected for the current OS/CPU pair by the
     // preprocessor chain below. Combinations without an entry stop the build
     // with "#error". The notes after the chain list fields for further platforms.
     static void * callerAddress(void * secret)
     {
 #if NV_OS_DARWIN
 #  if defined(_STRUCT_MCONTEXT)
 #    if NV_CPU_PPC
         ucontext_t * ucp = (ucontext_t *)secret;
         return (void *) ucp->uc_mcontext->__ss.__srr0;
 #    elif NV_CPU_X86_64
         ucontext_t * ucp = (ucontext_t *)secret;
         return (void *) ucp->uc_mcontext->__ss.__rip;
 #    elif NV_CPU_X86
         ucontext_t * ucp = (ucontext_t *)secret;
         return (void *) ucp->uc_mcontext->__ss.__eip;
 #    elif NV_CPU_ARM
         ucontext_t * ucp = (ucontext_t *)secret;
         return (void *) ucp->uc_mcontext->__ss.__pc;
 #    elif NV_CPU_AARCH64
         ucontext_t * ucp = (ucontext_t *)secret;
         return (void *) ucp->uc_mcontext->__ss.__pc;
 #    else
 #      error "Unknown CPU"
 #    endif
 #  else
 #    if NV_CPU_PPC
         ucontext_t * ucp = (ucontext_t *)secret;
         return (void *) ucp->uc_mcontext->ss.srr0;
 #    elif NV_CPU_X86
         ucontext_t * ucp = (ucontext_t *)secret;
         return (void *) ucp->uc_mcontext->ss.eip;
 #    else
 #      error "Unknown CPU"
 #    endif
 #  endif
 #elif NV_OS_FREEBSD
 #  if NV_CPU_X86_64
         ucontext_t * ucp = (ucontext_t *)secret;
         return (void *)ucp->uc_mcontext.mc_rip;
 #  elif NV_CPU_X86
         ucontext_t * ucp = (ucontext_t *)secret;
         return (void *)ucp->uc_mcontext.mc_eip;
 #    else
 #      error "Unknown CPU"
 #    endif
 #elif NV_OS_NETBSD
 #  if NV_CPU_X86_64
         ucontext_t * ucp = (ucontext_t *)secret;
         return (void *)ucp->uc_mcontext.__gregs[_REG_RIP];
 #  elif NV_CPU_X86
         ucontext_t * ucp = (ucontext_t *)secret;
         return (void *)ucp->uc_mcontext.__gregs[_REG_EIP];
 #  elif NV_CPU_PPC
         ucontext_t * ucp = (ucontext_t *)secret;
         return (void *) ucp->uc_mcontext.__gregs[_REG_PC];
 #  else
 #      error "Unknown CPU"
 #  endif
 #elif NV_OS_OPENBSD
 #  if NV_CPU_X86_64
         ucontext_t * ucp = (ucontext_t *)secret;
         return (void *)ucp->sc_rip;
 #  elif NV_CPU_X86
         ucontext_t * ucp = (ucontext_t *)secret;
         return (void *)ucp->sc_eip;
 #  else
 #       error "Unknown CPU"
 #  endif        
 #else
 #  if NV_CPU_X86_64
         // #define REG_RIP REG_INDEX(rip) // seems to be 16
         ucontext_t * ucp = (ucontext_t *)secret;
         return (void *)ucp->uc_mcontext.gregs[REG_RIP];
 #  elif NV_CPU_X86
         ucontext_t * ucp = (ucontext_t *)secret;
         return (void *)ucp->uc_mcontext.gregs[14/*REG_EIP*/];
 #  elif NV_CPU_PPC
         ucontext_t * ucp = (ucontext_t *)secret;
         return (void *) ucp->uc_mcontext.regs->nip;
 #    elif NV_CPU_ARM
         ucontext_t * ucp = (ucontext_t *)secret;
         return (void *) ucp->uc_mcontext.arm_pc;
 #    elif NV_CPU_AARCH64
         ucontext_t * ucp = (ucontext_t *)secret;
         return (void *) ucp->uc_mcontext.pc;
+#  elif NV_CPU_E2K
+        ucontext_t * ucp = (ucontext_t *)secret;
+        return (void *) ucp->uc_mcontext.cr0_hi;
 #    else
 #      error "Unknown CPU"
 #    endif
 #endif
 
         // How to obtain the instruction pointers in different platforms, from mlton's source code.
         // http://mlton.org/
         // OpenBSD
         // ucp->sc_eip
         // FreeBSD:
         // ucp->uc_mcontext.mc_eip
         // HPUX:
         // ucp->uc_link
         // Solaris:
         // ucp->uc_mcontext.gregs[REG_PC]
         // Linux hppa:
         // uc->uc_mcontext.sc_iaoq[0] & ~0x3UL
         // Linux sparc:
         // ((struct sigcontext*) secret)->sigc_regs.tpc
         // Linux sparc64:
         // ((struct sigcontext*) secret)->si_regs.pc
 
         // potentially correct for other archs:
         // Linux alpha: ucp->m_context.sc_pc
         // Linux arm: ucp->m_context.ctx.arm_pc
         // Linux ia64: ucp->m_context.sc_ip & ~0x3UL
         // Linux mips: ucp->m_context.sc_pc
         // Linux s390: ucp->m_context.sregs->regs.psw.addr
     }
 
     // Signal handler installed by debug::enableSigHandler: logs the signal,
     // prints a stack trace when one is available, then exits the process.
     //
     //  sig    - signal number.
     //  info   - signal details; si_addr is logged for SIGSEGV.
     //  secret - signal context, decoded by callerAddress().
     static void nvSigHandler(int sig, siginfo_t *info, void *secret)
     {
         void * pnt = callerAddress(secret);
 
         // Do something useful with siginfo_t
         if (sig == SIGSEGV) {
             if (pnt != NULL) nvDebug("Got signal %d, faulty address is %p, from %p\n", sig, info->si_addr, pnt);
             else nvDebug("Got signal %d, faulty address is %p\n", sig, info->si_addr);
         }
         else if(sig == SIGTRAP) {
             nvDebug("Breakpoint hit.\n");
         }
         else {
             nvDebug("Got signal %d\n", sig);
         }
 
 #if defined(HAVE_EXECINFO_H)
         if (hasStackTrace()) // in case of weak linking
         {
             void * trace[64];
             int size = backtrace(trace, 64);
 
             if (pnt != NULL) {
                 // Overwrite sigaction with caller's address.
                 trace[1] = pnt;
             }
 
             // Request that entry 0 (this handler) be left out of the output.
             printStackTrace(trace, size, 1);
         }
 #endif // defined(HAVE_EXECINFO_H)
 
         // NOTE(review): the process exits with status 0 even though a fatal
         // signal was received — confirm that a success status is intended.
         exit(0);
     }
 
 #endif // defined(HAVE_SIGNAL_H)
 
 
 
 #if NV_OS_WIN32 //&& NV_CC_MSVC
 
     /**
      * Win32 assert handler.
      *
      * Formats the failed assertion, logs it together with a stack trace and,
      * in interactive mode, asks the user through a message box what to do.
      */
     struct Win32AssertHandler : public AssertHandler 
     {
         // Flush the message queue. This is necessary for the message box to show up.
         static void flushMessageQueue()
         {
             MSG msg;
             while( PeekMessage( &msg, NULL, 0, 0, PM_REMOVE ) ) {
                 //if( msg.message == WM_QUIT ) break;
                 TranslateMessage( &msg );
                 DispatchMessage( &msg );
             }
         }

         // Assert handler method.
         //
         //  exp, file, line, func - location and text of the failed assertion;
         //                          'func' may be NULL.
         //  msg, arg              - optional printf-style message and its arguments;
         //                          'msg' may be NULL.
         //
         // Returns NV_ABORT_DEBUG when a debugger is present or the user chose
         // "Retry", NV_ABORT_IGNORE when the user chose "Ignore". In every other
         // case the process exits with EXIT_FAILURE + 1 and the call never returns.
         virtual int assertion(const char * exp, const char * file, int line, const char * func, const char * msg, va_list arg)
         {
             int ret = NV_ABORT_EXIT;

             StringBuilder error_string;
             error_string.format("*** Assertion failed: %s\n    On file: %s\n    On line: %d\n", exp, file, line );
             if (func != NULL) {
                 error_string.appendFormat("    On function: %s\n", func);
             }
             if (msg != NULL) {
                 error_string.append("    Message: ");
                 // Work on a copy so the caller's va_list stays untouched.
                 va_list tmp;
                 va_copy(tmp, arg);
                 error_string.appendFormatList(msg, tmp);
                 va_end(tmp);
                 error_string.append("\n");
             }
             // The text contains the asserted expression, which may include '%'
             // (e.g. "a % b == 0"), so it must be passed as an argument and never
             // as the format string itself.
             nvDebug( "%s", error_string.str() );

             // Print stack trace:
             debug::dumpInfo();

             if (debug::isDebuggerPresent()) {
                 return NV_ABORT_DEBUG;
             }

             if (s_interactive) {
                 flushMessageQueue();
                 int action = MessageBoxA(NULL, error_string.str(), "Assertion failed", MB_ABORTRETRYIGNORE | MB_ICONERROR | MB_TOPMOST);
                 switch( action ) {
                 case IDRETRY:
                     ret = NV_ABORT_DEBUG;
                     break;
                 case IDIGNORE:
                     ret = NV_ABORT_IGNORE;
                     break;
                 case IDABORT:
                 default:
                     ret = NV_ABORT_EXIT;
                     break;
                 }
                 /*if( _CrtDbgReport( _CRT_ASSERT, file, line, module, exp ) == 1 ) {
                     return NV_ABORT_DEBUG;
                 }*/
             }

             if (ret == NV_ABORT_EXIT) {
                 // Exit cleanly.
                 exit(EXIT_FAILURE + 1);
             }

             return ret;
         }
     };
 #elif NV_OS_XBOX
 
     /**
      * Xbox360 assert handler.
      *
      * Logs the failed assertion, then either asks the caller to break into the
      * debugger or exits the process.
      */
     struct Xbox360AssertHandler : public AssertHandler 
     {
         // Assert handler method.
         //
         //  exp, file, line, func - location and text of the failed assertion;
         //                          'func' may be NULL.
         //  msg, arg              - not used by this handler.
         //
         // Returns NV_ABORT_DEBUG when a debugger is present; otherwise the
         // process exits with EXIT_FAILURE + 1.
         virtual int assertion(const char * exp, const char * file, int line, const char * func, const char * msg, va_list arg)
         {
             int ret = NV_ABORT_EXIT;

             StringBuilder error_string;
             if( func != NULL ) {
                 error_string.format( "*** Assertion failed: %s\n    On file: %s\n    On function: %s\n    On line: %d\n ", exp, file, func, line );
             }
             else {
                 error_string.format( "*** Assertion failed: %s\n    On file: %s\n    On line: %d\n ", exp, file, line );
             }
             // The text contains the asserted expression, which may include '%',
             // so it is passed as an argument and never as the format string.
             nvDebug( "%s", error_string.str() );

             if (debug::isDebuggerPresent()) {
                 return NV_ABORT_DEBUG;
             }

             if( ret == NV_ABORT_EXIT ) {
                  // Exit cleanly.
                 exit(EXIT_FAILURE + 1);
             }

             return ret;
         }
     };
 #elif NV_OS_ORBIS
 
     /**
      * Orbis assert handler.
      *
      * Logs the failed assertion and never terminates the process: the caller is
      * told to break when a debugger is attached and to carry on otherwise.
      */
     struct OrbisAssertHandler : public AssertHandler
     {
         // Assert handler method. 'msg' and 'arg' are not used here.
         virtual int assertion(const char * exp, const char * file, int line, const char * func, const char * msg, va_list arg)
         {
             if( func == NULL ) {
                 nvDebug( "*** Assertion failed: %s\n    On file: %s\n    On line: %d\n ", exp, file, line );
             }
             else {
                 nvDebug( "*** Assertion failed: %s\n    On file: %s\n    On function: %s\n    On line: %d\n ", exp, file, func, line );
             }

             //SBtodoORBIS print stack trace
             /*if (hasStackTrace())
             {
                 void * trace[64];
                 int size = backtrace(trace, 64);
                 printStackTrace(trace, size, 2);
             }*/

             return debug::isDebuggerPresent() ? NV_ABORT_DEBUG : NV_ABORT_IGNORE;
         }
     };
 
 #else
 
     /**
      * Unix assert handler.
      *
      * Logs the failed assertion, prints a stack trace when one is available and
      * exits the process. In _DEBUG builds the caller is asked to break instead
      * when a debugger appears to be present.
      */
     struct UnixAssertHandler : public AssertHandler
     {
         // Assert handler method. 'msg' and 'arg' are not used here.
         virtual int assertion(const char * exp, const char * file, int line, const char * func, const char * msg, va_list arg)
         {
             if( func == NULL ) {
                 nvDebug( "*** Assertion failed: %s\n    On file: %s\n    On line: %d\n ", exp, file, line );
             }
             else {
                 nvDebug( "*** Assertion failed: %s\n    On file: %s\n    On function: %s\n    On line: %d\n ", exp, file, func, line );
             }

 #if _DEBUG
             if (debug::isDebuggerPresent()) {
                 return NV_ABORT_DEBUG;
             }
 #endif

 #if defined(HAVE_EXECINFO_H)
             if (hasStackTrace())
             {
                 void * frames[64];
                 const int frameCount = backtrace(frames, 64);
                 printStackTrace(frames, frameCount, 2);
             }
 #endif

             // Exit cleanly. No other outcome is possible past this point.
             exit(EXIT_FAILURE + 1);

             return NV_ABORT_EXIT;
         }
     };
 
 #endif
 
 } // namespace
 
 
 /// Reports a failed assertion to the installed assert handler, or to the
 /// platform default when none is installed, and returns the handler's verdict.
 int nvAbort(const char * exp, const char * file, int line, const char * func/*=NULL*/, const char * msg/*= NULL*/, ...)
 {
     // Platform fallback, used while no custom handler has been set.
 #if NV_OS_WIN32 //&& NV_CC_MSVC
     static Win32AssertHandler s_default_assert_handler;
 #elif NV_OS_XBOX
     static Xbox360AssertHandler s_default_assert_handler;
 #elif NV_OS_ORBIS
     static OrbisAssertHandler s_default_assert_handler;
 #else
     static UnixAssertHandler s_default_assert_handler;
 #endif

     AssertHandler * handler = &s_default_assert_handler;
     if (s_assert_handler != NULL) {
         handler = s_assert_handler;
     }

     va_list arg;
     va_start(arg,msg);
     const int result = handler->assertion(exp, file, line, func, msg, arg);
     va_end(arg);

     return result;
 }
 
 // Abnormal termination. Create mini dump and output call stack.
 //
 //  code - exit status handed to exit().
 //
 // The dump and the "crash.txt" stack trace are only produced on Win32; on
 // every other platform this is just exit(code).
 void debug::terminate(int code)
 {
 #if NV_OS_WIN32
     EnterCriticalSection(&s_handler_critical_section);
 
     // No exception record is available here, hence the NULL argument.
     writeMiniDump(NULL);
 
     const int max_stack_size = 64;
     void * trace[max_stack_size];
     int size = backtrace(trace, max_stack_size);
 
     // @@ Use win32's CreateFile?
     FILE * fp = fileOpen("crash.txt", "wb");
     if (fp != NULL) {
         Array<const char *> lines;
         writeStackTrace(trace, size, 0, lines);
 
         // Each line was released by writeStackTrace's builder and is freed here.
         for (uint i = 0; i < lines.count(); i++) {
             fputs(lines[i], fp);
             delete lines[i];
         }
 
         // @@ Add more info to crash.txt?
 
         fclose(fp);
     }
 
     LeaveCriticalSection(&s_handler_critical_section);
 #endif
 
     exit(code);
 }
 
 
 /// Forwards a printf-style message to the installed message handler.
 /// The message is dropped when no handler is installed.
 void NV_CDECL nvDebugPrint(const char *msg, ...)
 {
     if (s_message_handler == NULL) {
         return;
     }

     va_list args;
     va_start(args, msg);
     s_message_handler->log( msg, args );
     va_end(args);
 }
 
 
 /// Dump debug info: prints the current call stack through nvDebug on
 /// configurations where stack traces are supported, and does nothing elsewhere.
 void debug::dumpInfo()
 {
 #if (NV_OS_WIN32 && NV_CC_MSVC) || (defined(HAVE_SIGNAL_H) && defined(HAVE_EXECINFO_H))
     if (!hasStackTrace()) {
         return;
     }

     void * frames[64];
     const int frameCount = backtrace(frames, 64);

     nvDebug( "\nDumping stacktrace:\n" );

     // Index 1 is passed as the first frame to report.
     Array<const char *> text;
     writeStackTrace(frames, frameCount, 1, text);

     for (uint n = 0; n < text.count(); n++) {
         nvDebug("%s", text[n]);
         delete text[n];
     }
 #endif
 }
 
 /// Dump callstack using the specified handler.
 ///
 /// @param messageHandler        receives one log() call per stack line; it is
 ///                              dereferenced without a null check.
 /// @param callstackLevelsToSkip number of frames to omit in addition to this
 ///                              function's own frame.
 ///
 /// Does nothing on configurations without stack trace support.
 void debug::dumpCallstack(MessageHandler *messageHandler, int callstackLevelsToSkip /*= 0*/)
 {
 #if (NV_OS_WIN32 && NV_CC_MSVC) || (defined(HAVE_SIGNAL_H) && defined(HAVE_EXECINFO_H))
     if (hasStackTrace())
     {
         void * trace[64];
         int size = backtrace(trace, 64);
 
         Array<const char *> lines;
         writeStackTrace(trace, size, callstackLevelsToSkip + 1, lines);     // + 1 to skip the call to dumpCallstack
 
         // NOTE(review): each line is handed to log() in the format-string
         // position with no arguments; a '%' inside a symbol name would
         // presumably be read as a conversion — confirm how handlers treat it.
         for (uint i = 0; i < lines.count(); i++) {
 #if NV_CPU_ARM || NV_CPU_AARCH64 || NV_OS_DARWIN || NV_OS_IOS
             // These targets are given an empty va_list object instead of NULL.
             va_list empty_va_list = {};
             messageHandler->log(lines[i], empty_va_list);
 #else
             messageHandler->log(lines[i], NULL);
 #endif
             delete lines[i];
         }
     }
 #endif
 }
 
 
 /// Set the debug message handler.
 /// Only the pointer is stored; it replaces any handler installed before.
 void debug::setMessageHandler(MessageHandler * message_handler)
 {
     s_message_handler = message_handler;
 }
 
 /// Reset the debug message handler.
 /// Clears the stored pointer; the previous handler object is not deleted here.
 void debug::resetMessageHandler()
 {
     s_message_handler = NULL;
 }
 
 /// Set the assert handler.
 /// Only the pointer is stored; it replaces any handler installed before.
 void debug::setAssertHandler(AssertHandler * assert_handler)
 {
     s_assert_handler = assert_handler;
 }
 
 /// Reset the assert handler.
 /// Clears the stored pointer; the previous handler object is not deleted here.
 void debug::resetAssertHandler()
 {
     s_assert_handler = NULL;
 }
 
 #if NV_OS_WIN32
 #if NV_USE_SEPARATE_THREAD
 
 // Creates the synchronization objects and the thread that writes mini dumps
 // on behalf of a crashing thread.
 //
 // Initializes the handler critical section, creates the start/finish
 // semaphores (initial count 0, maximum 1) and, only if both exist, starts
 // ExceptionHandlerThreadMain with a 64 KiB initial stack.
 static void initHandlerThread()
 {
     static const int kExceptionHandlerThreadInitialStackSize = 64 * 1024;
 
     // Set synchronization primitives and the handler thread.  Each
     // ExceptionHandler object gets its own handler thread because that's the
     // only way to reliably guarantee sufficient stack space in an exception,
     // and it allows an easy way to get a snapshot of the requesting thread's
     // context outside of an exception.
     InitializeCriticalSection(&s_handler_critical_section);
     
     s_handler_start_semaphore = CreateSemaphore(NULL, 0, 1, NULL);
     nvDebugCheck(s_handler_start_semaphore != NULL);
 
     s_handler_finish_semaphore = CreateSemaphore(NULL, 0, 1, NULL);
     nvDebugCheck(s_handler_finish_semaphore != NULL);
 
     // Don't attempt to create the thread if we could not create the semaphores.
     if (s_handler_finish_semaphore != NULL && s_handler_start_semaphore != NULL) {
         DWORD thread_id;
         s_handler_thread = CreateThread(NULL,         // lpThreadAttributes
                                         kExceptionHandlerThreadInitialStackSize,
                                         ExceptionHandlerThreadMain,
                                         NULL,         // lpParameter
                                         0,            // dwCreationFlags
                                         &thread_id);
         nvDebugCheck(s_handler_thread != NULL);
     }
 
     /* @@ We should avoid loading modules in the exception handler!
     dbghelp_module_ = LoadLibrary(L"dbghelp.dll");
     if (dbghelp_module_) {
         minidump_write_dump_ = reinterpret_cast<MiniDumpWriteDump_type>(GetProcAddress(dbghelp_module_, "MiniDumpWriteDump"));
     }
     */
 }
 
 // Counterpart of initHandlerThread. Currently empty: the thread, the
 // semaphores and the critical section are not released.
 static void shutHandlerThread() {
     // @@ Free stuff. Terminate thread.
 }
 
 #endif // NV_USE_SEPARATE_THREAD
 #endif // NV_OS_WIN32
 
 
 // Enable signal handler.
 //
 //  interactive - stored in s_interactive, which the exception and assert
 //                handlers read to decide whether to attach a debugger or show
 //                a message box.
 //
 // Must not be called while the handler is already enabled (checked with
 // nvCheck). On Win32/MSVC this installs the unhandled-exception filter, the
 // invalid-parameter and pure-call handlers and initializes the symbol engine;
 // on platforms with <signal.h> it installs nvSigHandler for SIGSEGV, SIGTRAP,
 // SIGFPE and SIGBUS, saving the previous actions for disableSigHandler.
 void debug::enableSigHandler(bool interactive)
 {
     nvCheck(s_sig_handler_enabled != true);
     s_sig_handler_enabled = true;
     s_interactive = interactive;
 
 #if NV_OS_WIN32 && NV_CC_MSVC
     // NOTE(review): the suppression below is applied only when 'interactive'
     // is true — confirm that this polarity is the intended one.
     if (interactive) {
         // Do not display message boxes on error.
         // http://msdn.microsoft.com/en-us/library/windows/desktop/ms680621(v=vs.85).aspx
         SetErrorMode(SEM_FAILCRITICALERRORS|SEM_NOGPFAULTERRORBOX|SEM_NOOPENFILEERRORBOX);
 
         // CRT reports errors to debug output only.
         // http://msdn.microsoft.com/en-us/library/1y71x448(v=vs.80).aspx
         _CrtSetReportMode(_CRT_WARN, _CRTDBG_MODE_DEBUG);
         _CrtSetReportMode(_CRT_ERROR, _CRTDBG_MODE_DEBUG);
         _CrtSetReportMode(_CRT_ASSERT, _CRTDBG_MODE_DEBUG);
     }
 
 
 #if NV_USE_SEPARATE_THREAD
     initHandlerThread();
 #endif
 
     // Remember the previous filter so disableSigHandler can restore it.
     s_old_exception_filter = ::SetUnhandledExceptionFilter( handleException );
 
 #if _MSC_VER >= 1400  // MSVC 2005/8
     _set_invalid_parameter_handler(handleInvalidParameter);
 #endif  // _MSC_VER >= 1400
 
     _set_purecall_handler(handlePureVirtualCall);
 
 
     // SYMOPT_DEFERRED_LOADS make us not take a ton of time unless we actual log traces
     SymSetOptions(SYMOPT_DEFERRED_LOADS|SYMOPT_FAIL_CRITICAL_ERRORS|SYMOPT_LOAD_LINES|SYMOPT_UNDNAME);
 
     // A failure is only logged; the handlers installed above stay in place.
     if (!SymInitialize(GetCurrentProcess(), NULL, TRUE)) {
         DWORD error = GetLastError();
         nvDebug("SymInitialize returned error : %d\n", error);
     }
 
 #elif !NV_OS_WIN32 && defined(HAVE_SIGNAL_H)
 
     // Install our signal handler
     struct sigaction sa;
     sa.sa_sigaction = nvSigHandler;
     sigemptyset (&sa.sa_mask);
     sa.sa_flags = SA_ONSTACK | SA_RESTART | SA_SIGINFO;
 
     // The previous actions are saved so they can be restored later.
     sigaction(SIGSEGV, &sa, &s_old_sigsegv);
     sigaction(SIGTRAP, &sa, &s_old_sigtrap);
     sigaction(SIGFPE, &sa, &s_old_sigfpe);
     sigaction(SIGBUS, &sa, &s_old_sigbus);
 
 #endif
 }
 
 /// Disable signal handler.
 ///
 /// Must only be called while the handler is enabled (checked with nvCheck).
 /// On Win32/MSVC the previous unhandled-exception filter is restored and the
 /// symbol engine is cleaned up; the invalid-parameter and pure-call handlers
 /// and the handler thread are left as they are. On platforms with <signal.h>
 /// the four saved signal actions are restored.
 void debug::disableSigHandler()
 {
     nvCheck(s_sig_handler_enabled == true);
     s_sig_handler_enabled = false;
 
 #if NV_OS_WIN32 && NV_CC_MSVC
 
     ::SetUnhandledExceptionFilter( s_old_exception_filter );
     s_old_exception_filter = NULL;
 
     SymCleanup(GetCurrentProcess());
 
 #elif !NV_OS_WIN32 && defined(HAVE_SIGNAL_H)
 
     sigaction(SIGSEGV, &s_old_sigsegv, NULL);
     sigaction(SIGTRAP, &s_old_sigtrap, NULL);
     sigaction(SIGFPE, &s_old_sigfpe, NULL);
     sigaction(SIGBUS, &s_old_sigbus, NULL);
 
 #endif
 }
 
 
 /// Reports whether a debugger appears to be attached to this process.
 ///
 /// The check is platform specific: a kernel32 lookup on Win32, the platform
 /// debug API on Xbox/Orbis (always false in non-debug Xbox builds and when
 /// PS4_FINAL_REQUIREMENTS is set), the P_TRACED process flag on Darwin, and a
 /// session/parent process id comparison everywhere else.
 bool debug::isDebuggerPresent()
 {
 #if NV_OS_WIN32
     // Looked up at run time rather than called directly.
     HINSTANCE kernel32 = GetModuleHandleA("kernel32.dll");
     if (kernel32) {
         FARPROC IsDebuggerPresent = GetProcAddress(kernel32, "IsDebuggerPresent");
         if (IsDebuggerPresent != NULL && IsDebuggerPresent()) {
             return true;
         }
     }
     return false;
 #elif NV_OS_XBOX
 #ifdef _DEBUG
     return DmIsDebuggerPresent() == TRUE;
 #else
     return false;
 #endif
 #elif NV_OS_ORBIS
   #if PS4_FINAL_REQUIREMENTS
     return false; 
   #else
     return sceDbgIsDebuggerAttached() == 1;
   #endif
 #elif NV_OS_DARWIN
     int mib[4];
     struct kinfo_proc info;
     size_t size;
     mib[0] = CTL_KERN;
     mib[1] = KERN_PROC;
     mib[2] = KERN_PROC_PID;
     mib[3] = getpid();
     size = sizeof(info);
     // Pre-cleared so the test below reads 0 if sysctl leaves 'info' untouched;
     // the sysctl result itself is not checked.
     info.kp_proc.p_flag = 0;
     sysctl(mib,4,&info,&size,NULL,0);
     return ((info.kp_proc.p_flag & P_TRACED) == P_TRACED);
 #else
     // if ppid != sid, some process spawned our app, probably a debugger. 
     // This is a heuristic: any launcher that is not the session leader matches.
     return getsid(getpid()) != getppid();
 #endif
 }
 
 /// Tries to get a debugger attached to the current process.
 ///
 /// On Win32, when no debugger is present, VSJitDebugger.exe from the system
 /// directory is launched for this process id and waited for. Returns false
 /// when it could not be started or exited with a non-zero code. Otherwise the
 /// function polls for up to 300 x 200 ms and then returns true, whether or not
 /// a debugger was detected during the wait.
 ///
 /// On every other platform this always returns true without doing anything.
 bool debug::attachToDebugger()
 {
 #if NV_OS_WIN32
     if (isDebuggerPresent() == FALSE) {
         // Build: "<system dir>\VSJitDebugger.exe" -p <pid>
         Path process(1024);
         process.copy("\"");
         // Written after the opening quote, hence the +1 / -1.
         GetSystemDirectoryA(process.str() + 1, 1024 - 1);
 
         process.appendSeparator();
 
         process.appendFormat("VSJitDebugger.exe\" -p %lu", ::GetCurrentProcessId());
 
         STARTUPINFOA sSi;
         memset(&sSi, 0, sizeof(sSi));
 
         PROCESS_INFORMATION sPi;
         memset(&sPi, 0, sizeof(sPi));
         
         BOOL b = CreateProcessA(NULL, process.str(), NULL, NULL, FALSE, 0, NULL, NULL, &sSi, &sPi);
         if (b != FALSE) {
             ::WaitForSingleObject(sPi.hProcess, INFINITE);
             
             DWORD dwExitCode;
             ::GetExitCodeProcess(sPi.hProcess, &dwExitCode);
             if (dwExitCode != 0) //if exit code is zero, a debugger was selected
                 b = FALSE;
         }
 
         // sPi was zeroed above, so the handles are NULL if nothing was created.
         if (sPi.hThread != NULL) ::CloseHandle(sPi.hThread);
         if (sPi.hProcess != NULL) ::CloseHandle(sPi.hProcess);
 
         if (b == FALSE)
             return false;
 
         // Give the selected debugger time to attach.
         for (int i = 0; i < 5*60; i++) {
             if (isDebuggerPresent())
                 break;
             ::Sleep(200);
         }
     }
 #endif // NV_OS_WIN32
 
     return true;
 }
Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/Debug.h
===================================================================
--- ps/trunk/libraries/source/nvtt/src/src/nvcore/Debug.h	(revision 27269)
+++ ps/trunk/libraries/source/nvtt/src/src/nvcore/Debug.h	(revision 27270)
@@ -1,217 +1,217 @@
 // This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
 
 #pragma once
 #ifndef NV_CORE_DEBUG_H
 #define NV_CORE_DEBUG_H
 
 #include "nvcore.h"
 
 #include <stdarg.h> // va_list
 
 
 // Make sure we are using our assert.
 #undef assert
 
 #define NV_ABORT_DEBUG      1
 #define NV_ABORT_IGNORE     2
 #define NV_ABORT_EXIT       3
 
 #define nvNoAssert(exp) \
     NV_MULTI_LINE_MACRO_BEGIN \
     (void)sizeof(exp); \
     NV_MULTI_LINE_MACRO_END
 
 #if NV_NO_ASSERT
 
 #   define nvAssert(exp) nvNoAssert(exp)
 #   define nvCheck(exp) nvNoAssert(exp)
 #   define nvDebugAssert(exp) nvNoAssert(exp)
 #   define nvDebugCheck(exp) nvNoAssert(exp)
 #   define nvDebugBreak() nvNoAssert(0)
 
 #else // NV_NO_ASSERT
 
 #   if NV_CC_MSVC
         // @@ Does this work in msvc-6 and earlier?
 #       define nvDebugBreak()       __debugbreak()
 //#       define nvDebugBreak()        __asm { int 3 }
 #   elif NV_OS_ORBIS
 #       define nvDebugBreak()       __debugbreak()
 #   elif NV_CC_GNUC
 #       define nvDebugBreak()       __builtin_trap()
 #   else
 #       error "No nvDebugBreak()!"
 #   endif
 
 /*
 #   elif NV_CC_GNUC || NV_CPU_PPC && NV_OS_DARWIN
         // @@ Use __builtin_trap() on GCC
 #       define nvDebugBreak()       __asm__ volatile ("trap")
 #   elif (NV_CC_GNUC || NV_CPU_X86 || NV_CPU_X86_64) && NV_OS_DARWIN
 #       define nvDebugBreak()       __asm__ volatile ("int3")
 #   elif NV_CC_GNUC || NV_CPU_X86 || NV_CPU_X86_64
 #       define nvDebugBreak()       __asm__ ( "int %0" : :"I"(3) )
 #   else
 #       include <signal.h>
 #       define nvDebugBreak()       raise(SIGTRAP)
 #   endif
 */
 
 #define nvDebugBreakOnce() \
     NV_MULTI_LINE_MACRO_BEGIN \
     static bool firstTime = true; \
     if (firstTime) { firstTime = false; nvDebugBreak(); } \
     NV_MULTI_LINE_MACRO_END
 
 #define nvAssertMacro(exp) \
     NV_MULTI_LINE_MACRO_BEGIN \
     if (!(exp)) { \
         if (nvAbort(#exp, __FILE__, __LINE__, __FUNC__) == NV_ABORT_DEBUG) { \
             nvDebugBreak(); \
         } \
     } \
     NV_MULTI_LINE_MACRO_END
 
 // GCC, LLVM need "##" before the __VA_ARGS__, MSVC doesn't care
 #define nvAssertMacroWithIgnoreAll(exp,...) \
     NV_MULTI_LINE_MACRO_BEGIN \
         static bool ignoreAll = false; \
         if (!ignoreAll && !(exp)) { \
             int result = nvAbort(#exp, __FILE__, __LINE__, __FUNC__, ##__VA_ARGS__); \
             if (result == NV_ABORT_DEBUG) { \
                 nvDebugBreak(); \
             } else if (result == NV_ABORT_IGNORE) { \
                 ignoreAll = true; \
             } \
         } \
     NV_MULTI_LINE_MACRO_END
 
 // Interesting assert macro from Insomniac:
 // http://www.gdcvault.com/play/1015319/Developing-Imperfect-Software-How-to
 // Used as follows:
 // if (nvCheck(i < count)) {
 //     normal path
 // } else {
 //     fixup code.
 // }
 // This style of macro could be combined with __builtin_expect to let the compiler know failure is unlikely.
 #define nvCheckMacro(exp) \
     (\
         (exp) ? true : ( \
             (nvAbort(#exp, __FILE__, __LINE__, __FUNC__) == NV_ABORT_DEBUG) ? (nvDebugBreak(), true) : ( false ) \
         ) \
     )
 
 
 #define nvAssert(exp)    nvAssertMacro(exp)
 #define nvCheck(exp)     nvAssertMacro(exp)
 
 #if defined(_DEBUG)
 #   define nvDebugAssert(exp)   nvAssertMacro(exp)
 #   define nvDebugCheck(exp)    nvAssertMacro(exp)
 #else // _DEBUG
 #   define nvDebugAssert(exp)   nvNoAssert(exp)
 #   define nvDebugCheck(exp)    nvNoAssert(exp)
 #endif // _DEBUG
 
 #endif // NV_NO_ASSERT
 
 // Use nvAssume for very simple expressions only: nvAssume(0), nvAssume(value == true), etc.
 /*#if !defined(_DEBUG)
 #   if NV_CC_MSVC
 #       define nvAssume(exp)    __assume(exp)
 #   else
 #       define nvAssume(exp)    nvCheck(exp)
 #   endif
 #else
 #   define nvAssume(exp)    nvCheck(exp)
 #endif*/
 
 #if defined(_DEBUG)
 #  if NV_CC_MSVC
 #   define nvUnreachable() nvAssert(0 && "unreachable"); __assume(0)
 #  else
 #   define nvUnreachable() nvAssert(0 && "unreachable"); __builtin_unreachable()
 #  endif
 #else
 #  if NV_CC_MSVC
 #   define nvUnreachable() __assume(0)
 #  else
 #   define nvUnreachable() __builtin_unreachable()
 #  endif
 #endif
 
 
 #define nvError(x)      nvAbort(x, __FILE__, __LINE__, __FUNC__)
 #define nvWarning(x)    nvDebugPrint("*** Warning %s/%d: %s\n", __FILE__, __LINE__, (x))
 
 #ifndef NV_DEBUG_PRINT
 #define NV_DEBUG_PRINT 1 //defined(_DEBUG)
 #endif
 
 #if NV_DEBUG_PRINT
 #define nvDebug(...)    nvDebugPrint(__VA_ARGS__)
 #else
 #if NV_CC_MSVC
 #define nvDebug(...)    __noop(__VA_ARGS__)
 #else
 #define nvDebug(...)    ((void)0) // Non-msvc platforms do not evaluate arguments?
 #endif
 #endif
 
 
 NVCORE_API int nvAbort(const char *exp, const char *file, int line, const char * func = NULL, const char * msg = NULL, ...) __attribute__((format (printf, 5, 6)));
 NVCORE_API void NV_CDECL nvDebugPrint( const char *msg, ... ) __attribute__((format (printf, 1, 2)));
 
 namespace nv
 {
     // Cheap plausibility check for a pointer value.
     //
     // NULL is accepted. On the 64-bit targets listed below, addresses under
     // 0x10000 or at/above 0x000007FFFFFEFFFF are rejected. On all other targets
     // four fixed 32-bit patterns are rejected (presumably debug fill values —
     // confirm). Everything else is reported as valid; the pointer is never
     // dereferenced.
     inline bool isValidPtr(const void * ptr) {
-    #if NV_CPU_X86_64 || POSH_CPU_PPC64 || NV_CPU_AARCH64
+    #if NV_CPU_X86_64 || POSH_CPU_PPC64 || NV_CPU_AARCH64 || NV_CPU_E2K
         if (ptr == NULL) return true;
         if (reinterpret_cast<uint64>(ptr) < 0x10000ULL) return false;
         if (reinterpret_cast<uint64>(ptr) >= 0x000007FFFFFEFFFFULL) return false;
     #else
 	    if (reinterpret_cast<uint32>(ptr) == 0xcccccccc) return false;
 	    if (reinterpret_cast<uint32>(ptr) == 0xcdcdcdcd) return false;
 	    if (reinterpret_cast<uint32>(ptr) == 0xdddddddd) return false;
 	    if (reinterpret_cast<uint32>(ptr) == 0xffffffff) return false;
     #endif
         return true;
     }
 
     // Message handler interface.
     // log() receives a printf-style format string and its argument list.
     struct MessageHandler {
         virtual void log(const char * str, va_list arg) = 0;
         virtual ~MessageHandler() {}
     };
 
     // Assert handler interface.
     // assertion() receives the failed expression, its source location and an
     // optional printf-style message. The NV_ABORT_* constants defined in this
     // header are the values compared against its result.
     struct AssertHandler {
         virtual int assertion(const char *exp, const char *file, int line, const char *func, const char *msg, va_list arg) = 0;
         virtual ~AssertHandler() {}
     };
 
 
     namespace debug
     {
         // Stack trace output.
         NVCORE_API void dumpInfo();
         NVCORE_API void dumpCallstack( MessageHandler *messageHandler, int callstackLevelsToSkip = 0 );
 
         // Install / remove the handler that receives debug messages.
         NVCORE_API void setMessageHandler( MessageHandler * messageHandler );
         NVCORE_API void resetMessageHandler();
 
         // Install / remove the handler that receives failed assertions.
         NVCORE_API void setAssertHandler( AssertHandler * assertHanlder );
         NVCORE_API void resetAssertHandler();
 
         // Install / remove the crash handlers.
         NVCORE_API void enableSigHandler(bool interactive);
         NVCORE_API void disableSigHandler();
 
         // Debugger detection and attachment.
         NVCORE_API bool isDebuggerPresent();
         NVCORE_API bool attachToDebugger();
 
         // Ends the process with the given exit code.
         NVCORE_API void terminate(int code);
     }
 
 } // nv namespace
 
 #endif // NV_CORE_DEBUG_H
Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/Timer.h
===================================================================
--- ps/trunk/libraries/source/nvtt/src/src/nvcore/Timer.h	(revision 27269)
+++ ps/trunk/libraries/source/nvtt/src/src/nvcore/Timer.h	(revision 27270)
@@ -1,53 +1,59 @@
 // This code is in the public domain -- castano@gmail.com
 
 #pragma once
 #ifndef NV_CORE_TIMER_H
 #define NV_CORE_TIMER_H
 
 #include "nvcore.h"
 
 #if NV_CC_MSVC
 #include <intrin.h>
 #endif
 
 namespace nv {
 
 // fastCpuClock(): reads a raw CPU counter with a single instruction where one
 // is available, and returns 0 on configurations without a dedicated branch.
 // The value is returned as read; no scaling to time units is done here.
 #if NV_CC_MSVC
     // MSVC: time-stamp counter through the __rdtsc intrinsic from <intrin.h>.
     NV_FORCEINLINE uint64 fastCpuClock() { return __rdtsc(); }
 #elif NV_CC_GNUC && NV_CPU_X86
     // 32-bit x86: bytes 0x0f 0x31 encode RDTSC; the "=A" constraint binds the
     // 64-bit result to the edx:eax register pair.
     NV_FORCEINLINE uint64 fastCpuClock() {
         uint64 val;
         __asm__ volatile (".byte 0x0f, 0x31" : "=A" (val));
         return val;
     }
 #elif NV_CC_GNUC && NV_CPU_X86_64
     // x86-64: RDTSC returns the low half in eax and the high half in edx,
     // which are recombined into one 64-bit value.
     NV_FORCEINLINE uint64 fastCpuClock() {
         uint hi, lo;
         __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
         return uint64(lo) | (uint64(hi) << 32);
     }
+#elif NV_CPU_E2K
+    NV_FORCEINLINE uint64 fastCpuClock() {
+        uint64 val;
+        asm volatile("rrd %%clkr, %0" : "=r" (val));
+        return val;
+    }
 #else
     // No fast counter known for this compiler/CPU combination.
     NV_FORCEINLINE uint64 fastCpuClock() { return 0; }    
 #endif
     
     uint64 systemClockFrequency();
     uint64 systemClock();
 
     // Simple stopwatch built on systemClock() / systemClockFrequency().
     // Usage: start(), do work, stop(), then elapsed() yields the interval as
     // (stop - start) / systemClockFrequency().
     class NVCORE_CLASS Timer
     {
     public:
         // Zero both timestamps so elapsed() is well defined (and yields 0)
         // even when it is called before start()/stop().
         Timer() : m_start(0), m_stop(0) {}
 
         // Record the current system clock as the beginning of the interval.
         void start() { m_start = systemClock(); }
         // Record the current system clock as the end of the interval.
         void stop() { m_stop = systemClock(); }
 
         // Tick difference between the last stop() and start(), divided by the
         // clock frequency.
         float elapsed() const { return float(m_stop - m_start) / systemClockFrequency(); }
 
     private:
         uint64 m_start; // systemClock() value captured by start()
         uint64 m_stop;  // systemClock() value captured by stop()
     };
 
 } // nv namespace
 
 #endif // NV_CORE_TIMER_H
Index: ps/trunk/libraries/source/nvtt/src/src/nvcore/nvcore.h
===================================================================
--- ps/trunk/libraries/source/nvtt/src/src/nvcore/nvcore.h	(revision 27269)
+++ ps/trunk/libraries/source/nvtt/src/src/nvcore/nvcore.h	(revision 27270)
@@ -1,307 +1,310 @@
 // This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
 
 #pragma once
 #ifndef NV_CORE_H
 #define NV_CORE_H
 
 // Function linkage
 #if NVCORE_SHARED
 #ifdef NVCORE_EXPORTS
 #define NVCORE_API DLL_EXPORT
 #define NVCORE_CLASS DLL_EXPORT_CLASS
 #else
 #define NVCORE_API DLL_IMPORT
 #define NVCORE_CLASS DLL_IMPORT
 #endif
 #else // NVCORE_SHARED
 #define NVCORE_API
 #define NVCORE_CLASS
 #endif // NVCORE_SHARED
 
 
 // Platform definitions
 #include <posh.h>
 
 // OS:
 // NV_OS_WIN32
 // NV_OS_WIN64
 // NV_OS_MINGW
 // NV_OS_CYGWIN
 // NV_OS_LINUX
 // NV_OS_UNIX
 // NV_OS_DARWIN
 // NV_OS_XBOX
 // NV_OS_ORBIS
 // NV_OS_IOS
 
 #define NV_OS_STRING POSH_OS_STRING
 
 #if defined POSH_OS_LINUX
 #   define NV_OS_LINUX 1
 #   define NV_OS_UNIX 1
 #elif defined POSH_OS_ORBIS
 #   define NV_OS_ORBIS 1
 #elif defined POSH_OS_FREEBSD
 #   define NV_OS_FREEBSD 1
 #   define NV_OS_UNIX 1
 #elif defined POSH_OS_NETBSD
 #   define NV_OS_NETBSD 1
 #   define NV_OS_UNIX 1
 #elif defined POSH_OS_OPENBSD
 #   define NV_OS_OPENBSD 1
 #   define NV_OS_UNIX 1
 #elif defined POSH_OS_CYGWIN32
 #   define NV_OS_CYGWIN 1
 #elif defined POSH_OS_MINGW
 #   define NV_OS_MINGW 1
 #   define NV_OS_WIN32 1
 #elif defined POSH_OS_OSX
 #   define NV_OS_DARWIN 1
 #   define NV_OS_UNIX 1
 #elif defined POSH_OS_IOS
 #   define NV_OS_DARWIN 1 //ACS should we keep this on IOS?
 #   define NV_OS_UNIX 1
 #   define NV_OS_IOS 1
 #elif defined POSH_OS_UNIX
 #   define NV_OS_UNIX 1
 #elif defined POSH_OS_WIN64
 #   define NV_OS_WIN32 1
 #   define NV_OS_WIN64 1
 #elif defined POSH_OS_WIN32
 #   define NV_OS_WIN32 1
 #elif defined POSH_OS_XBOX
 #   define NV_OS_XBOX 1
 #else
 #   error "Unsupported OS"
 #endif
 
 
 // Threading:
 // some platforms don't implement __thread or similar for thread-local-storage
 #if NV_OS_UNIX || NV_OS_ORBIS || NV_OS_IOS //ACStodoIOS darwin instead of ios?
 #   define NV_OS_USE_PTHREAD 1
 #   if NV_OS_DARWIN || NV_OS_IOS
 #       define NV_OS_HAS_TLS_QUALIFIER 0
 #   else
 #       define NV_OS_HAS_TLS_QUALIFIER 1
 #   endif
 #else
 #   define NV_OS_USE_PTHREAD 0
 #   define NV_OS_HAS_TLS_QUALIFIER 1
 #endif
 
 
 // CPUs:
 // NV_CPU_X86
 // NV_CPU_X86_64
 // NV_CPU_PPC
 // NV_CPU_ARM
 // NV_CPU_AARCH64
+// NV_CPU_E2K
 
 #define NV_CPU_STRING   POSH_CPU_STRING
 
 #if defined POSH_CPU_X86_64
 //#   define NV_CPU_X86 1
 #   define NV_CPU_X86_64 1
 #elif defined POSH_CPU_X86
 #   define NV_CPU_X86 1
 #elif defined POSH_CPU_PPC
 #   define NV_CPU_PPC 1
 #elif defined POSH_CPU_STRONGARM
 #   define NV_CPU_ARM 1
 #elif defined POSH_CPU_AARCH64
 #   define NV_CPU_AARCH64 1
+#elif defined POSH_CPU_E2K
+#   define NV_CPU_E2K 1
 #else
 #   error "Unsupported CPU"
 #endif
 
 
 // Compiler:
 // NV_CC_GNUC
 // NV_CC_MSVC
 // NV_CC_CLANG
 
 #if defined POSH_COMPILER_CLANG
 #   define NV_CC_CLANG  1
 #   define NV_CC_GNUC   1    // Clang is compatible with GCC.
 #   define NV_CC_STRING "clang"
 #elif defined POSH_COMPILER_GCC
 #   define NV_CC_GNUC   1
 #   define NV_CC_STRING "gcc"
 #elif defined POSH_COMPILER_MSVC
 #   define NV_CC_MSVC   1
 #   define NV_CC_STRING "msvc"
 #else
 #   error "Unsupported compiler"
 #endif
 
 #if NV_CC_MSVC
 #define NV_CC_CPP11 (__cplusplus > 199711L || _MSC_VER >= 1800) // Visual Studio 2013 has all the features we use, but doesn't advertise full C++11 support yet.
 #else
 // @@ IC: This works in CLANG, about GCC?
 // @@ ES: Doesn't work in gcc. These 3 features are available in GCC >= 4.4.
 #ifdef __clang__
 #define NV_CC_CPP11 (__has_feature(cxx_deleted_functions) && __has_feature(cxx_rvalue_references) && __has_feature(cxx_static_assert))
 #elif defined __GNUC__ 
 #define NV_CC_CPP11 ( __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4))
 #endif
 #endif
 
 // Endianness:
 #define NV_LITTLE_ENDIAN    POSH_LITTLE_ENDIAN
 #define NV_BIG_ENDIAN       POSH_BIG_ENDIAN
 #define NV_ENDIAN_STRING    POSH_ENDIAN_STRING
 
 
 // Define the right printf prefix for size_t arguments:
 #if POSH_64BIT_POINTER
 #  define NV_SIZET_PRINTF_PREFIX POSH_I64_PRINTF_PREFIX
 #else
 #  define NV_SIZET_PRINTF_PREFIX
 #endif
 
 
 // cmake config
 #include "nvconfig.h"
 
 
 // Type definitions:
 typedef posh_u8_t   uint8;
 typedef posh_i8_t   int8;
 
 typedef posh_u16_t  uint16;
 typedef posh_i16_t  int16;
 
 typedef posh_u32_t  uint32;
 typedef posh_i32_t  int32;
 
 typedef posh_u64_t  uint64;
 typedef posh_i64_t  int64;
 
 // Aliases
 typedef uint32      uint;
 
 
 // Version string:
 #define NV_VERSION_STRING \
     NV_OS_STRING "/" NV_CC_STRING "/" NV_CPU_STRING"/" \
     NV_ENDIAN_STRING"-endian - " __DATE__ "-" __TIME__
 
 
 // Disable copy constructor and assignment operator. 
 // Use inside a class body as NV_FORBID_COPY(ClassName);
 // With C++11 support the two members are declared as deleted. Otherwise they
 // are declared without a definition under a "private:" label, so the access
 // level stays private for whatever follows the macro in the class body.
 #if NV_CC_CPP11
 #define NV_FORBID_COPY(C) \
     C( const C & ) = delete; \
     C &operator=( const C & ) = delete
 #else
 #define NV_FORBID_COPY(C) \
     private: \
     C( const C & ); \
     C &operator=( const C & )
 #endif
 
 // Disable dynamic allocation on the heap. 
 // See Prohibiting Heap-Based Objects in More Effective C++.
 #define NV_FORBID_HEAPALLOC() \
     private: \
     void *operator new(size_t size); \
     void *operator new[](size_t size)
     //static void *operator new(size_t size); \
     //static void *operator new[](size_t size);
 
 // String concatenation macros.
 #define NV_STRING_JOIN2(arg1, arg2) NV_DO_STRING_JOIN2(arg1, arg2)
 #define NV_DO_STRING_JOIN2(arg1, arg2) arg1 ## arg2
 #define NV_STRING_JOIN3(arg1, arg2, arg3) NV_DO_STRING_JOIN3(arg1, arg2, arg3)
 #define NV_DO_STRING_JOIN3(arg1, arg2, arg3) arg1 ## arg2 ## arg3
 #define NV_STRING2(x) #x
 #define NV_STRING(x) NV_STRING2(x)
 
 #if NV_CC_MSVC
 #define NV_MULTI_LINE_MACRO_BEGIN do {  
 #define NV_MULTI_LINE_MACRO_END \
     __pragma(warning(push)) \
     __pragma(warning(disable:4127)) \
     } while(false) \
     __pragma(warning(pop))  
 #else
 #define NV_MULTI_LINE_MACRO_BEGIN do {
 #define NV_MULTI_LINE_MACRO_END } while(false)
 #endif
 
 // Compile-time assertion.
 // With C++11 support this maps to static_assert. The fallback declares an
 // array typedef whose size is 1 when the condition holds and -1 when it does
 // not: a negative size is rejected by every compiler, whereas a size of 0
 // (what a plain [(x)] yields on failure) is accepted by GCC as an extension.
 #if NV_CC_CPP11
 #define nvStaticCheck(x) static_assert((x), "Static assert "#x" failed")
 #else
 #define nvStaticCheck(x) typedef char NV_STRING_JOIN2(__static_assert_,__LINE__)[(x) ? 1 : -1]
 #endif
 #define NV_COMPILER_CHECK(x) nvStaticCheck(x)   // I like this name best.
 
 // Make sure type definitions are fine.
 // Every fixed-width alias is checked once, including the 64-bit ones.
 NV_COMPILER_CHECK(sizeof(int8) == 1);
 NV_COMPILER_CHECK(sizeof(uint8) == 1);
 NV_COMPILER_CHECK(sizeof(int16) == 2);
 NV_COMPILER_CHECK(sizeof(uint16) == 2);
 NV_COMPILER_CHECK(sizeof(int32) == 4);
 NV_COMPILER_CHECK(sizeof(uint32) == 4);
 NV_COMPILER_CHECK(sizeof(int64) == 8);
 NV_COMPILER_CHECK(sizeof(uint64) == 8);
 
 
 #define NV_ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0]))
 
 #if 0 // Disabled in The Witness.
 #if NV_CC_MSVC
 #define NV_MESSAGE(x) message(__FILE__ "(" NV_STRING(__LINE__) ") : " x)
 #else
 #define NV_MESSAGE(x) message(x)
 #endif
 #else
 #define NV_MESSAGE(x) 
 #endif
 
 
 // Startup initialization macro.
 // Expands to an anonymous-namespace struct whose constructor body is
 // `some_code`, plus one static instance of it, so the code runs when that
 // instance is constructed. Struct and instance names embed __LINE__, so two
 // uses in one translation unit must sit on different source lines.
 #define NV_AT_STARTUP(some_code) \
     namespace { \
         static struct NV_STRING_JOIN2(AtStartup_, __LINE__) { \
             NV_STRING_JOIN2(AtStartup_, __LINE__)() { some_code; } \
         } \
         NV_STRING_JOIN3(AtStartup_, __LINE__, Instance); \
     }
 
 // Tell the compiler that a parameter is intentionally unused, to suppress
 // compiler warnings. A cast to void has no side effects, works for const
 // objects and does not provoke self-assignment diagnostics.
 #define NV_UNUSED(a) ((void)(a))
 
 // Null index. @@ Move this somewhere else... it's only used by nvmesh.
 //const unsigned int NIL = unsigned int(~0);
 //#define NIL uint(~0)
 
 // Null pointer.
 #ifndef NULL
 #define NULL 0
 #endif
 
 // Platform includes
 #if NV_CC_MSVC
 #   if NV_OS_WIN32
 #       include "DefsVcWin32.h"
 #   elif NV_OS_XBOX
 #       include "DefsVcXBox.h"
 #   else
 #       error "MSVC: Platform not supported"
 #   endif
 #elif NV_CC_GNUC
 #   if NV_OS_LINUX
 #       include "DefsGnucLinux.h"
 #   elif NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_NETBSD || NV_OS_OPENBSD
 #       include "DefsGnucDarwin.h"
 #   elif NV_OS_MINGW
 #       include "DefsGnucWin32.h"
 #   elif NV_OS_CYGWIN
 #       error "GCC: Cygwin not supported"
 #   else
 #       error "GCC: Platform not supported"
 #   endif
 #endif
 
 #endif // NV_CORE_H
Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/Half.cpp
===================================================================
--- ps/trunk/libraries/source/nvtt/src/src/nvmath/Half.cpp	(revision 27269)
+++ ps/trunk/libraries/source/nvtt/src/src/nvmath/Half.cpp	(revision 27270)
@@ -1,789 +1,789 @@
 // Branch-free implementation of half-precision (16 bit) floating point
 // Copyright 2006 Mike Acton <macton@gmail.com>
 // 
 // Permission is hereby granted, free of charge, to any person obtaining a 
 // copy of this software and associated documentation files (the "Software"),
 // to deal in the Software without restriction, including without limitation
 // the rights to use, copy, modify, merge, publish, distribute, sublicense, 
 // and/or sell copies of the Software, and to permit persons to whom the 
 // Software is furnished to do so, subject to the following conditions:
 // 
 // The above copyright notice and this permission notice shall be included 
 // in all copies or substantial portions of the Software.
 // 
 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 // THE SOFTWARE
 //
 // Half-precision floating point format
 // ------------------------------------
 //
 //   | Field    | Last | First | Note
 //   |----------|------|-------|----------
 //   | Sign     | 15   | 15    |
 //   | Exponent | 14   | 10    | Bias = 15
 //   | Mantissa | 9    | 0     |
 //
 // Compiling
 // ---------
 //
 //  Preferred compile flags for GCC: 
 //     -O3 -fstrict-aliasing -std=c99 -pedantic -Wall -Wstrict-aliasing
 //
 //     This file is a C99 source file, intended to be compiled with a C99 
 //     compliant compiler. However, for the moment it remains compatible
 //     with C++98. Therefore if you are using a compiler that poorly implements
 //     C standards (e.g. MSVC), it may be compiled as C++. This is not
 //     guaranteed for future versions. 
 //
 // Features
 // --------
 //
 //  * QNaN + <x>  = QNaN
 //  * <x>  + +INF = +INF
 //  * <x>  - -INF = -INF
 //  * INF  - INF  = SNaN
 //  * Denormalized values
 //  * Difference of ZEROs is always +ZERO
 //  * Sum round with guard + round + sticky bit (grs)
 //  * And of course... no branching
 // 
 // Precision of Sum
 // ----------------
 //
 //  (SUM)        uint16 z = half_add( x, y );
 //  (DIFFERENCE) uint16 z = half_add( x, -y );
 //
 //     Will have exactly (0 ulps difference) the same result as:
 //     (For 32 bit IEEE 754 floating point and same rounding mode)
 //
 //     union FLOAT_32
 //     {
 //       float    f32;
 //       uint32 u32;
 //     };
 //
 //     union FLOAT_32 fx = { .u32 = half_to_float( x ) };
 //     union FLOAT_32 fy = { .u32 = half_to_float( y ) };
 //     union FLOAT_32 fz = { .f32 = fx.f32 + fy.f32    };
 //     uint16       z  = float_to_half( fz );
 //
 
 #include "Half.h"
 #include <stdio.h>
 
 
 // Load immediate
 // Identity: returns its argument unchanged. Used below to name constants.
 static inline uint32 _uint32_li( uint32 a )
 {
     return (a);
 }
 
 // Decrement
 // Unsigned arithmetic, so an input of 0 wraps around to 0xffffffff.
 static inline uint32 _uint32_dec( uint32 a )
 {
     return (a - 1);
 }
 
 // Increment
 // Unsigned arithmetic, so 0xffffffff wraps around to 0.
 static inline uint32 _uint32_inc( uint32 a )
 {
   return (a + 1);
 }
 
 // Complement
 // Bitwise NOT of all 32 bits.
 static inline uint32 _uint32_not( uint32 a )
 {
     return (~a);
 }
 
 // Negate
 // Two's complement negation, written as a subtraction from zero. For unsigned
 // operands this wraps exactly like unary minus, but it does not raise MSVC
 // warning C4146, so no compiler-specific warning pragmas are needed.
 static inline uint32 _uint32_neg( uint32 a )
 {
     return (0u - a);
 }
 
 // Extend sign
 // Shifts the value right by 31 as a signed int32. With an arithmetic shift
 // (what the selection helpers in this file rely on) the result is all ones
 // when bit 31 of `a` is set and zero otherwise.
 static inline uint32 _uint32_ext( uint32 a )
 {
     return (((int32)a)>>31);
 }
 
 // And
 static inline uint32 _uint32_and( uint32 a, uint32 b )
 {
     return (a & b);
 }
 
 // And with Complement
 // Keeps the bits of `a` that are NOT set in `b`.
 static inline uint32 _uint32_andc( uint32 a, uint32 b )
 {
     return (a & ~b);
 }
 
 // Or
 static inline uint32 _uint32_or( uint32 a, uint32 b )
 {
     return (a | b);
 }
 
 // Shift Right Logical
 // `a` is unsigned, so zeros are shifted in from the top.
 static inline uint32 _uint32_srl( uint32 a, int sa )
 {
     return (a >> sa);
 }
 
 // Shift Left Logical
 static inline uint32 _uint32_sll( uint32 a, int sa )
 {
     return (a << sa);
 }
 
 // Add
 // Unsigned addition; wraps modulo 2^32.
 static inline uint32 _uint32_add( uint32 a, uint32 b )
 {
     return (a + b);
 }
 
 // Subtract
 // Unsigned subtraction; wraps modulo 2^32. The converters below use the top
 // bit of such differences as a comparison flag.
 static inline uint32 _uint32_sub( uint32 a, uint32 b )
 {
     return (a - b);
 }
 
 // Select on Sign bit
 // Branch-free choice keyed on bit 31 of `test`: the result is `a` when that
 // bit is set and `b` when it is clear.
 static inline uint32 _uint32_sels( uint32 test, uint32 a, uint32 b )
 {
     // Sign bit of `test` replicated into every bit position.
     const uint32 sign_fill = _uint32_ext( test );
 
     return _uint32_or( _uint32_and( a, sign_fill ), _uint32_andc( b, sign_fill ) );
 }
 
 // Load Immediate
 // Identity: returns its argument unchanged.
 static inline uint16 _uint16_li( uint16 a )
 {
     return (a);
 }
 
 // Extend sign
 // Shifts the value right by 15 as a signed int16; with an arithmetic shift
 // the result is 0xffff when bit 15 of `a` is set and 0 otherwise.
 static inline uint16 _uint16_ext( uint16 a )
 {
     return (((int16)a)>>15);
 }
 
 // Negate
 // The operand is promoted to int before negation; the result is converted
 // back to 16 bits on return.
 static inline uint16 _uint16_neg( uint16 a )
 {
     return (-a);
 }
 
 // Complement
 // Bitwise NOT, truncated back to 16 bits on return.
 static inline uint16 _uint16_not( uint16 a )
 {
     return (~a);
 }
 
 // Decrement
 // An input of 0 yields 0xffff after conversion back to 16 bits.
 static inline uint16 _uint16_dec( uint16 a )
 {
     return (a - 1);
 }
 
 // Shift Left Logical
 // Bits shifted past bit 15 are dropped by the 16-bit return type.
 static inline uint16 _uint16_sll( uint16 a, int sa )
 {
     return (a << sa);
 }
 
 // Shift Right Logical
 static inline uint16 _uint16_srl( uint16 a, int sa )
 {
     return (a >> sa);
 }
 
 // Add
 // The sum is truncated to 16 bits on return.
 static inline uint16 _uint16_add( uint16 a, uint16 b )
 {
     return (a + b);
 }
 
 // Subtract
 // The difference is truncated to 16 bits on return.
 static inline uint16 _uint16_sub( uint16 a, uint16 b )
 {
     return (a - b);
 }
 
 // And
 static inline uint16 _uint16_and( uint16 a, uint16 b )
 {
     return (a & b);
 }
 
 // Or
 static inline uint16 _uint16_or( uint16 a, uint16 b )
 {
     return (a | b);
 }
 
 // Exclusive Or
 static inline uint16 _uint16_xor( uint16 a, uint16 b )
 {
     return (a ^ b);
 }
 
 // And with Complement
 // Keeps the bits of `a` that are NOT set in `b`.
 static inline uint16 _uint16_andc( uint16 a, uint16 b )
 {
     return (a & ~b);
 }
 
 // And then Shift Right Logical
 static inline uint16 _uint16_andsrl( uint16 a, uint16 b, int sa )
 {
     return ((a & b) >> sa);
 }
 
 // Shift Right Logical then Mask
 static inline uint16 _uint16_srlm( uint16 a, int sa, uint16 mask )
 {
     return ((a >> sa) & mask);
 }
 
 // Add then Mask
 static inline uint16 _uint16_addm( uint16 a, uint16 b, uint16 mask )
 {
     return ((a + b) & mask);
 }
 
 
 // Select on Sign bit
 // Branch-free choice keyed on bit 15 of `test`: the result is `a` when that
 // bit is set and `b` when it is clear.
 static inline uint16 _uint16_sels( uint16 test, uint16 a, uint16 b )
 {
     // Sign bit of `test` replicated into every bit position.
     const uint16 sign_fill = _uint16_ext( test );
 
     return _uint16_or( _uint16_and( a, sign_fill ), _uint16_andc( b, sign_fill ) );
 }
 
 // Platform support for counting leading zeros:
 //  - Xbox: pull in the PowerPC intrinsics header.
 //  - other MSVC targets: define _uint32_nlz() on top of _BitScanReverse.
 #if NV_OS_XBOX
 #include <PPCIntrinsics.h>
 #elif NV_CC_MSVC
 
 #include <intrin.h>
 #pragma intrinsic(_BitScanReverse)
 
 // Number of leading zeros of x, computed as 31 minus the index of the highest
 // set bit. _BitScanReverse leaves `index` unset when x is 0, so the result is
 // only meaningful for non-zero x; the caller has to handle zero itself.
 uint32 _uint32_nlz( uint32 x ) {
     unsigned long index;
     _BitScanReverse(&index, x);
     return 31 - index;
 }
 #endif
 
 
 // Count Leading Zeros
 // Returns how many zero bits precede the most significant set bit of x, and
 // 32 when x is zero (the Xbox intrinsic is presumed to follow the same
 // convention as the PowerPC cntlzw instruction -- confirm).
 static inline uint32 _uint32_cntlz( uint32 x )
 {
 #if NV_CC_GNUC
     /* On PowerPC, this will map to insn: cntlzw   */
     /* On x86, this will map to insn: bsr or lzcnt */
     // (x | -x) has its sign bit set exactly when x is non-zero; -x alone
     // would miss every x above 0x80000000.
     uint32 is_x_nez_msb = _uint32_or( x, _uint32_neg( x ) );
     // __builtin_clz is undefined for 0. Forcing bit 0 on keeps the argument
     // non-zero without changing the count for any non-zero x.
     uint32 nlz          = __builtin_clz( x | 1 );
     uint32 result       = _uint32_sels( is_x_nez_msb, nlz, 0x00000020 );
     return (result);
 #elif NV_OS_XBOX
     // Xbox PPC has this as an intrinsic.
     return _CountLeadingZeros(x);
 #elif NV_CC_MSVC
     // Same zero handling as above: _BitScanReverse has no defined result for 0.
     uint32 is_x_nez_msb = _uint32_or( x, _uint32_neg( x ) );
     uint32 nlz          = _uint32_nlz( x | 1 );
     uint32 result       = _uint32_sels( is_x_nez_msb, nlz, 0x00000020 );
     return (result);
 #else
     // Portable fallback: smear the highest set bit into every lower position,
     // invert, and count the remaining one bits with a parallel bit count.
     const uint32 x0  = _uint32_srl(  x,  1 );
     const uint32 x1  = _uint32_or(   x,  x0 );
     const uint32 x2  = _uint32_srl(  x1, 2 );
     const uint32 x3  = _uint32_or(   x1, x2 );
     const uint32 x4  = _uint32_srl(  x3, 4 );
     const uint32 x5  = _uint32_or(   x3, x4 );
     const uint32 x6  = _uint32_srl(  x5, 8 );
     const uint32 x7  = _uint32_or(   x5, x6 );
     const uint32 x8  = _uint32_srl(  x7, 16 );
     const uint32 x9  = _uint32_or(   x7, x8 );
     const uint32 xA  = _uint32_not(  x9 );
     const uint32 xB  = _uint32_srl(  xA, 1 );
     const uint32 xC  = _uint32_and(  xB, 0x55555555 );
     const uint32 xD  = _uint32_sub(  xA, xC );
     const uint32 xE  = _uint32_and(  xD, 0x33333333 );
     const uint32 xF  = _uint32_srl(  xD, 2 );
     const uint32 x10 = _uint32_and(  xF, 0x33333333 );
     const uint32 x11 = _uint32_add(  xE, x10 );
     const uint32 x12 = _uint32_srl(  x11, 4 );
     const uint32 x13 = _uint32_add(  x11, x12 );
     const uint32 x14 = _uint32_and(  x13, 0x0f0f0f0f );
     const uint32 x15 = _uint32_srl(  x14, 8 );
     const uint32 x16 = _uint32_add(  x14, x15 );
     const uint32 x17 = _uint32_srl(  x16, 16 );
     const uint32 x18 = _uint32_add(  x16, x17 );
     const uint32 x19 = _uint32_and(  x18, 0x0000003f );
     return ( x19 );
 #endif
 }
 
 // Count Leading Zeros
 // 16-bit variant: number of zero bits above the most significant set bit of
 // x, and 16 when x is zero.
 static inline uint16 _uint16_cntlz( uint16 x )
 {
 #ifdef __GNUC__
     // Reuse the 32-bit count: zero-extending x to 32 bits adds exactly 16
     // leading zeros, which are subtracted again.
     uint16 nlz32 = (uint16)_uint32_cntlz( (uint32)x );
     uint32 nlz   = _uint32_sub( nlz32, 16 );
     return (nlz);
 #elif NV_OS_XBOX
     // Same idea using the Xbox PPC intrinsic.
     uint16 nlz32 = (uint16)_CountLeadingZeros( (uint32)x );
     return _uint32_sub( nlz32, 16);
 #else
     // Portable fallback: smear the highest set bit downwards, invert, and
     // count the remaining one bits with a parallel bit count.
     const uint16 x0  = _uint16_srl(  x,  1 );
     const uint16 x1  = _uint16_or(   x,  x0 );
     const uint16 x2  = _uint16_srl(  x1, 2 );
     const uint16 x3  = _uint16_or(   x1, x2 );
     const uint16 x4  = _uint16_srl(  x3, 4 );
     const uint16 x5  = _uint16_or(   x3, x4 );
     const uint16 x6  = _uint16_srl(  x5, 8 );
     const uint16 x7  = _uint16_or(   x5, x6 );
     const uint16 x8  = _uint16_not(  x7 );
     const uint16 x9  = _uint16_srlm( x8, 1, 0x5555 );
     const uint16 xA  = _uint16_sub(  x8, x9 );
     const uint16 xB  = _uint16_and(  xA, 0x3333 );
     const uint16 xC  = _uint16_srlm( xA, 2, 0x3333 );
     const uint16 xD  = _uint16_add(  xB, xC );
     const uint16 xE  = _uint16_srl(  xD, 4 );
     const uint16 xF  = _uint16_addm( xD, xE, 0x0f0f );
     const uint16 x10 = _uint16_srl(  xF, 8 );
     const uint16 x11 = _uint16_addm( xF, x10, 0x001f );
     return ( x11 );
 #endif
 }
 
 // Converts the bit pattern of a 32-bit float (`f`) to the bit pattern of a
 // 16-bit half, without branching.
 //
 // Structure:
 //  1. Constants: masks and shift amounts for both formats.
 //  2. Field extraction and the candidate results (normal, mantissa-overflow,
 //     denormal, NaN, infinity). The mantissa is rounded by adding bit 12 (the
 //     highest discarded bit) into bit 13.
 //  3. Predicates named is_*_msb: each carries its truth value in bit 31.
 //  4. A chain of _uint32_sels() calls picks the result; each later selection
 //     overrides the earlier ones, so the order of the chain matters.
 uint16
 nv::half_from_float( uint32 f )
 {
     const uint32 one                        = _uint32_li( 0x00000001 );
     const uint32 f_s_mask                   = _uint32_li( 0x80000000 );
     const uint32 f_e_mask                   = _uint32_li( 0x7f800000 );
     const uint32 f_m_mask                   = _uint32_li( 0x007fffff );
     const uint32 f_m_hidden_bit             = _uint32_li( 0x00800000 );
     const uint32 f_m_round_bit              = _uint32_li( 0x00001000 );
     const uint32 f_snan_mask                = _uint32_li( 0x7fc00000 );
     const uint32 f_e_pos                    = _uint32_li( 0x00000017 );
     const uint32 h_e_pos                    = _uint32_li( 0x0000000a );
     const uint32 h_e_mask                   = _uint32_li( 0x00007c00 );
     const uint32 h_snan_mask                = _uint32_li( 0x00007e00 );
     const uint32 h_e_mask_value             = _uint32_li( 0x0000001f );
     const uint32 f_h_s_pos_offset           = _uint32_li( 0x00000010 );
     const uint32 f_h_bias_offset            = _uint32_li( 0x00000070 );
     const uint32 f_h_m_pos_offset           = _uint32_li( 0x0000000d );
     const uint32 h_nan_min                  = _uint32_li( 0x00007c01 );
     const uint32 f_h_e_biased_flag          = _uint32_li( 0x0000008f );
     // Field extraction and candidate values.
     const uint32 f_s                        = _uint32_and( f,               f_s_mask         );
     const uint32 f_e                        = _uint32_and( f,               f_e_mask         );
     const uint16 h_s                        = _uint32_srl( f_s,             f_h_s_pos_offset );
     const uint32 f_m                        = _uint32_and( f,               f_m_mask         );
     const uint16 f_e_amount                 = _uint32_srl( f_e,             f_e_pos          );
     const uint32 f_e_half_bias              = _uint32_sub( f_e_amount,      f_h_bias_offset  );
     const uint32 f_snan                     = _uint32_and( f,               f_snan_mask      );
     const uint32 f_m_round_mask             = _uint32_and( f_m,             f_m_round_bit    );
     const uint32 f_m_round_offset           = _uint32_sll( f_m_round_mask,  one              );
     const uint32 f_m_rounded                = _uint32_add( f_m,             f_m_round_offset );
     const uint32 f_m_denorm_sa              = _uint32_sub( one,             f_e_half_bias    );
     const uint32 f_m_with_hidden            = _uint32_or(  f_m_rounded,     f_m_hidden_bit   );
     const uint32 f_m_denorm                 = _uint32_srl( f_m_with_hidden, f_m_denorm_sa    );
     const uint32 h_m_denorm                 = _uint32_srl( f_m_denorm,      f_h_m_pos_offset );
     const uint32 f_m_rounded_overflow       = _uint32_and( f_m_rounded,     f_m_hidden_bit   );
     const uint32 m_nan                      = _uint32_srl( f_m,             f_h_m_pos_offset );
     const uint32 h_em_nan                   = _uint32_or(  h_e_mask,        m_nan            );
     const uint32 h_e_norm_overflow_offset   = _uint32_inc( f_e_half_bias );
     const uint32 h_e_norm_overflow          = _uint32_sll( h_e_norm_overflow_offset, h_e_pos          );
     const uint32 h_e_norm                   = _uint32_sll( f_e_half_bias,            h_e_pos          );
     const uint32 h_m_norm                   = _uint32_srl( f_m_rounded,              f_h_m_pos_offset );
     const uint32 h_em_norm                  = _uint32_or(  h_e_norm,                 h_m_norm         );
     // Predicates: the truth value lives in bit 31 of each is_*_msb value.
     const uint32 is_h_ndenorm_msb           = _uint32_sub( f_h_bias_offset,   f_e_amount    );
     const uint32 is_f_e_flagged_msb         = _uint32_sub( f_h_e_biased_flag, f_e_half_bias );
     const uint32 is_h_denorm_msb            = _uint32_not( is_h_ndenorm_msb );
     const uint32 is_f_m_eqz_msb             = _uint32_dec( f_m   );
     const uint32 is_h_nan_eqz_msb           = _uint32_dec( m_nan );
     const uint32 is_f_inf_msb               = _uint32_and( is_f_e_flagged_msb, is_f_m_eqz_msb   );
     const uint32 is_f_nan_underflow_msb     = _uint32_and( is_f_e_flagged_msb, is_h_nan_eqz_msb );
     const uint32 is_e_overflow_msb          = _uint32_sub( h_e_mask_value,     f_e_half_bias    );
     const uint32 is_h_inf_msb               = _uint32_or(  is_e_overflow_msb,  is_f_inf_msb     );
     const uint32 is_f_nsnan_msb             = _uint32_sub( f_snan,             f_snan_mask      );
     const uint32 is_m_norm_overflow_msb     = _uint32_neg( f_m_rounded_overflow );
     const uint32 is_f_snan_msb              = _uint32_not( is_f_nsnan_msb );
     // Selection chain: every step either keeps the previous result or replaces it.
     const uint32 h_em_overflow_result       = _uint32_sels( is_m_norm_overflow_msb, h_e_norm_overflow, h_em_norm                 );
     const uint32 h_em_nan_result            = _uint32_sels( is_f_e_flagged_msb,     h_em_nan,          h_em_overflow_result      );
     const uint32 h_em_nan_underflow_result  = _uint32_sels( is_f_nan_underflow_msb, h_nan_min,         h_em_nan_result           );
     const uint32 h_em_inf_result            = _uint32_sels( is_h_inf_msb,           h_e_mask,          h_em_nan_underflow_result );
     const uint32 h_em_denorm_result         = _uint32_sels( is_h_denorm_msb,        h_m_denorm,        h_em_inf_result           );
     const uint32 h_em_snan_result           = _uint32_sels( is_f_snan_msb,          h_snan_mask,       h_em_denorm_result        );
     // Recombine the sign with the selected exponent/mantissa bits.
     const uint32 h_result                   = _uint32_or( h_s, h_em_snan_result );
 
     return (uint16)(h_result);
 }
 
 // Converts the bit pattern of a 16-bit half (`h`) to the bit pattern of a
 // 32-bit float, without branching.
 //
 // The sign, exponent and mantissa fields are extracted, the candidate results
 // for the normal, denormal, infinity and NaN cases are all computed, and a
 // chain of _uint32_sels() calls keyed on the is_*_msb predicates (truth value
 // in bit 31) picks the final exponent/mantissa bits. Later selections override
 // earlier ones. Denormal halves are renormalized using the leading-zero count
 // of the mantissa.
 uint32 
 nv::half_to_float( uint16 h )
 {
     const uint32 h_e_mask              = _uint32_li( 0x00007c00 );
     const uint32 h_m_mask              = _uint32_li( 0x000003ff );
     const uint32 h_s_mask              = _uint32_li( 0x00008000 );
     const uint32 h_f_s_pos_offset      = _uint32_li( 0x00000010 );
     const uint32 h_f_e_pos_offset      = _uint32_li( 0x0000000d );
     const uint32 h_f_bias_offset       = _uint32_li( 0x0001c000 );
     const uint32 f_e_mask              = _uint32_li( 0x7f800000 );
     const uint32 f_m_mask              = _uint32_li( 0x007fffff );
     const uint32 h_f_e_denorm_bias     = _uint32_li( 0x0000007e );
     const uint32 h_f_m_denorm_sa_bias  = _uint32_li( 0x00000008 );
     const uint32 f_e_pos               = _uint32_li( 0x00000017 );
     const uint32 h_e_mask_minus_one    = _uint32_li( 0x00007bff );
     // Field extraction and the normal-case result (f_em).
     const uint32 h_e                   = _uint32_and( h, h_e_mask );
     const uint32 h_m                   = _uint32_and( h, h_m_mask );
     const uint32 h_s                   = _uint32_and( h, h_s_mask );
     const uint32 h_e_f_bias            = _uint32_add( h_e, h_f_bias_offset );
     const uint32 h_m_nlz               = _uint32_cntlz( h_m );
     const uint32 f_s                   = _uint32_sll( h_s,        h_f_s_pos_offset );
     const uint32 f_e                   = _uint32_sll( h_e_f_bias, h_f_e_pos_offset );
     const uint32 f_m                   = _uint32_sll( h_m,        h_f_e_pos_offset );
     const uint32 f_em                  = _uint32_or(  f_e,        f_m              );
     // Denormal and NaN candidates.
     const uint32 h_f_m_sa              = _uint32_sub( h_m_nlz,             h_f_m_denorm_sa_bias );
     const uint32 f_e_denorm_unpacked   = _uint32_sub( h_f_e_denorm_bias,   h_f_m_sa             );
     const uint32 h_f_m                 = _uint32_sll( h_m,                 h_f_m_sa             );
     const uint32 f_m_denorm            = _uint32_and( h_f_m,               f_m_mask             );
     const uint32 f_e_denorm            = _uint32_sll( f_e_denorm_unpacked, f_e_pos              );
     const uint32 f_em_denorm           = _uint32_or(  f_e_denorm,          f_m_denorm           );
     const uint32 f_em_nan              = _uint32_or(  f_e_mask,            f_m                  );
     // Predicates: the truth value lives in bit 31 of each is_*_msb value.
     const uint32 is_e_eqz_msb          = _uint32_dec(  h_e );
     const uint32 is_m_nez_msb          = _uint32_neg(  h_m );
     const uint32 is_e_flagged_msb      = _uint32_sub(  h_e_mask_minus_one, h_e );
     const uint32 is_zero_msb           = _uint32_andc( is_e_eqz_msb,       is_m_nez_msb );
     const uint32 is_inf_msb            = _uint32_andc( is_e_flagged_msb,   is_m_nez_msb );
     const uint32 is_denorm_msb         = _uint32_and(  is_m_nez_msb,       is_e_eqz_msb );
     const uint32 is_nan_msb            = _uint32_and(  is_e_flagged_msb,   is_m_nez_msb ); 
     // Selection chain: zero clears the result, then denormal, infinity and NaN override in turn.
     const uint32 is_zero               = _uint32_ext(  is_zero_msb );
     const uint32 f_zero_result         = _uint32_andc( f_em, is_zero );
     const uint32 f_denorm_result       = _uint32_sels( is_denorm_msb, f_em_denorm, f_zero_result );
     const uint32 f_inf_result          = _uint32_sels( is_inf_msb,    f_e_mask,    f_denorm_result );
     const uint32 f_nan_result          = _uint32_sels( is_nan_msb,    f_em_nan,    f_inf_result    );
     // Recombine the sign with the selected exponent/mantissa bits.
     const uint32 f_result              = _uint32_or( f_s, f_nan_result );
 
     return (f_result);
 }
 
 
-#if !NV_OS_IOS && (defined(__i386__) || defined(__x86_64__))
+#if !NV_OS_IOS && (defined(__i386__) || defined(__x86_64__) || defined(__e2k__))
 
 #if NV_CC_GNUC
 #if defined(__i386__)
 #include <x86intrin.h>
-#elif defined(__x86_64__)
+#elif defined(__x86_64__) || defined(__e2k__)
 #include <xmmintrin.h>
 #endif
 #endif
 
 #include "nvcore/Memory.h" // NV_ALIGN_16
 
 // Converts four half-precision values to four floats using SSE2 integer ops.
 // `h` is read as four 32-bit lanes with a half bit pattern in the low 16 bits
 // of each lane (the caller in this file zero-extends the halves this way).
 // Handles normal values, infinities/NaNs and denormals without branching by
 // computing the adjustments for every case and blending with comparison masks.
 static __m128 half_to_float4_SSE2(__m128i h)
 {
     // Declares a 16-byte aligned array holding the same 32-bit value four times.
 #define SSE_CONST4(name, val) static const NV_ALIGN_16 uint name[4] = { (val), (val), (val), (val) }
     
     // Reinterprets such an array as one __m128i vector.
 #define CONST(name) *(const __m128i *)&name
 
     SSE_CONST4(mask_nosign,         0x7fff);
     SSE_CONST4(mask_justsign,       0x8000);
     SSE_CONST4(mask_shifted_exp,    0x7c00 << 13);
     SSE_CONST4(expadjust_normal,    (127 - 15) << 23);
     SSE_CONST4(expadjust_infnan,    (128 - 16) << 23);
     SSE_CONST4(expadjust_denorm,    1 << 23);
     SSE_CONST4(magic_denorm,        113 << 23);
 
     // Split off the sign, move exponent+mantissa into float position (shift by
     // 13) and rebias the exponent from 15 to 127.
     __m128i mnosign     = CONST(mask_nosign);
     __m128i expmant     = _mm_and_si128(mnosign, h);
     __m128i justsign    = _mm_and_si128(h, CONST(mask_justsign));
     __m128i mshiftexp   = CONST(mask_shifted_exp);
     __m128i eadjust     = CONST(expadjust_normal);
     __m128i shifted     = _mm_slli_epi32(expmant, 13);
     __m128i adjusted    = _mm_add_epi32(eadjust, shifted);
     __m128i justexp     = _mm_and_si128(shifted, mshiftexp);
 
     // Per-lane masks: exponent all ones (inf/NaN) and exponent zero (zero/denormal).
     __m128i zero        = _mm_setzero_si128();
     __m128i b_isinfnan  = _mm_cmpeq_epi32(mshiftexp, justexp);
     __m128i b_isdenorm  = _mm_cmpeq_epi32(zero, justexp);
 
     // Inf/NaN lanes get an additional exponent adjustment.
     __m128i adj_infnan  = _mm_and_si128(b_isinfnan, CONST(expadjust_infnan));
     __m128i adjusted2   = _mm_add_epi32(adjusted, adj_infnan);
 
     // Denormal lanes: bump the exponent, then subtract the magic constant as a
     // float; the result replaces adjusted2 only where b_isdenorm is set.
     __m128i adj_den     = CONST(expadjust_denorm);
     __m128i den1        = _mm_add_epi32(adj_den, adjusted2);
     __m128  den2        = _mm_sub_ps(_mm_castsi128_ps(den1), *(const __m128 *)&magic_denorm);
     __m128  adjusted3   = _mm_and_ps(den2, _mm_castsi128_ps(b_isdenorm));
     __m128  adjusted4   = _mm_andnot_ps(_mm_castsi128_ps(b_isdenorm), _mm_castsi128_ps(adjusted2));
     __m128  adjusted5   = _mm_or_ps(adjusted3, adjusted4);
     // Move the sign from bit 15 to bit 31 and merge it back in.
     __m128i sign        = _mm_slli_epi32(justsign, 16);
     __m128  final       = _mm_or_ps(adjusted5, _mm_castsi128_ps(sign));
 
     // ~21 SSE2 ops.
     return final;
 
 #undef SSE_CONST4
 #undef CONST
 }
 
 
 void nv::half_to_float_array_SSE2(const uint16 * vin, float * vout, int count) {
     nvDebugCheck((intptr_t(vin) & 15) == 0);
     nvDebugCheck((intptr_t(vout) & 15) == 0);
     nvDebugCheck((count & 7) == 0);
 
     __m128i zero = _mm_setzero_si128();
 
     for (int i = 0; i < count; i += 8)
     {
         __m128i in = _mm_loadu_si128((const __m128i *)(vin + i));
         __m128i a = _mm_unpacklo_epi16(in, zero);
         __m128i b = _mm_unpackhi_epi16(in, zero);
         
         __m128 outa = half_to_float4_SSE2(a);
         _mm_storeu_ps((float *)(vout + i), outa);
         
         __m128 outb = half_to_float4_SSE2(b);
         _mm_storeu_ps((float *)(vout + i + 4), outb);
     }
 }
 
 #endif 
 
 
 // @@ These tables could be smaller.
 // Lookup tables read by fast_half_to_float() and filled in by half_init_tables().
 namespace nv {
     // Element 0 starts as a non-zero sentinel: fast_half_to_float() tests 'mantissa_table[0] != 0'
     // to decide whether half_init_tables() still has to run (which then sets it to 0).
     uint32 mantissa_table[2048] = { 0xDEADBEEF };
     // Indexed by the top 6 bits of the half (h >> 10).
     uint32 exponent_table[64];
     // Indexed by the top 6 bits of the half; gives the base index into mantissa_table.
     uint32 offset_table[64];
 }
 
 void nv::half_init_tables()
 {
     // Init mantissa table.
     mantissa_table[0] = 0;
 
     // denormals
     for (int i = 1; i < 1024; i++) {
         uint m = i << 13;
         uint e = 0;
 
         while ((m & 0x00800000) == 0) {
             e -= 0x00800000;
             m <<= 1;
         }
         m &= ~0x00800000;
         e += 0x38800000;
         mantissa_table[i] = m | e;
     }
 
     // normals
     for (int i = 1024; i < 2048; i++) {
         mantissa_table[i] = (i - 1024) << 13;
     }
 
 
     // Init exponent table.
     exponent_table[0] = 0;
 
     for (int i = 1; i < 31; i++) {
         exponent_table[i] = 0x38000000 + (i << 23);
     }
 
     exponent_table[31] = 0x7f800000;
     exponent_table[32] = 0x80000000;
 
     for (int i = 33; i < 63; i++) {
         exponent_table[i] = 0xb8000000 + ((i - 32) << 23);
     }
 
     exponent_table[63] = 0xff800000;
 
 
     // Init offset table.
     offset_table[0] = 0;
 
     for (int i = 1; i < 32; i++) {
         offset_table[i] = 1024;
     }
 
     offset_table[32] = 0;
 
     for (int i = 33; i < 64; i++) {
         offset_table[i] = 1024;
     }
 }
 
 // Fast half to float conversion based on:
 // http://www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf
 uint32 nv::fast_half_to_float(uint16 h)
 {
 	// The tables are filled lazily; a non-zero first mantissa entry means "not initialized yet".
 	if (mantissa_table[0] != 0) {
 		half_init_tables();
 	}
 
 	// Top 6 bits (sign + exponent) select the exponent/offset entries, low 10 bits the mantissa.
 	const uint tableIndex = h >> 10;
 	const uint mantissaIndex = offset_table[tableIndex] + (h & 0x3ff);
 	return mantissa_table[mantissaIndex] + exponent_table[tableIndex];
 }
 
 #if 0
 
 // Inaccurate conversion suggested at the ffmpeg mailing list:
 // http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/2009-July/068949.html
 // NOTE(review): despite the name, the arithmetic below produces a small integer (at most 0xffff),
 // not float bits -- presumably a half -> 16-bit fixed-point conversion; confirm against the linked thread.
 uint32 nv::fast_half_to_float(uint16 v)
 {
     // Sign bit set: negative inputs map to 0.
     if (v & 0x8000) return 0;
     // 5-bit exponent field (the sign bit is known to be clear here).
     uint exp = v >> 10;
     // Zero exponent: the result is just the top mantissa bit.
     if (!exp) return (v>>9)&1;
     // Exponent field of 15 or more saturates to 0xffff.
     if (exp >= 15) return 0xffff;
     // v is 16 bits wide, so this drops the exponent and leaves the mantissa in the top 10 bits.
     v <<= 6;
     // Add the implicit leading one (bit 16), then scale down according to the exponent.
     return (v+(1<<16)) >> (15-exp);
 }
 
 #endif
 
 #if 0
 
 // Some more from a gamedev thread:
 // http://www.devmaster.net/forums/showthread.php?t=10924
 
 // I believe it does not handle specials either.
 
 // Mike Acton's code should be fairly easy to vectorize and that would handle all cases too, the table method might still be faster, though.
 
 
 // Per-lane masks for the fields of a half stored in the low 16 bits of each 32-bit lane.
 static __declspec(align(16)) unsigned half_sign[4]    = {0x00008000, 0x00008000, 0x00008000, 0x00008000};
 static __declspec(align(16)) unsigned half_exponent[4]    = {0x00007C00, 0x00007C00, 0x00007C00, 0x00007C00};
 static __declspec(align(16)) unsigned half_mantissa[4]    = {0x000003FF, 0x000003FF, 0x000003FF, 0x000003FF};
 // 0x0001C000 == 112 << 10, i.e. the exponent bias difference (127 - 15) in the half exponent position.
 static __declspec(align(16)) unsigned half_bias_offset[4] = {0x0001C000, 0x0001C000, 0x0001C000, 0x0001C000};
 
 __asm
 {
     movaps  xmm1, xmm0  // Input in xmm0
     movaps  xmm2, xmm0
 
     // Isolate sign, exponent and mantissa, and rebias the exponent.
     andps   xmm0, half_sign
     andps   xmm1, half_exponent
     andps   xmm2, half_mantissa
     paddd   xmm1, half_bias_offset
 
     // Move each field to its position in the 32-bit float layout.
     pslld   xmm0, 16
     pslld   xmm1, 13
     pslld   xmm2, 13
 
     // Recombine the three fields.
     orps    xmm1, xmm2
     orps    xmm0, xmm1  // Result in xmm0
 }
 
 
 #endif
 
 #if 0
 // This version computes the tables at compile time:
 // http://gamedev.stackexchange.com/questions/17326/conversion-of-a-number-from-single-precision-floating-point-representation-to-a
 
 /* This method is faster than the OpenEXR implementation (very often
  * used, eg. in Ogre), with the additional benefit of rounding, inspired
  * by James Tursa's half-precision code. */
 static inline uint16_t float_to_half_branch(uint32_t x)
 {
     const uint16_t sign = (x >> 16) & 0x8000;          /* Sign moved to the half position */
     const unsigned int exponent = (x >> 23) & 0xff;    /* Biased float exponent */
     uint16_t mantissa = (x >> 12) & 0x07ff;            /* Top mantissa bits plus one rounding bit */
 
     /* Zero, float denormal, or too small even for a denormal half:
      * the result is a signed zero. */
     if (exponent < 103)
         return sign;
 
     /* Inf, NaN, or exponent overflow: the result is Inf, except that a NaN
      * input keeps one mantissa bit set so that it stays a NaN. */
     if (exponent > 142)
     {
         const bool isNan = (exponent == 255) && ((x & 0x007fffffu) != 0);
         return sign | 0x7c00u | (isNan ? 1u : 0u);
     }
 
     /* Small exponent underflow: the result is a denormal half. */
     if (exponent < 113)
     {
         mantissa |= 0x0800u;
         const unsigned int truncated = mantissa >> (114 - exponent);
         const unsigned int roundBit = (mantissa >> (113 - exponent)) & 1;
         /* The rounding may carry into the exponent field (mantissa 0,
          * exponent 1), which is the correct result. */
         return sign | (truncated + roundBit);
     }
 
     /* Normal number: rebias the exponent, drop the rounding bit from the
      * mantissa, then round. A carry clears the mantissa and increments
      * the exponent, which is the correct result. */
     uint16_t result = sign | ((exponent - 112) << 10) | (mantissa >> 1);
     result += mantissa & 1;
     return result;
 }
 
 /* These macros implement a finite iterator useful to build lookup
  * tables. For instance, S64(0) will call S1(x) for all values of x
  * between 0 and 63.
  * Due to the exponential behaviour of the calls, the stress on the
  * compiler may be important. */
 #define S4(x)    S1((x)),   S1((x)+1),     S1((x)+2),     S1((x)+3)
 #define S16(x)   S4((x)),   S4((x)+4),     S4((x)+8),     S4((x)+12)
 #define S64(x)   S16((x)),  S16((x)+16),   S16((x)+32),   S16((x)+48)
 #define S256(x)  S64((x)),  S64((x)+64),   S64((x)+128),  S64((x)+192)
 #define S1024(x) S256((x)), S256((x)+256), S256((x)+512), S256((x)+768)
 
 /* Table-driven conversion after "Fast Half Float Conversions" by
  * Jeroen van der Zijp (November 2008). The mantissa is truncated rather
  * than rounded, and some NaN values may come out as Inf. */
 static inline uint16_t float_to_half_nobranch(uint32_t x)
 {
 /* Half exponent bits (sign excluded) for the biased float exponent i. */
 #define HALF_BASE(i) (((i) < 103) ? 0x0000 : \
                       ((i) < 113) ? 0x0400 >> (113 - (i)) : \
                       ((i) < 143) ? ((i) - 112) << 10 : 0x7c00)
 
     /* Entries 0..255 are for a clear sign bit, 256..511 for a set sign bit. */
     static uint16_t const basetable[512] =
     {
 #define S1(i) HALF_BASE(i)
         S256(0),
 #undef S1
 #define S1(i) (0x8000 | HALF_BASE(i))
         S256(0),
 #undef S1
     };
 #undef HALF_BASE
 
     /* Right shift applied to the float mantissa; the same for both signs. */
     static uint8_t const shifttable[512] =
     {
 #define S1(i) (((i) < 103) ? 24 : \
                ((i) < 113) ? 126 - (i) : \
                ((i) < 143 || (i) == 255) ? 13 : 24)
         S256(0), S256(0),
 #undef S1
     };
 
     const uint32_t signAndExponent = (x >> 23) & 0x1ff;
     const uint32_t mantissa = x & 0x007fffff;
     return basetable[signAndExponent] | (mantissa >> shifttable[signAndExponent]);
 }
 #endif
Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/ftoi.h
===================================================================
--- ps/trunk/libraries/source/nvtt/src/src/nvmath/ftoi.h	(revision 27269)
+++ ps/trunk/libraries/source/nvtt/src/src/nvmath/ftoi.h	(revision 27270)
@@ -1,256 +1,256 @@
 // This code is in the public domain -- castano@gmail.com
 
 #pragma once
 #ifndef NV_MATH_FTOI_H
 #define NV_MATH_FTOI_H
 
 #include "nvmath/nvmath.h"
 
 #include <math.h>
 
 namespace nv
 {
     // Optimized float to int conversions. See:
     // http://cbloomrants.blogspot.com/2009/01/01-17-09-float-to-int.html
     // http://www.stereopsis.com/sree/fpu2006.html
     // http://assemblyrequired.crashworks.org/2009/01/12/why-you-should-never-cast-floats-to-ints/
     // http://chrishecker.com/Miscellaneous_Technical_Articles#Floating_Point
 
 
     // Gives access to the raw 64-bit pattern of a double; used by ftoi_round_xs().
     union DoubleAnd64 {
         uint64    i;
         double    d;
     };
 
     // Constant added to the input by ftoi_round_xs() before its low 32 bits are read back.
     static const double floatutil_xs_doublemagic = (6755399441055744.0);                            // 2^52 * 1.5
     // Small offset used to build the "almost 0.5" bias below.
     static const double floatutil_xs_doublemagicdelta = (1.5e-8);                                   // almost .5f = .5f + 1e^(number of exp bit)
     // Bias subtracted by ftoi_floor_xs() and added by ftoi_ceil_xs() before rounding.
     static const double floatutil_xs_doublemagicroundeps = (0.5f - floatutil_xs_doublemagicdelta);  // almost .5f = .5f - 1e^(number of exp bit)
 
     // Adds 'magic' to 'val' and returns the low 32 bits of the resulting double's bit pattern.
     // With magic == floatutil_xs_doublemagic this is used as a round-to-integer conversion
     // (see test_ftoi for the expected results).
     NV_FORCEINLINE int ftoi_round_xs(double val, double magic) {
 #if 1
         // Read the bits back through a union.
         DoubleAnd64 dunion;
         dunion.d = val + magic;
         return (int32) dunion.i; // just cast to grab the bottom bits
 #else
         // Disabled alternative: reads the first 32-bit word through a pointer cast.
         val += magic;
         return ((int*)&val)[0]; // @@ Assumes little endian.
 #endif
     }
 
     // Rounds 'val' using the double-magic conversion.
     NV_FORCEINLINE int ftoi_round_xs(float val) {
         const double widened = val;
         return ftoi_round_xs(widened, floatutil_xs_doublemagic);
     }
 
     // Floor: bias the value down by almost one half, then round.
     NV_FORCEINLINE int ftoi_floor_xs(float val) {
         const double biased = val - floatutil_xs_doublemagicroundeps;
         return ftoi_round_xs(biased, floatutil_xs_doublemagic);
     }
 
     // Ceil: bias the value up by almost one half, then round.
     NV_FORCEINLINE int ftoi_ceil_xs(float val) {
         const double biased = val + floatutil_xs_doublemagicroundeps;
         return ftoi_round_xs(biased, floatutil_xs_doublemagic);
     }
 
     // Truncation: ceil for negative values, floor otherwise.
     NV_FORCEINLINE int ftoi_trunc_xs(float val) {
         if (val<0) {
             return ftoi_ceil_xs(val);
         }
         return ftoi_floor_xs(val);
     }
 
-#if NV_CPU_X86 || NV_CPU_X86_64
+#if NV_CPU_X86 || NV_CPU_X86_64 || NV_CPU_E2K
 
     // Converts with _mm_cvt_ss2si.
     NV_FORCEINLINE int ftoi_round_sse(float f) {
         const __m128 scalar = _mm_set_ss(f);
         return _mm_cvt_ss2si(scalar);
     }
 
     // Converts with _mm_cvtt_ss2si, the truncating variant.
     NV_FORCEINLINE int ftoi_trunc_sse(float f) {
         const __m128 scalar = _mm_set_ss(f);
         return _mm_cvtt_ss2si(scalar);
     }
 
 #endif
 
 
 
 #if NV_USE_SSE
 
     // Round: forwards to the SSE conversion.
     NV_FORCEINLINE int ftoi_round(float val) {
         const int rounded = ftoi_round_sse(val);
         return rounded;
     }
 
     // Truncate: forwards to the truncating SSE conversion.
     NV_FORCEINLINE int ftoi_trunc(float f) {
         const int truncated = ftoi_trunc_sse(f);
         return truncated;
     }
 
     // We can probably do better than this. See for example:
     // http://dss.stephanierct.com/DevBlog/?p=8
     NV_FORCEINLINE int ftoi_floor(float val) {
         const float lowered = floorf(val);
         return ftoi_round(lowered);
     }
 
     // Ceil: apply ceilf first, then convert the already-integral value.
     NV_FORCEINLINE int ftoi_ceil(float val) {
         const float raised = ceilf(val);
         return ftoi_round(raised);
     }
 
 #else
 
     // In theory this should work with any double floating point math implementation, but it appears that MSVC produces incorrect code
     // when SSE2 is targeted and fast math is enabled (/arch:SSE2 & /fp:fast). These problems go away with /fp:precise, which is the default mode.
 
     // Round: forwards to the double-magic implementation.
     NV_FORCEINLINE int ftoi_round(float val) {
         const int rounded = ftoi_round_xs(val);
         return rounded;
     }
 
     // Floor: forwards to the double-magic implementation.
     NV_FORCEINLINE int ftoi_floor(float val) {
         const int lowered = ftoi_floor_xs(val);
         return lowered;
     }
 
     // Ceil: forwards to the double-magic implementation.
     NV_FORCEINLINE int ftoi_ceil(float val) {
         const int raised = ftoi_ceil_xs(val);
         return raised;
     }
 
     // Truncate: forwards to the double-magic implementation.
     NV_FORCEINLINE int ftoi_trunc(float f) {
         const int truncated = ftoi_trunc_xs(f);
         return truncated;
     }
 
 #endif
 
 
     // Self-test for whichever ftoi_round / ftoi_trunc / ftoi_floor / ftoi_ceil implementation
     // was selected above. Every expectation is checked with nvCheck.
     inline void test_ftoi() {
 
         // Round to nearest integer.
         nvCheck(ftoi_round(0.1f) == 0);
         nvCheck(ftoi_round(0.6f) == 1);
         nvCheck(ftoi_round(-0.2f) == 0);
         nvCheck(ftoi_round(-0.7f) == -1);
         nvCheck(ftoi_round(10.1f) == 10);
         nvCheck(ftoi_round(10.6f) == 11);
         nvCheck(ftoi_round(-90.1f) == -90);
         nvCheck(ftoi_round(-90.6f) == -91);
 
         // Exact integers must be returned unchanged.
         nvCheck(ftoi_round(0) == 0);
         nvCheck(ftoi_round(1) == 1);
         nvCheck(ftoi_round(-1) == -1);
         
         // Midpoints are expected to round to the nearest even integer.
         nvCheck(ftoi_round(0.5f) == 0);  // How are midpoints rounded? Bankers rounding.
         nvCheck(ftoi_round(1.5f) == 2);
         nvCheck(ftoi_round(2.5f) == 2);
         nvCheck(ftoi_round(3.5f) == 4);
         nvCheck(ftoi_round(4.5f) == 4);
         nvCheck(ftoi_round(-0.5f) == 0);
         nvCheck(ftoi_round(-1.5f) == -2);
                 
 
         // Truncation (round down if > 0, round up if < 0).
         nvCheck(ftoi_trunc(0.1f) == 0);
         nvCheck(ftoi_trunc(0.6f) == 0);
         nvCheck(ftoi_trunc(-0.2f) == 0);
         nvCheck(ftoi_trunc(-0.7f) == 0);    // @@ When using /arch:SSE2 in Win32, msvc produce wrong code for this one. It is skipping the addition.
         nvCheck(ftoi_trunc(1.99f) == 1);
         nvCheck(ftoi_trunc(-1.2f) == -1);
 
         // Floor (round down).
         nvCheck(ftoi_floor(0.1f) == 0);
         nvCheck(ftoi_floor(0.6f) == 0);
         nvCheck(ftoi_floor(-0.2f) == -1);
         nvCheck(ftoi_floor(-0.7f) == -1);
         nvCheck(ftoi_floor(1.99f) == 1);
         nvCheck(ftoi_floor(-1.2f) == -2);
 
         // Floor of exact integers.
         nvCheck(ftoi_floor(0) == 0);
         nvCheck(ftoi_floor(1) == 1);
         nvCheck(ftoi_floor(-1) == -1);
         nvCheck(ftoi_floor(2) == 2);
         nvCheck(ftoi_floor(-2) == -2);
 
         // Ceil (round up).
         nvCheck(ftoi_ceil(0.1f) == 1);
         nvCheck(ftoi_ceil(0.6f) == 1);
         nvCheck(ftoi_ceil(-0.2f) == 0);
         nvCheck(ftoi_ceil(-0.7f) == 0);
         nvCheck(ftoi_ceil(1.99f) == 2);
         nvCheck(ftoi_ceil(-1.2f) == -1);
 
         // Ceil of exact integers.
         nvCheck(ftoi_ceil(0) == 0);
         nvCheck(ftoi_ceil(1) == 1);
         nvCheck(ftoi_ceil(-1) == -1);
         nvCheck(ftoi_ceil(2) == 2);
         nvCheck(ftoi_ceil(-2) == -2);
     }
 
 
 
 
 
     // Safe versions using standard casts.
 
     // Rounds to the nearest integer; midpoints go up (towards +infinity).
     inline int iround(float f)
     {
         const float shifted = f + 0.5f;
         return int(floorf(shifted));
     }
 
     // Double-precision overload of iround.
     inline int iround(double f)
     {
         const double shifted = f + 0.5;
         return int(::floor(shifted));
     }
 
     // Largest integer not greater than f.
     inline int ifloor(float f)
     {
         const float lowered = floorf(f);
         return int(lowered);
     }
 
     // Smallest integer not less than f.
     inline int iceil(float f)
     {
         const float raised = ceilf(f);
         return int(raised);
     }
 
 
 
     // I'm always confused about which quantizer to use. I think we should choose a quantizer based on how the values are expanded later and this is generally using the 'exact endpoints' rule.
     // Some notes from cbloom: http://cbloomrants.blogspot.com/2011/07/07-26-11-pixel-int-to-float-options.html
 
     // Quantize a float in the [0,1] range, using exact end points or uniform bins.
     // Quantizes x to 'bits' bits. With exactEndPoints the value is rounded onto a grid of
     // (2^bits - 1) steps; otherwise it is mapped to the centre of one of 2^bits bins.
     inline float quantizeFloat(float x, uint bits, bool exactEndPoints = true) {
         nvDebugCheck(bits <= 16);
 
         const float range = float(1 << bits);
 
         if (!exactEndPoints) {
             // Uniform bins: floor to the bin index and return the bin centre.
             return (floorf(x * range) + 0.5f) / range;
         }
 
         // Exact end points: round to the nearest of the range-1 steps.
         const float steps = range - 1;
         return floorf(x * steps + 0.5f) / steps;
     }
 
 
     // This is the most common rounding mode:
     // 
     //   0     1       2     3
     // |___|_______|_______|___|
     // 0                       1
     //
     // You get that if you take the unit floating point number multiply by 'N-1' and round to nearest. That is, `i = round(f * (N-1))`.
     // You reconstruct the original float dividing by 'N-1': `f = i / (N-1)`
 
 
     //    0     1     2     3
     // |_____|_____|_____|_____|
     // 0                       1
 
     /*enum BinningMode {
         RoundMode_ExactEndPoints,       
         RoundMode_UniformBins,
     };*/
 
     // Scales f by (2^N - 1) and rounds it with ftoi_round.
     template <int N>
     inline uint unitFloatToFixed(float f) {
         const int maxValue = (1<<N)-1;
         return ftoi_round(f * maxValue);
     }
 
     // 8-bit variant.
     inline uint8 unitFloatToFixed8(float f) {
         const uint fixed = unitFloatToFixed<8>(f);
         return (uint8)fixed;
     }
 
     // 16-bit variant.
     inline uint16 unitFloatToFixed16(float f) {
         const uint fixed = unitFloatToFixed<16>(f);
         return (uint16)fixed;
     }
 
 
 } // nv
 
 #endif // NV_MATH_FTOI_H
Index: ps/trunk/libraries/source/nvtt/src/src/nvmath/nvmath.h
===================================================================
--- ps/trunk/libraries/source/nvtt/src/src/nvmath/nvmath.h	(revision 27269)
+++ ps/trunk/libraries/source/nvtt/src/src/nvmath/nvmath.h	(revision 27270)
@@ -1,330 +1,339 @@
 // This code is in the public domain -- castanyo@yahoo.es
 
 #pragma once
 #ifndef NV_MATH_H
 #define NV_MATH_H
 
 #include "nvcore/nvcore.h"
 #include "nvcore/Debug.h"   // nvDebugCheck
 #include "nvcore/Utils.h"   // max, clamp
 
 #include <math.h>
 
 #if NV_OS_WIN32 || NV_OS_XBOX
 #include <float.h>  // finite, isnan
 #endif
 
-#if NV_CPU_X86 || NV_CPU_X86_64
+#if NV_CPU_X86 || NV_CPU_X86_64 || NV_CPU_E2K
     //#include <intrin.h>
     #include <xmmintrin.h>
 #endif
 
 
 
 // Function linkage
 #if NVMATH_SHARED
 #ifdef NVMATH_EXPORTS
 #define NVMATH_API DLL_EXPORT
 #define NVMATH_CLASS DLL_EXPORT_CLASS
 #else
 #define NVMATH_API DLL_IMPORT
 #define NVMATH_CLASS DLL_IMPORT
 #endif
 #else // NVMATH_SHARED
 #define NVMATH_API
 #define NVMATH_CLASS
 #endif // NVMATH_SHARED
 
 // Set some reasonable defaults.
 #ifndef NV_USE_ALTIVEC
 #   define NV_USE_ALTIVEC NV_CPU_PPC
 //#   define NV_USE_ALTIVEC defined(__VEC__)
 #endif
 
 // Pick the SSE level (0 = none, 1 = SSE, 2 = SSE2) unless the build already defined NV_USE_SSE.
 #ifndef NV_USE_SSE
 #   if NV_CPU_X86_64
         // x64 always supports at least SSE2
 #       define NV_USE_SSE 2
 #   elif NV_CC_MSVC && defined(_M_IX86_FP)
         // Also on x86 with the /arch:SSE flag in MSVC.
 #       define NV_USE_SSE _M_IX86_FP       // 1=SSE, 2=SSE2
+#   elif NV_CPU_E2K
+        // mcst-lcc compiler flags determine e2k CPU features
+#       if defined(__SSE2__)
+#              define NV_USE_SSE 2
+#       elif defined(__SSE__)
+#              define NV_USE_SSE 1
+#       else
+#              define NV_USE_SSE 0
+#       endif
 #   elif defined(__SSE2__)
         // Test __SSE2__ before __SSE__: compilers that define __SSE2__ also define __SSE__,
         // so checking __SSE__ first would never select level 2.
 #       define NV_USE_SSE 2
 #   elif defined(__SSE__)
 #       define NV_USE_SSE 1
 #   else
         // Otherwise we assume no SSE.
 #       define NV_USE_SSE 0
 #   endif
 #endif
 
 
 // Internally set NV_USE_SIMD when either altivec or sse is available.
 #if NV_USE_ALTIVEC && NV_USE_SSE
 #	error "Cannot enable both altivec and sse!"
 #endif
 
 
 
 #ifndef PI
 #define PI                  float(3.1415926535897932384626433833)
 #endif
 
 #define NV_EPSILON          (0.0001f)
 #define NV_NORMAL_EPSILON   (0.001f)
 
 /*
 #define SQ(r)               ((r)*(r))
 
 #define SIGN_BITMASK        0x80000000
 
 /// Integer representation of a floating-point value.
 #define IR(x)               ((uint32 &)(x))
 
 /// Absolute integer representation of a floating-point value
 #define AIR(x)              (IR(x) & 0x7fffffff)
 
 /// Floating-point representation of an integer value.
 #define FR(x)               ((float&)(x))
 
 /// Integer-based comparison of a floating point value.
 /// Don't use it blindly, it can be faster or slower than the FPU comparison, depends on the context.
 #define IS_NEGATIVE_FLOAT(x) (IR(x)&SIGN_BITMASK)
 */
 
 // sqrt with a debug check that the argument is non-negative.
 extern "C" inline double sqrt_assert(const double f)
 {
     nvDebugCheck(f >= 0.0f);
     const double result = sqrt(f);
     return result;
 }
 
 // sqrtf with a debug check that the argument is non-negative.
 inline float sqrtf_assert(const float f)
 {
     nvDebugCheck(f >= 0.0f);
     const float result = sqrtf(f);
     return result;
 }
 
 // acos with a debug check that the argument lies in [-1, 1].
 extern "C" inline double acos_assert(const double f) 
 {
     nvDebugCheck(f >= -1.0f && f <= 1.0f);
     const double result = acos(f);
     return result;
 }
 
 // acosf with a debug check that the argument lies in [-1, 1].
 inline float acosf_assert(const float f)
 {
     nvDebugCheck(f >= -1.0f && f <= 1.0f);
     const float result = acosf(f);
     return result;
 }
 
 // asin with a debug check that the argument lies in [-1, 1].
 extern "C" inline double asin_assert(const double f)
 {
     nvDebugCheck(f >= -1.0f && f <= 1.0f);
     const double result = asin(f);
     return result;
 }
 
 // asinf with a debug check that the argument lies in [-1, 1].
 inline float asinf_assert(const float f)
 {
     nvDebugCheck(f >= -1.0f && f <= 1.0f);
     const float result = asinf(f);
     return result;
 }
 
 // Replace default functions with asserting ones.
 #if !NV_CC_MSVC || (NV_CC_MSVC && (_MSC_VER < 1700))    // IC: Apparently this was causing problems in Visual Studio 2012. See Issue 194: https://code.google.com/p/nvidia-texture-tools/issues/detail?id=194
 #define sqrt sqrt_assert
 #define sqrtf sqrtf_assert
 #define acos acos_assert
 #define acosf acosf_assert
 #define asin asin_assert
 #define asinf asinf_assert
 #endif
 
 #if NV_CC_MSVC
 // Base-2 logarithm built from logf.
 NV_FORCEINLINE float log2f(float x)
 {
     nvCheck(x >= 0);
     const float naturalLog = logf(x);
     const float naturalLogOfTwo = logf(2.0f);
     return naturalLog / naturalLogOfTwo;
 }
 // Base-2 exponential built from powf.
 NV_FORCEINLINE float exp2f(float x)
 {
     const float base = 2.0f;
     return powf(base, x);
 }
 #endif
 
 namespace nv
 {
     // Degrees to radians.
     inline float toRadian(float degree)
     {
         const float radiansPerDegree = PI / 180.0f;
         return degree * radiansPerDegree;
     }
     // Radians to degrees.
     inline float toDegree(float radian)
     {
         const float degreesPerRadian = 180.0f / PI;
         return radian * degreesPerRadian;
     }
 
     // Robust floating point comparisons:
     // http://realtimecollisiondetection.net/blog/?p=89
     // Compares with a tolerance that is absolute for small values and relative for large ones:
     // epsilon is scaled by the largest of 1, |f0| and |f1|.
     inline bool equal(const float f0, const float f1, const float epsilon = NV_EPSILON)
     {
         const float scale = max3(1.0f, fabsf(f0), fabsf(f1));
         return fabs(f0-f1) <= epsilon * scale;
     }
 
     // True when |f| does not exceed epsilon.
     inline bool isZero(const float f, const float epsilon = NV_EPSILON)
     {
         const bool withinTolerance = fabs(f) <= epsilon;
         return withinTolerance;
     }
 
     // Forwards to the finiteness test selected for the target OS below;
     // compilation fails on platforms that are not listed.
     inline bool isFinite(const float f)
     {
 #if NV_OS_WIN32 || NV_OS_XBOX
         return _finite(f) != 0;
 #elif NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_NETBSD || NV_OS_OPENBSD || NV_OS_ORBIS
         return isfinite(f);
 #elif NV_OS_LINUX
         return finitef(f);
 #else
 #   error "isFinite not supported"
 #endif
         //return std::isfinite (f);
         //return finite (f);
     }
 
     // Forwards to the NaN test selected for the target OS below;
     // compilation fails on platforms that are not listed.
     inline bool isNan(const float f)
     {
 #if NV_OS_WIN32 || NV_OS_XBOX
         return _isnan(f) != 0;
 #elif NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_NETBSD || NV_OS_OPENBSD || NV_OS_ORBIS || NV_OS_LINUX
         return isnan(f);
 #else
 #   error "isNan not supported"
 #endif
     }
 
     // Integer base-2 logarithm, rounded down: the number of right shifts needed to
     // reduce i to at most 1. Returns 0 for both 0 and 1.
     inline uint log2(uint i)
     {
         uint shifts = 0;
         for (i >>= 1; i != 0; i >>= 1) {
             shifts++;
         }
         return shifts;
     }
 
     // Linear interpolation: f0 at t == 0, f1 at t == 1.
     inline float lerp(float f0, float f1, float t)
     {
         return f0 * (1.0f - t) + f1 * t;
     }
 
     // Second power, for floats and ints.
     inline float square(float f)
     {
         return f * f;
     }
     inline int square(int i)
     {
         return i * i;
     }
 
     // Third power, for floats and ints.
     inline float cube(float f)
     {
         return f * f * f;
     }
     inline int cube(int i)
     {
         return i * i * i;
     }
 
     // Fractional part of f measured from floor(f), so the result is non-negative
     // for negative inputs as well.
     inline float frac(float f)
     {
         return f - floor(f);
     }
 
     // Rounds to the nearest integer value, midpoints going up; the result stays a float.
     inline float floatRound(float f)
     {
         const float shifted = f + 0.5f;
         return floorf(shifted);
     }
 
     // Eliminates negative zeros from a float array.
     // Replaces every negative zero among the first n elements of fp with positive zero.
     inline void floatCleanup(float * fp, int n)
     {
         for (int idx = 0; idx < n; ++idx) {
             //nvDebugCheck(isFinite(fp[idx]));
             union { float f; uint32 i; } bits = { fp[idx] };
 
             // 0x80000000 is the bit pattern with only the sign bit set, i.e. -0.0f.
             const bool isNegativeZero = (bits.i == 0x80000000);
             if (isNegativeZero) {
                 fp[idx] = 0.0f;
             }
         }
     }
 
     // Clamps f to the [0, 1] range.
     inline float saturate(float f) {
         const float lowest = 0.0f;
         const float highest = 1.0f;
         return clamp(f, lowest, highest);
     }
 
     // Maps x from [edge0, edge1] to [0, 1] linearly, saturating outside the interval.
     inline float linearstep(float edge0, float edge1, float x) {
         const float normalized = (x - edge0) / (edge1 - edge0);
         return saturate(normalized);
     }
 
     // Like linearstep, but shaped with the cubic 3t^2 - 2t^3.
     inline float smoothstep(float edge0, float edge1, float x) {
         const float t = linearstep(edge0, edge1, x);
         return t*t*(3 - 2*t);
     }
 
     // Returns 1 for positive values, -1 for negative values and 0 otherwise.
     inline int sign(float a)
     {
         if (a > 0) {
             return 1;
         }
         if (a < 0) {
             return -1;
         }
         return 0;
     }
 
     // Views a 32-bit float as its raw bits or as separate sign / exponent / mantissa fields.
     // The bit-field order is swapped when NV_BIG_ENDIAN is set.
     union Float754 {
         unsigned int raw;
         float value;
         struct {
         #if NV_BIG_ENDIAN
             unsigned int negative:1;
             unsigned int biasedexponent:8;
             unsigned int mantissa:23;
         #else
             unsigned int mantissa:23;
             unsigned int biasedexponent:8;
             unsigned int negative:1;
         #endif
         } field;
     };
 
     // Return the exponent of x ~ Floor(Log2(x))
     inline int floatExponent(float x)
     {
         Float754 f;
         f.value = x;
         return (f.field.biasedexponent - 127);
     }
 
 
     // FloatRGB9E5
     // 32-bit packed value: three 9-bit fields (xm, ym, zm) and one 5-bit field (e).
     // The bit-field order is swapped when NV_BIG_ENDIAN is set.
     union Float3SE {
         uint32 v;
         struct {
         #if NV_BIG_ENDIAN
             uint32 e : 5;
             uint32 zm : 9;
             uint32 ym : 9;
             uint32 xm : 9;
         #else
             uint32 xm : 9;
             uint32 ym : 9;
             uint32 zm : 9;
             uint32 e : 5;
         #endif
         };
     };
 
     // FloatR11G11B10
     // 32-bit packed value: x and y use 6 + 5 bits (xm/xe, ym/ye), z uses 5 + 5 bits (zm/ze).
     // The bit-field order is swapped when NV_BIG_ENDIAN is set.
     union Float3PK {
         uint32 v;
         struct {
         #if NV_BIG_ENDIAN
             uint32 ze : 5;
             uint32 zm : 5;
             uint32 ye : 5;
             uint32 ym : 6;
             uint32 xe : 5;
             uint32 xm : 6;
         #else
             uint32 xm : 6;
             uint32 xe : 5;
             uint32 ym : 6;
             uint32 ye : 5;
             uint32 zm : 5;
             uint32 ze : 5;
         #endif
         };
     };
 
 
 } // nv
 
 #endif // NV_MATH_H
Index: ps/trunk/libraries/source/nvtt/src/src/nvthread/Atomic.h
===================================================================
--- ps/trunk/libraries/source/nvtt/src/src/nvthread/Atomic.h	(revision 27269)
+++ ps/trunk/libraries/source/nvtt/src/src/nvthread/Atomic.h	(revision 27270)
@@ -1,408 +1,408 @@
 // This code is in the public domain -- castanyo@yahoo.es
 
 #ifndef NV_THREAD_ATOMIC_H
 #define NV_THREAD_ATOMIC_H
 
 #include "nvthread.h"
 
 #include "nvcore/Debug.h"
 
 
 #if NV_CC_MSVC
 
 #include <intrin.h> // Already included by nvthread.h
 
 #pragma intrinsic(_InterlockedIncrement, _InterlockedDecrement)
 #pragma intrinsic(_InterlockedCompareExchange, _InterlockedExchange)
 //#pragma intrinsic(_InterlockedExchangeAdd64)
 
 /*
 extern "C"
 {
     #pragma intrinsic(_InterlockedIncrement, _InterlockedDecrement)
     LONG  __cdecl _InterlockedIncrement(long volatile *Addend);
     LONG  __cdecl _InterlockedDecrement(long volatile *Addend);
 
     #pragma intrinsic(_InterlockedCompareExchange, _InterlockedExchange)
     LONG  __cdecl _InterlockedCompareExchange(long volatile * Destination, long Exchange, long Compared);
     LONG  __cdecl _InterlockedExchange(long volatile * Target, LONG Value);
 }
 */
 
 #endif // NV_CC_MSVC
 
 #if NV_CC_CLANG && POSH_CPU_STRONGARM
 // LLVM/Clang do not yet have functioning atomics as of 2.1
 // #include <atomic>
 #endif
 
 //ACS: need this if we want to use Apple's atomics.
 /*
 #if NV_OS_IOS || NV_OS_DARWIN
 // for iOS & OSX we use apple's atomics
 #include "libkern/OSAtomic.h"
 #endif
 */
 
 namespace nv {
 
     // Load and stores.
     // Plain load, no barrier.
     inline uint32 loadRelaxed(const uint32 * ptr)
     {
         return *ptr;
     }
     // Plain store, no barrier.
     inline void storeRelaxed(uint32 * ptr, uint32 value)
     {
         *ptr = value;
     }
 
     // Loads *ptr and surrounds the load with the compiler barriers chosen for the target CPU below.
     // Debug builds check that ptr is 4-byte aligned; unsupported CPUs fail to compile.
     inline uint32 loadAcquire(const volatile uint32 * ptr)
     {
         nvDebugCheck((intptr_t(ptr) & 3) == 0);
 
-#if POSH_CPU_X86 || POSH_CPU_X86_64
+#if POSH_CPU_X86 || POSH_CPU_X86_64 || POSH_CPU_E2K
         uint32 ret = *ptr;  // on x86, loads are Acquire
         nvCompilerReadBarrier();
         return ret;
 #elif POSH_CPU_STRONGARM || POSH_CPU_AARCH64
         // need more specific cpu type for armv7?
         // also utilizes a full barrier
         // currently treating load like x86 - this could be wrong
         
         // this is the easiest but slowest way to do this
         nvCompilerReadWriteBarrier();
 		uint32 ret = *ptr; // replace with ldrex?
         nvCompilerReadWriteBarrier();
         return ret;
 #elif POSH_CPU_PPC64
         // need more specific cpu type for ppc64?
         // also utilizes a full barrier
         // currently treating load like x86 - this could be wrong
 
         // this is the easiest but slowest way to do this
         nvCompilerReadWriteBarrier();
 		uint32 ret = *ptr; // replace with ldrex?
         nvCompilerReadWriteBarrier();
         return ret;
 #else
 #error "Not implemented"
 #endif
     }
 
     // Stores value into *ptr, preceded (and on ARM/PPC64 also followed) by a compiler barrier.
     // Debug builds check 4-byte alignment of ptr and of the local copy of value;
     // unsupported CPUs fail to compile.
     inline void storeRelease(volatile uint32 * ptr, uint32 value)
     {
         nvDebugCheck((intptr_t(ptr) & 3) == 0);
         nvDebugCheck((intptr_t(&value) & 3) == 0);
 
-#if POSH_CPU_X86 || POSH_CPU_X86_64
+#if POSH_CPU_X86 || POSH_CPU_X86_64 || POSH_CPU_E2K
         nvCompilerWriteBarrier();
         *ptr = value;   // on x86, stores are Release
         //nvCompilerWriteBarrier(); // @@ IC: Where does this barrier go? In nvtt it was after, in Witness before. Not sure which one is right.
 #elif POSH_CPU_STRONGARM || POSH_CPU_AARCH64
         // this is the easiest but slowest way to do this
         nvCompilerReadWriteBarrier();
 		*ptr = value; //strex?
 		nvCompilerReadWriteBarrier();
 #elif POSH_CPU_PPC64
         // this is the easiest but slowest way to do this
         nvCompilerReadWriteBarrier();
 		*ptr = value; //strex?
 		nvCompilerReadWriteBarrier();
 #else
 #error "Atomics not implemented."
 #endif
     }
 
 
     // Pointer-sized store: compiler write barrier first, then the plain store.
     // T must have the size of intptr_t (checked at compile time); alignment is checked in debug builds.
     // NOTE(review): unlike storeRelease above there is no per-CPU branch here -- confirm that the
     // x86 assumption in the comment below is acceptable on the other supported CPUs.
     template <typename T>
     inline void storeReleasePointer(volatile T * pTo, T from)
     {
         NV_COMPILER_CHECK(sizeof(T) == sizeof(intptr_t));
         nvDebugCheck((((intptr_t)pTo) % sizeof(intptr_t)) == 0);
         nvDebugCheck((((intptr_t)&from) % sizeof(intptr_t)) == 0);
         nvCompilerWriteBarrier();
         *pTo = from;    // on x86, stores are Release
     }
 
     // Pointer-sized load: plain load first, then a compiler read barrier.
     // T must have the size of intptr_t (checked at compile time); alignment is checked in debug builds.
     // NOTE(review): unlike loadAcquire above there is no per-CPU branch here -- confirm that the
     // x86 assumption in the comment below is acceptable on the other supported CPUs.
     template <typename T>
     inline T loadAcquirePointer(volatile T * ptr)
     {
         NV_COMPILER_CHECK(sizeof(T) == sizeof(intptr_t));
         nvDebugCheck((((intptr_t)ptr) % sizeof(intptr_t)) == 0);
         T ret = *ptr;   // on x86, loads are Acquire
         nvCompilerReadBarrier();
         return ret;
     } 
 
 
     // Atomics. @@ Assuming sequential memory order?
 
 #if NV_CC_MSVC
     NV_COMPILER_CHECK(sizeof(uint32) == sizeof(long));
 
     // Returns incremented value.
     // Returns incremented value.
     inline uint32 atomicIncrement(uint32 * value)
     {
         nvDebugCheck((intptr_t(value) & 3) == 0);
         long * const target = (long *)value;
         const long updated = _InterlockedIncrement(target);
         return uint32(updated);
     }
 
     // Returns decremented value.
     inline uint32 atomicDecrement(uint32 * value)
     {
         nvDebugCheck((intptr_t(value) & 3) == 0);
         long * const target = (long *)value;
         const long updated = _InterlockedDecrement(target);
         return uint32(updated);
     }
 
     // Returns added value.
     inline uint32 atomicAdd(uint32 * value, uint32 value_to_add) {
         nvDebugCheck((intptr_t(value) & 3) == 0);
         // The intrinsic returns the value before the addition, so add once more for the new value.
         const long previous = _InterlockedExchangeAdd((long*)value, (long)value_to_add);
         return uint32(previous) + value_to_add;
     }
 
     // Returns original value before addition.
     inline uint32 atomicFetchAndAdd(uint32 * value, uint32 value_to_add) {
         nvDebugCheck((intptr_t(value) & 3) == 0);
         const long previous = _InterlockedExchangeAdd((long*)value, (long)value_to_add);
         return uint32(previous);
     }
 
 
 
 
     // Compare '*value' against 'expected', if equal, then stores 'desired' in '*value'.
     // @@ C++0x style CAS? Unlike the C++0x version, 'expected' is not passed by reference and not mutated.
     // @@ Is this strong or weak? Does InterlockedCompareExchange have spurious failures?
     inline bool atomicCompareAndSwap(uint32 * value, uint32 expected, uint32 desired)
     {
         nvDebugCheck((intptr_t(value) & 3) == 0);
         // The swap happened exactly when the value that was read equals 'expected'.
         const long observed = _InterlockedCompareExchange((long *)value, (long)desired, (long)expected);
         return observed == (long)expected;
     }
 
 
     // Stores 'desired' and returns the value that was there before.
     inline uint32 atomicSwap(uint32 * value, uint32 desired)
     {
         nvDebugCheck((intptr_t(value) & 3) == 0);
         const long previous = _InterlockedExchange((long *)value, (long)desired);
         return (uint32)previous;
     }
 
 
 
 #elif NV_CC_CLANG && (NV_OS_IOS || NV_OS_DARWIN)
 
     //ACS: Use Apple's atomics instead? I don't know if these are better in any way; there are non-barrier versions too. There's no OSAtomicSwap32 tho'
     /*
     inline uint32 atomicIncrement(uint32 * value)
     {
         nvDebugCheck((intptr_t(value) & 3) == 0);
         return (uint32)OSAtomicIncrement32Barrier((int32_t *)value);
     }
     
     inline uint32 atomicDecrement(uint32 * value)
     {
         nvDebugCheck((intptr_t(value) & 3) == 0);
         return (uint32)OSAtomicDecrement32Barrier((int32_t *)value);
     }
 
     // Compare '*value' against 'expected', if equal, then stores 'desired' in '*value'.
     // @@ C++0x style CAS? Unlike the C++0x version, 'expected' is not passed by reference and not mutated.
     // @@ Is this strong or weak?
     inline bool atomicCompareAndSwap(uint32 * value, uint32 expected, uint32 desired)
     {
         nvDebugCheck((intptr_t(value) & 3) == 0);
         return OSAtomicCompareAndSwap32Barrier((int32_t)expected, (int32_t)desired, (int32_t *)value);
     }
     */
 
     // Returns incremented value.
     inline uint32 atomicIncrement(uint32 * value) {
         nvDebugCheck((intptr_t(value) & 3) == 0);
         return __sync_add_and_fetch(value, 1);
     }
     
     // Returns decremented value.
     inline uint32 atomicDecrement(uint32 * value) {
         nvDebugCheck((intptr_t(value) & 3) == 0);
         return __sync_sub_and_fetch(value, 1);
     }
 
     // Returns added value.
     inline uint32 atomicAdd(uint32 * value, uint32 value_to_add) {
         nvDebugCheck((intptr_t(value) & 3) == 0);
         return __sync_add_and_fetch(value, value_to_add);
     }
 
     // Adds 'value_to_add' to '*value' through the __sync_fetch_and_add builtin
     // and returns the value '*value' held before the addition.
     // The pointer is expected to be 4-byte aligned; nvDebugCheck verifies that.
     inline uint32 atomicFetchAndAdd(uint32 * value, uint32 value_to_add)
     {
         nvDebugCheck((intptr_t(value) & 3) == 0);
         const uint32 before = __sync_fetch_and_add(value, value_to_add);
         return before;
     }
 
 
     // Stores 'desired' in '*value' only when '*value' currently equals 'expected'.
     // Returns whatever __sync_bool_compare_and_swap reports (true when the store
     // took place, per the GCC builtin documentation).
     // Unlike the C++0x compare-exchange, 'expected' is taken by value and is never updated.
     // @@ C++0x style CAS?
     // @@ Is this strong or weak?
     inline bool atomicCompareAndSwap(uint32 * value, uint32 expected, uint32 desired)
     {
         nvDebugCheck((intptr_t(value) & 3) == 0);
         const bool swapped = __sync_bool_compare_and_swap(value, expected, desired);
         return swapped;
     }
     
     // Writes 'desired' into '*value' and returns what __sync_lock_test_and_set
     // hands back (the previous contents, per the GCC builtin documentation).
     // NOTE(review): GCC documents this builtin as an acquire barrier rather than
     // a full barrier - confirm that is sufficient for callers.
     inline uint32 atomicSwap(uint32 * value, uint32 desired)
     {
         nvDebugCheck((intptr_t(value) & 3) == 0);
         // Despite the "test" in its name, this builtin performs no test: it always stores.
         const uint32 previous = __sync_lock_test_and_set(value, desired);
         return previous;
     }
 
 
 
 
 #elif NV_CC_CLANG && POSH_CPU_STRONGARM
     
     // Increments the 32-bit counter at '*value' with an exclusive load/store
     // retry loop and returns the incremented value.
     // The pointer is expected to be 4-byte aligned; nvDebugCheck verifies that.
     // NOTE(review): no DMB is issued here, so the update is atomic but is not a
     // hardware memory barrier - confirm whether callers need one.
     inline uint32 atomicIncrement(uint32 * value)
     {
         nvDebugCheck((intptr_t(value) & 3) == 0);
         // this should work in LLVM eventually, but not as of 2.1
         // return (uint32)AtomicIncrement((long *)value);
         // In the meantime, use inline assembly. The word-sized LDREX/STREX forms
         // are required: the byte-sized LDREXB/STREXB forms only load and store
         // the low 8 bits of the 32-bit counter.
         uint32 result;  // updated counter value; also the value that gets stored
         uint32 failed;  // STREX status: 0 on success, non-zero when the store must be retried
         asm volatile (
             "1:   ldrex   %0, [%2]      \n\t"
             "add     %0, %0, #1         \n\t"
             "strex   %1, %0, [%2]       \n\t"
             "cmp     %1, #0             \n\t"
             "bne     1b"
             // Early-clobber outputs keep both registers distinct from the address register.
             : "=&r" (result), "=&r" (failed)
             : "r" (value)
             // 'cmp' changes the condition flags and the loop writes to memory.
             : "cc", "memory"
             );
         return result;
     }
     
     // Decrements the 32-bit counter at '*value' with an exclusive load/store
     // retry loop and returns the decremented value.
     // The pointer is expected to be 4-byte aligned; nvDebugCheck verifies that.
     // NOTE(review): no DMB is issued here, so the update is atomic but is not a
     // hardware memory barrier - confirm whether callers need one.
     inline uint32 atomicDecrement(uint32 * value)
     {
         nvDebugCheck((intptr_t(value) & 3) == 0);
         // this should work in LLVM eventually, but not as of 2.1:
         // return (uint32)sys::AtomicDecrement((long *)value);
         // In the meantime, use inline assembly. The word-sized LDREX/STREX forms
         // are required: the byte-sized LDREXB/STREXB forms only load and store
         // the low 8 bits of the 32-bit counter.
         uint32 result;  // updated counter value; also the value that gets stored
         uint32 failed;  // STREX status: 0 on success, non-zero when the store must be retried
         asm volatile (
             "1:   ldrex   %0, [%2]      \n\t"
             "sub     %0, %0, #1         \n\t"
             "strex   %1, %0, [%2]       \n\t"
             "cmp     %1, #0             \n\t"
             "bne     1b"
             // Early-clobber outputs keep both registers distinct from the address register.
             : "=&r" (result), "=&r" (failed)
             : "r" (value)
             // 'cmp' changes the condition flags and the loop writes to memory.
             : "cc", "memory"
             );
         return result;
     }
 
 #elif NV_CC_GNUC
     // Many alternative implementations at:
     // http://www.memoryhole.net/kyle/2007/05/atomic_incrementing.html
 
     // Adds one to '*value' through the __sync_add_and_fetch builtin and
     // returns the value after the increment.
     // The pointer is expected to be 4-byte aligned; nvDebugCheck verifies that.
     inline uint32 atomicIncrement(uint32 * value)
     {
         nvDebugCheck((intptr_t(value) & 3) == 0);
         const uint32 incremented = __sync_add_and_fetch(value, 1);
         return incremented;
     }
 
     // Subtracts one from '*value' through the __sync_sub_and_fetch builtin and
     // returns the value after the decrement.
     // The pointer is expected to be 4-byte aligned; nvDebugCheck verifies that.
     inline uint32 atomicDecrement(uint32 * value)
     {
         nvDebugCheck((intptr_t(value) & 3) == 0);
         const uint32 decremented = __sync_sub_and_fetch(value, 1);
         return decremented;
     }
 
     // Adds 'value_to_add' to '*value' through the __sync_add_and_fetch builtin
     // and returns the resulting sum (the value after the addition).
     // The pointer is expected to be 4-byte aligned; nvDebugCheck verifies that.
     inline uint32 atomicAdd(uint32 * value, uint32 value_to_add)
     {
         nvDebugCheck((intptr_t(value) & 3) == 0);
         const uint32 sum = __sync_add_and_fetch(value, value_to_add);
         return sum;
     }
 
     // Adds 'value_to_add' to '*value' through the __sync_fetch_and_add builtin
     // and returns the value '*value' held before the addition.
     // The pointer is expected to be 4-byte aligned; nvDebugCheck verifies that.
     inline uint32 atomicFetchAndAdd(uint32 * value, uint32 value_to_add)
     {
         nvDebugCheck((intptr_t(value) & 3) == 0);
         const uint32 before = __sync_fetch_and_add(value, value_to_add);
         return before;
     }
 
     // Stores 'desired' in '*value' only when '*value' currently equals 'expected'.
     // Returns whatever __sync_bool_compare_and_swap reports (true when the store
     // took place, per the GCC builtin documentation).
     // Unlike the C++0x compare-exchange, 'expected' is taken by value and is never updated.
     // @@ C++0x style CAS?
     // @@ Is this strong or weak?
     inline bool atomicCompareAndSwap(uint32 * value, uint32 expected, uint32 desired)
     {
         nvDebugCheck((intptr_t(value) & 3) == 0);
         const bool swapped = __sync_bool_compare_and_swap(value, expected, desired);
         return swapped;
     }
     
     // Writes 'desired' into '*value' and returns what __sync_lock_test_and_set
     // hands back (the previous contents, per the GCC builtin documentation).
     // NOTE(review): GCC documents this builtin as an acquire barrier rather than
     // a full barrier - confirm that is sufficient for callers.
     inline uint32 atomicSwap(uint32 * value, uint32 desired)
     {
         nvDebugCheck((intptr_t(value) & 3) == 0);
         // Despite the "test" in its name, this builtin performs no test: it always stores.
         const uint32 previous = __sync_lock_test_and_set(value, desired);
         return previous;
     }
     
 #else
 #error "Atomics not implemented."
 
 #endif
 
 
 
 
     // It would be nice to have C++0x-style atomic types, but I'm not in the mood right now. Only uint32 supported so far.
 #if 0
     // Disabled sketch (this region sits inside '#if 0'): generic
     // increment/decrement entry points taking a pointer to the value.
     template <typename T>
     void increment(T * value);
 
     template <typename T>
     void decrement(T * value);
 
     // Placeholder specialization for uint32; the body is empty.
     template <>
     void increment(uint32 * value) {
     }
 
     // Placeholder specialization for uint64; the body is empty.
     // No matching specializations of decrement() are provided in this sketch.
     template <>
     void increment(uint64 * value) {
     }
 
 
 
     // Disabled sketch (this region sits inside '#if 0') of a C++0x-style atomic
     // wrapper around a single value of type T, where sizeof(T) must match
     // uint32 or uint64 (see the NV_COMPILER_CHECK below).
     // NOTE(review): this class would need work before being enabled - see the
     // notes on the individual members.
     template <typename T>
     class Atomic
     {
     public:
         // Value-initializes the wrapped value.
         explicit Atomic()  : m_value() { }
         // Starts with 'val' as the wrapped value.
         explicit Atomic( T val ) : m_value(val) { }
         ~Atomic() { }
 
         // Plain (non-atomic-builtin) read and write of the wrapped value.
         T loadRelaxed()  const { return m_value; }
         void storeRelaxed(T val) { m_value = val; }
 
         //T loadAcquire() const volatile { return nv::loadAcquire(&m_value); }
         //void storeRelease(T val) volatile { nv::storeRelease(&m_value, val); }
 
         // NOTE(review): the nv::atomicIncrement/atomicDecrement overloads visible in this
         // file take a 'uint32 *', but these calls pass m_value by value, not '&m_value'.
         void increment() /*volatile*/ { nv::atomicIncrement(m_value); }
         void decrement() /*volatile*/ { nv::atomicDecrement(m_value); }
 
         // NOTE(review): nv::atomicCompareAndStore and nv::atomicExchange are not defined in
         // the visible part of this file (the visible helpers are atomicCompareAndSwap and
         // atomicSwap), and the two members declared to return T have no return statement.
         void compareAndStore(T oldVal, T newVal) { nv::atomicCompareAndStore(&m_value, oldVal, newVal); }
         T compareAndExchange(T oldVal, T newVal) { nv::atomicCompareAndStore(&m_value, oldVal, newVal); }
         T exchange(T newVal) { nv::atomicExchange(&m_value, newVal); }
 
     private:
         // don't provide operator = or == ; make the client write Store( Load() )
         NV_FORBID_COPY(Atomic);
 
         // Only 32-bit and 64-bit sized T are accepted.
         NV_COMPILER_CHECK(sizeof(T) == sizeof(uint32) || sizeof(T) == sizeof(uint64));
 
         // The wrapped value.
         T m_value;
     };
 #endif
 
 } // nv namespace 
 
 
 #endif // NV_THREADS_ATOMICS_H