Index: ps/trunk/source/graphics/Color.cpp =================================================================== --- ps/trunk/source/graphics/Color.cpp (revision 3910) +++ ps/trunk/source/graphics/Color.cpp (revision 3911) @@ -1,41 +1,41 @@ #include "precompiled.h" #include "MathUtil.h" #include "graphics/Color.h" static u32 fallback_ConvertRGBColorTo4ub(const RGBColor& src) { SColor4ub result; result.R=clamp(int(src.X*255),0,255); result.G=clamp(int(src.Y*255),0,255); result.B=clamp(int(src.Z*255),0,255); result.A=0xff; return *(u32*)&result; } // on IA32, this is replaced by an SSE assembly version in ia32.cpp u32 (*ConvertRGBColorTo4ub)(const RGBColor& src) = fallback_ConvertRGBColorTo4ub; // Assembler-optimized function for color conversion #if CPU_IA32 extern "C" u32 sse_ConvertRGBColorTo4ub(const RGBColor& src); #endif void ColorActivateFastImpl() { if(0) { } #if CPU_IA32 - else if (ia32_cap(SSE)) + else if (ia32_cap(IA32_CAP_SSE)) { ConvertRGBColorTo4ub = sse_ConvertRGBColorTo4ub; } #endif else { debug_printf("No SSE available. Slow fallback routines will be used.\n"); } } Index: ps/trunk/source/ps/XML/XMLWriter.cpp =================================================================== --- ps/trunk/source/ps/XML/XMLWriter.cpp (revision 3910) +++ ps/trunk/source/ps/XML/XMLWriter.cpp (revision 3911) @@ -1,251 +1,184 @@ #include "precompiled.h" #include "XMLWriter.h" #include "ps/CLogger.h" #include "lib/res/file/vfs.h" #include "self_test.h" // TODO (maybe): Write to the VFS handle frequently, instead of buffering // the entire file, so that large files get written faster. 
enum { EL_ATTR, EL_TEXT, EL_SUBEL }; XMLWriter_File::XMLWriter_File(const char* encoding) : m_Indent(0), m_LastElement(NULL) { m_Data = "\n"; } void XMLWriter_File::Doctype(const char* type, const char* dtd) { m_Data += "\n"; } bool XMLWriter_File::StoreVFS(Handle h) { if (m_LastElement) debug_warn("ERROR: Saving XML while an element is still open"); FileIOBuf data = (FileIOBuf)m_Data.data(); int err = vfs_io(h, m_Data.Length(), &data); if (err < 0) { LOG(ERROR, "xml", "Error saving XML data through VFS: %lld", h); return false; } return true; } void XMLWriter_File::Comment(const char* text) { ElementStart(NULL, "!-- "); m_Data += text; m_Data += " -->"; --m_Indent; } CStr XMLWriter_File::Indent() { return std::string(m_Indent, '\t'); } void XMLWriter_File::ElementStart(XMLWriter_Element* element, const char* name) { if (m_LastElement) m_LastElement->Close(EL_SUBEL); m_LastElement = element; m_Data += "\n"; m_Data += Indent(); m_Data += "<"; m_Data += name; ++m_Indent; } void XMLWriter_File::ElementClose() { m_Data += ">"; } void XMLWriter_File::ElementEnd(const char* name, int type) { --m_Indent; m_LastElement = NULL; switch (type) { case EL_ATTR: m_Data += " />"; break; case EL_TEXT: m_Data += ""; break; case EL_SUBEL: m_Data += "\n"; m_Data += Indent(); m_Data += ""; break; default: debug_assert(0); } } void XMLWriter_File::ElementText(const char* text) { m_Data += text; } XMLWriter_Element::XMLWriter_Element(XMLWriter_File& file, const char* name) : m_File(&file), m_Name(name), m_Type(EL_ATTR) { m_File->ElementStart(this, name); } XMLWriter_Element::~XMLWriter_Element() { m_File->ElementEnd(m_Name, m_Type); } void XMLWriter_Element::Close(int type) { m_File->ElementClose(); m_Type = type; } void XMLWriter_Element::Text(const char* text) { Close(EL_TEXT); m_File->ElementText(text); } template <> void XMLWriter_File::ElementAttribute(const char* name, const CStr& value, bool newelement) { if (newelement) { ElementStart(NULL, name); m_Data += ">"; 
ElementText(value); ElementEnd(name, EL_TEXT); } else { debug_assert(m_LastElement && m_LastElement->m_Type == EL_ATTR); m_Data += " "; m_Data += name; m_Data += "=\""; m_Data += value; m_Data += "\""; } } // Attribute/setting value-to-string template specialisations. // // These only deal with basic types. Anything more complicated should // be converted into a basic type by whatever is making use of XMLWriter, // to keep game-related logic out of the not-directly-game-related code here. // Use CStr's conversion for most types: #define TYPE2(ID_T, ARG_T) \ template <> void XMLWriter_File::ElementAttribute(const char* name, ARG_T value, bool newelement) \ { \ ElementAttribute(name, CStr(value), newelement); \ } #define TYPE(T) TYPE2(T, const T &) TYPE(int) TYPE(unsigned int) TYPE(float) TYPE(double) // This is the effect of doing const T& with T=const char* - char const* const& // Weird - I know ;-) TYPE2(const char *, char const* const&) // Encode Unicode strings as UTF-8 (though that will only be correct if // the encoding was set to "utf-8"; it'll look a little odd if you store // Unicode strings in an iso-8859-1 file, so please don't do that) template <> void XMLWriter_File::ElementAttribute(const char* name, const CStrW& value, bool newelement) { ElementAttribute(name, value.ToUTF8(), newelement); } - - -//---------------------------------------------------------------------------- -// built-in self test -//---------------------------------------------------------------------------- - -#if SELF_TEST_ENABLED -namespace test { - -static void test1() -{ - XML_Start("utf-8"); - XML_Doctype("Scenario", "/maps/scenario.dtd"); - - { - XML_Element("Scenario"); - { - XML_Comment("Comment test."); - XML_Comment("Comment test again."); - { - XML_Element("a"); - XML_Attribute("one", 1); - XML_Attribute("two", "TWO"); - XML_Text("b"); - XML_Text(" (etc)"); - } - { - XML_Element("c"); - XML_Text("d"); - } - XML_Setting("c2", "d2"); - { - XML_Element("e"); - { - { - 
XML_Element("f"); - XML_Text("g"); - } - { - XML_Element("h"); - } - { - XML_Element("i"); - XML_Attribute("j", 1.23); - { - XML_Element("k"); - XML_Attribute("l", 2.34); - XML_Text("m"); - } - } - } - } - } - } - - // For this test to be useful, it should actually test something. -} - -static void self_test() -{ - test1(); -} - -SELF_TEST_RUN; - -} // namespace test -#endif // #if SELF_TEST_ENABLED Index: ps/trunk/source/ps/CStr.cpp =================================================================== --- ps/trunk/source/ps/CStr.cpp (revision 3910) +++ ps/trunk/source/ps/CStr.cpp (revision 3911) @@ -1,657 +1,627 @@ #include "precompiled.h" #ifndef CStr_CPP_FIRST #define CStr_CPP_FIRST #include "posix.h" // for htons, ntohs #include "Network/Serialization.h" #include #include #define UNIDOUBLER_HEADER "CStr.cpp" #include "UniDoubler.h" #include "self_test.h" // Only include these function definitions in the first instance of CStr.cpp: CStrW::CStrW(const CStr8 &asciStr) : std::wstring(asciStr.begin(), asciStr.end()) {} CStr8::CStr8(const CStrW &wideStr) : std:: string(wideStr.begin(), wideStr.end()) {} // UTF conversion code adapted from http://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c static const unsigned char firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; static const char trailingBytesForUTF8[256] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 }; static const u32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL, 0x03C82080UL, 0xFA082080UL, 
0x82082080UL }; CStr8 CStrW::ToUTF8() const { CStr8 result; for (size_t i = 0; i < Length(); ++i) { unsigned short bytesToWrite; wchar_t ch = (*this)[i]; if (ch < 0x80) bytesToWrite = 1; else if (ch < 0x800) bytesToWrite = 2; else if (ch < 0x10000) bytesToWrite = 3; else if (ch <= 0x7FFFFFFF) bytesToWrite = 4; else bytesToWrite = 3, ch = 0x0000FFFD; // replacement character char buf[4]; char* target = &buf[bytesToWrite]; switch (bytesToWrite) { case 4: *--target = ((ch | 0x80) & 0xBF); ch >>= 6; case 3: *--target = ((ch | 0x80) & 0xBF); ch >>= 6; case 2: *--target = ((ch | 0x80) & 0xBF); ch >>= 6; case 1: *--target = (ch | firstByteMark[bytesToWrite]); } result += CStr(buf, bytesToWrite); } return result; } static bool isLegalUTF8(const unsigned char *source, int length) { unsigned char a; const unsigned char *srcptr = source+length; switch (length) { default: return false; case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false; case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false; case 2: if ((a = (*--srcptr)) > 0xBF) return false; switch (*source) { case 0xE0: if (a < 0xA0) return false; break; case 0xED: if (a > 0x9F) return false; break; case 0xF0: if (a < 0x90) return false; break; case 0xF4: if (a > 0x8F) return false; break; default: if (a < 0x80) return false; } case 1: if (*source >= 0x80 && *source < 0xC2) return false; } if (*source > 0xF4) return false; return true; } CStrW CStr8::FromUTF8() const { CStrW result; const unsigned char* source = (const unsigned char*)&*begin(); const unsigned char* sourceEnd = source + length(); while (source < sourceEnd) { wchar_t ch = 0; unsigned short extraBytesToRead = trailingBytesForUTF8[*source]; if (source + extraBytesToRead >= sourceEnd) { debug_warn("Invalid UTF-8 (fell off end)"); return L""; } if (! 
isLegalUTF8(source, extraBytesToRead+1)) { debug_warn("Invalid UTF-8 (illegal data)"); return L""; } switch (extraBytesToRead) { case 5: ch += *source++; ch <<= 6; case 4: ch += *source++; ch <<= 6; case 3: ch += *source++; ch <<= 6; case 2: ch += *source++; ch <<= 6; case 1: ch += *source++; ch <<= 6; case 0: ch += *source++; } ch -= offsetsFromUTF8[extraBytesToRead]; result += ch; } return result; } -//---------------------------------------------------------------------------- -// built-in self test -//---------------------------------------------------------------------------- - -#if SELF_TEST_ENABLED -namespace test { - -static void test1() -{ - const wchar_t chr_utf16[] = { 0x12, 0xff, 0x1234, 0x3456, 0x5678, 0x7890, 0x9abc, 0xbcde, 0xfffe }; - const unsigned char chr_utf8[] = { 0x12, 0xc3, 0xbf, 0xe1, 0x88, 0xb4, 0xe3, 0x91, 0x96, 0xe5, 0x99, 0xb8, 0xe7, 0xa2, 0x90, 0xe9, 0xaa, 0xbc, 0xeb, 0xb3, 0x9e, 0xef, 0xbf, 0xbe }; - CStrW str_utf16 (chr_utf16, sizeof(chr_utf16)/sizeof(wchar_t)); - CStr8 str_utf8 = str_utf16.ToUTF8(); - TEST(str_utf8.length() == sizeof(chr_utf8)); - TEST(memcmp(str_utf8.data(), chr_utf8, sizeof(chr_utf8)) == 0); - TEST(str_utf8.FromUTF8() == str_utf16); -} - - -static void self_test() -{ - test1(); -} - -SELF_TEST_RUN; - -} // namespace test -#endif // #if SELF_TEST_ENABLED - - #else // The following code is compiled twice, as CStrW then as CStr8: #include "CStr.h" using namespace std; #include #ifdef _UNICODE #define tstringstream wstringstream #define _tstod wcstod #define _ttoi(a) wcstol(a, NULL, 0) #define _ttol(a) wcstol(a, NULL, 0) #define _istspace iswspace #define _totlower towlower #define _totupper towupper #else #define tstringstream stringstream #define _tstod strtod #define _ttoi atoi #define _ttol atol #define _istspace isspace #define _totlower tolower #define _totupper toupper #endif CStr CStr::Repeat(CStr String, size_t Reps) { CStr ret; ret.reserve(String.Length() * Reps); while (Reps--) ret += String; return ret; } 
// Construction and assignment from numbers: #define NUM_TYPE(T) \ CStr::CStr(T Number) \ { \ std::tstringstream ss; \ ss << Number; \ ss >> *this; \ } \ \ CStr& CStr::operator=(T Number) \ { \ std::tstringstream ss; \ ss << Number; \ ss >> *this; \ return *this; \ } NUM_TYPE(int) NUM_TYPE(long) NUM_TYPE(unsigned int) NUM_TYPE(unsigned long) NUM_TYPE(float) NUM_TYPE(double) #undef NUM_TYPE // Conversion to numbers: int CStr::ToInt() const { return _ttoi(c_str()); } unsigned int CStr::ToUInt() const { return uint(_ttoi(c_str())); } long CStr::ToLong() const { return _ttol(c_str()); } unsigned long CStr::ToULong() const { return ulong(_ttol(c_str())); } float CStr::ToFloat() const { return (float)_tstod(c_str(), NULL); } double CStr::ToDouble() const { return _tstod(c_str(), NULL); } // Retrieves at most 'len' characters, starting at 'start' CStr CStr::GetSubstring(size_t start, size_t len) const { return substr(start, len); } // Search the string for another string long CStr::Find(const CStr& Str) const { size_t Pos = find(Str, 0); if (Pos != npos) return (long)Pos; return -1; } // Search the string for another string long CStr::Find(const tchar &chr) const { size_t Pos = find(chr, 0); if (Pos != npos) return (long)Pos; return -1; } // Search the string for another string long CStr::Find(const int &start, const tchar &chr) const { size_t Pos = find(chr, start); if (Pos != npos) return (long)Pos; return -1; } long CStr::FindInsensitive(const int &start, const tchar &chr) const { return LCase().Find(start, _totlower(chr)); } long CStr::FindInsensitive(const tchar &chr) const { return LCase().Find(_totlower(chr)); } long CStr::FindInsensitive(const CStr& Str) const { return LCase().Find(Str.LCase()); } long CStr::ReverseFind(const CStr& Str) const { size_t Pos = rfind(Str, length() ); if (Pos != npos) return (long)Pos; return -1; } // Lowercase and uppercase CStr CStr::LowerCase() const { tstring NewString = *this; for (size_t i = 0; i < length(); i++) NewString[i] = 
(tchar)_totlower((*this)[i]); return NewString; } CStr CStr::UpperCase() const { tstring NewString = *this; for (size_t i = 0; i < length(); i++) NewString[i] = (tchar)_totupper((*this)[i]); return NewString; } // Lazy versions // code duplication because return by value overhead if they were merely an alias CStr CStr::LCase() const { tstring NewString = *this; for (size_t i = 0; i < length(); i++) NewString[i] = (tchar)_totlower((*this)[i]); return NewString; } CStr CStr::UCase() const { tstring NewString = *this; for (size_t i = 0; i < length(); i++) NewString[i] = (tchar)_totupper((*this)[i]); return NewString; } // Retrieve the substring of the first n characters CStr CStr::Left(size_t len) const { debug_assert(len <= length()); return substr(0, len); } // Retrieve the substring of the last n characters CStr CStr::Right(size_t len) const { debug_assert(len <= length()); return substr(length()-len, len); } // Retrieve the substring following the last occurrence of Str // (or the whole string if it doesn't contain Str) CStr CStr::AfterLast(const CStr& Str) const { long pos = ReverseFind(Str); if (pos == -1) return *this; else return substr(pos + Str.length()); } // Retrieve the substring preceding the last occurrence of Str // (or the whole string if it doesn't contain Str) CStr CStr::BeforeLast(const CStr& Str) const { long pos = ReverseFind(Str); if (pos == -1) return *this; else return substr(0, pos); } // Retrieve the substring following the first occurrence of Str // (or the whole string if it doesn't contain Str) CStr CStr::AfterFirst(const CStr& Str) const { long pos = Find(Str); if (pos == -1) return *this; else return substr(pos + Str.length()); } // Retrieve the substring preceding the first occurrence of Str // (or the whole string if it doesn't contain Str) CStr CStr::BeforeFirst(const CStr& Str) const { long pos = Find(Str); if (pos == -1) return *this; else return substr(0, pos); } // Remove all occurrences of some character or substring void 
CStr::Remove(const CStr& Str) { size_t FoundAt = 0; while (FoundAt != npos) { FoundAt = find(Str, 0); if (FoundAt != npos) erase(FoundAt, Str.length()); } } // Replace all occurrences of some substring by another void CStr::Replace(const CStr& ToReplace, const CStr& ReplaceWith) { size_t Pos = 0; while (Pos != npos) { Pos = find(ToReplace, Pos); if (Pos != npos) { erase(Pos, ToReplace.length()); insert(Pos, ReplaceWith); Pos += ReplaceWith.length(); } } } CStr CStr::UnescapeBackslashes() { // Currently only handle \n and \\, because they're the only interesting ones CStr NewString; bool escaping = false; for (size_t i = 0; i < length(); i++) { tchar ch = (*this)[i]; if (escaping) { switch (ch) { case 'n': NewString += '\n'; break; default: NewString += ch; break; } escaping = false; } else { if (ch == '\\') escaping = true; else NewString += ch; } } return NewString; } // Returns a trimmed string, removes whitespace from the left/right/both CStr CStr::Trim(PS_TRIM_MODE Mode) const { size_t Left = 0, Right = 0; switch (Mode) { case PS_TRIM_LEFT: { for (Left = 0; Left < length(); Left++) if (_istspace((*this)[Left]) == false) break; // end found, trim 0 to Left-1 inclusive } break; case PS_TRIM_RIGHT: { Right = length(); while (Right--) if (_istspace((*this)[Right]) == false) break; // end found, trim len-1 to Right+1 inclusive } break; case PS_TRIM_BOTH: { for (Left = 0; Left < length(); Left++) if (_istspace((*this)[Left]) == false) break; // end found, trim 0 to Left-1 inclusive Right = length(); while (Right--) if (_istspace((*this)[Right]) == false) break; // end found, trim len-1 to Right+1 inclusive } break; default: debug_warn("CStr::Trim: invalid Mode"); } return substr(Left, Right-Left+1); } CStr CStr::Pad(PS_TRIM_MODE Mode, size_t Length) const { size_t Left = 0, Right = 0; if (Length <= length()) return *this; // From here: Length-length() >= 1 switch (Mode) { case PS_TRIM_LEFT: Left = Length - length(); break; case PS_TRIM_RIGHT: Right = Length - 
length(); break; case PS_TRIM_BOTH: Left = (Length - length() + 1)/2; Right = (Length - length() - 1)/2; // cannot be negative break; default: debug_warn("CStr::Trim: invalid Mode"); } return std::tstring(Left, _T(' ')) + *this + std::tstring(Right, _T(' ')); } // Concatenation: CStr CStr::operator+(const CStr& Str) { return std::operator+(*this, std::tstring(Str)); } CStr CStr::operator+(const tchar* Str) { return std::operator+(*this, std::tstring(Str)); } // Joining ASCII and Unicode strings: #ifndef _UNICODE CStr8 CStr::operator+(const CStrW& Str) { return std::operator+(*this, CStr8(Str)); } #else CStrW CStr::operator+(const CStr8& Str) { return std::operator+(*this, CStrW(Str)); } #endif CStr::operator const tchar*() const { return c_str(); } size_t CStr::GetHashCode() const { return (size_t)fnv_hash(data(), length()); // janwas 2005-03-18: now use 32-bit version; 64 is slower and // the result was truncated down to 32 anyway. } #ifdef _UNICODE /* CStrW is always serialized to/from UTF-16 */ u8 *CStrW::Serialize(u8 *buffer) const { size_t len = length(); size_t i = 0; for (i = 0; i < len; i++) *(u16 *)(buffer + i*2) = htons((*this)[i]); // convert to network order (big-endian) *(u16 *)(buffer+i*2) = 0; return buffer+len*2+2; } const u8 *CStrW::Deserialize(const u8 *buffer, const u8 *bufferend) { const u16 *strend = (const u16 *)buffer; while ((const u8 *)strend < bufferend && *strend) strend++; if ((const u8 *)strend >= bufferend) return NULL; resize(strend - (const u16 *)buffer); const u16 *ptr = (const u16 *)buffer; std::wstring::iterator str = begin(); while (ptr < strend) *(str++) = (tchar)ntohs(*(ptr++)); // convert from network order (big-endian) return (const u8 *)(strend+1); } uint CStr::GetSerializedLength() const { return uint(length()*2 + 2); } #else /* CStr8 is always serialized to/from ASCII (or whatever 8-bit codepage stored in the CStr) */ u8 *CStr8::Serialize(u8 *buffer) const { size_t len = length(); size_t i = 0; for (i = 0; i < len; i++) 
buffer[i] = (*this)[i]; buffer[i] = 0; return buffer+len+1; } const u8 *CStr8::Deserialize(const u8 *buffer, const u8 *bufferend) { const u8 *strend = buffer; while (strend < bufferend && *strend) strend++; if (strend >= bufferend) return NULL; *this = std::string(buffer, strend); return strend+1; } uint CStr::GetSerializedLength() const { return uint(length() + 1); } #endif // _UNICODE // Clean up, to keep the second pass through unidoubler happy #undef tstringstream #undef _tstod #undef _ttoi #undef _ttol #undef _istspace #undef _totlower #undef _totupper #endif // CStr_CPP_FIRST Index: ps/trunk/source/ps/Parser.cpp =================================================================== --- ps/trunk/source/ps/Parser.cpp (revision 3910) +++ ps/trunk/source/ps/Parser.cpp (revision 3911) @@ -1,1203 +1,1091 @@ #include "precompiled.h" #include "Parser.h" #include "lib.h" #include "self_test.h" #if MSC_VERSION #pragma warning(disable:4786) #endif using namespace std; //------------------------------------------------- // Macros //------------------------------------------------- #define REGULAR_MAX_LENGTH 10 #define START_DYNAMIC '<' #define END_DYNAMIC '>' #define START_OPTIONAL '[' #define END_OPTIONAL ']' #define REGULAR_EXPRESSION '$' // use GetDouble and type-cast it to <> #define FUNC_IMPL_CAST_GETDOUBLE(func_name,type) \ bool CParserValue::func_name(type &ret) \ { \ double d; \ if (GetDouble(d)) \ return ret = (type)d, true; \ else \ return false; \ } // Function-implementation creator for GetArg%type% that will call // Get%type% from the CParserValue // func_name must belong to CParserFile #define FUNC_IMPL_GETARG(func_name, get_name, type) \ bool CParserLine::func_name(size_t arg, type &ret) \ { \ if (GetArgCount() <= arg) \ return false; \ return m_Arguments[arg].get_name(ret); \ } //------------------------------------------------- // Function definitions //------------------------------------------------- static bool _IsStrictNameChar(const char& c); static 
bool _IsValueChar(const char& c); // Functions used for checking a character if it belongs to a value // or not // Checks ident static bool _IsStrictNameChar(const char& c) { return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9')); } // Checks value static bool _IsValueChar(const char& c) { return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c=='.' || c=='_'); } // CParserValue // ---------------------------------------------------------------------| Class CParserValue::CParserValue() { } CParserValue::~CParserValue() { } // Parse the string in Value to different types // bool bool CParserValue::GetBool(bool &ret) { // TODO Raj Add or remove some? I can make it all lowercase // first so True and TRUE also works, or you could just // add them too // true if (m_String == "true" || m_String == "on" || m_String == "1" || m_String == "yes") { ret = true; return true; } else // false if (m_String == "false" || m_String == "off" || m_String == "0" || m_String == "no") { ret = false; return true; } // point only erroneous runs reach return false; } // double bool CParserValue::GetDouble(double &ret) { // locals double TempRet = 0.0; size_t Size = m_String.size(); size_t i; bool AtLeastOne = false; // Checked if at least one of the loops // run, otherwise "." would parse OK size_t DecimalPos; bool Negative = false; // "-" is found // Check if '-' is found if (m_String[0]=='-') { Negative = true; } // find decimal position DecimalPos = m_String.find("."); if (DecimalPos == string::npos) DecimalPos = Size; // Iterate left of the decimal sign // for (i=(Negative?1:0); i < DecimalPos; ++i) { // Set AtLeastOne to true AtLeastOne = true; // Check if a digit is found if (m_String[i] >= '0' && m_String[i] <= '9') { double exp = (DecimalPos-i-1); // disambiguate pow() argument type TempRet += (m_String[i]-'0')*pow(10.0, exp); } else { // parse error! 
return false; } } // Iterate right of the decimal sign // for (i=DecimalPos+1; i < Size; ++i) { // Set AtLeastOne to true AtLeastOne = true; // Check if a digit is found if (m_String[i] >= '0' && m_String[i] <= '9') { double exp = (int)(DecimalPos-i); // disambiguate pow() argument type TempRet += (m_String[i]-'0')*pow(10.0,exp); } // It will accept and ending f, like 1.0f else if (!(i==Size-1 && m_String[i] == 'f')) { // parse error! return false; } } if (!AtLeastOne)return false; // Set the reference to the temp value and return success ret = (Negative?-TempRet:TempRet); return true; } // string - only return m_String, can't fail bool CParserValue::GetString(std::string &ret) { ret = m_String; return true; } bool CParserValue::GetString( CStr& ret ) { ret = m_String; return true; } // These macros include the IMPLEMENTATION of the // the function in the macro argument for CParserValue // They use GetDouble, and then type-cast it FUNC_IMPL_CAST_GETDOUBLE(GetFloat, float) FUNC_IMPL_CAST_GETDOUBLE(GetChar, char) FUNC_IMPL_CAST_GETDOUBLE(GetShort, short) FUNC_IMPL_CAST_GETDOUBLE(GetInt, int) FUNC_IMPL_CAST_GETDOUBLE(GetLong, long) FUNC_IMPL_CAST_GETDOUBLE(GetUnsignedShort, unsigned short) FUNC_IMPL_CAST_GETDOUBLE(GetUnsignedInt, unsigned int) FUNC_IMPL_CAST_GETDOUBLE(GetUnsignedLong, unsigned long) // CParserTaskTypeNode // ---------------------------------------------------------------------| Class CParserTaskTypeNode::CParserTaskTypeNode() : m_ParentNode(NULL), m_NextNode(NULL), m_AltNode(NULL) { } CParserTaskTypeNode::~CParserTaskTypeNode() { } // Delete all children void CParserTaskTypeNode::DeleteChildren() { // Delete nodes if applicable if (m_NextNode) { m_NextNode->DeleteChildren(); delete m_NextNode; m_NextNode = NULL; } if (m_AltNode) { m_AltNode->DeleteChildren(); delete m_AltNode; m_AltNode = NULL; } } // CParserTaskType // ---------------------------------------------------------------------| Class CParserTaskType::CParserTaskType() : m_BaseNode(NULL) { 
} CParserTaskType::~CParserTaskType() { } // Delete m_BaseNode and all of its children void CParserTaskType::DeleteTree() { if (m_BaseNode) { m_BaseNode->DeleteChildren(); delete m_BaseNode; m_BaseNode = NULL; } } // CParserLine // ---------------------------------------------------------------------| Class CParserLine::CParserLine() { } CParserLine::~CParserLine() { ClearArguments(); } // Clear arguments (deleting m_Memory bool CParserLine::ClearArguments() { // Now we can actually clear it m_Arguments.clear(); return true; } // Implementation of CParserFile::GetArg* // it just checks if argument isn't out of range, and // then it uses the the respective function in CParserValue FUNC_IMPL_GETARG(GetArgString, GetString, string) FUNC_IMPL_GETARG(GetArgBool, GetBool, bool) FUNC_IMPL_GETARG(GetArgChar, GetChar, char) FUNC_IMPL_GETARG(GetArgShort, GetShort, short) FUNC_IMPL_GETARG(GetArgInt, GetInt, int) FUNC_IMPL_GETARG(GetArgLong, GetLong, long) FUNC_IMPL_GETARG(GetArgUnsignedShort, GetUnsignedShort, unsigned short) FUNC_IMPL_GETARG(GetArgUnsignedInt, GetUnsignedInt, unsigned int) FUNC_IMPL_GETARG(GetArgUnsignedLong, GetUnsignedLong, unsigned long) FUNC_IMPL_GETARG(GetArgFloat, GetFloat, float) FUNC_IMPL_GETARG(GetArgDouble, GetDouble, double) // ParseString // ------------------------------------------------------------------| Function // Parses a line, dividing it into segments according to defined semantics // each segment is called an argument and represents a value of some kind // ex: // variable = 5 => variable, =, 5 // CallFunc(4,2) => CallFunc, 4, 2 // TODO Gee: Make Parser use CStr. 
bool CParserLine::ParseString(const CParser& Parser, const std::string &strLine) { // Don't process empty string if (strLine == string()) { m_ParseOK = false; // Empty lines should never be inputted by CParserFile return m_ParseOK; } // Locals bool Extract=false; size_t ExtractPos=0; char Buffer[256]; char Letter[] = {'\0','\0'}; // Letter as string vector Segments; string strSub; size_t i; // Set result to false, then if a match is found, turn it true m_ParseOK = false; /* TODO Gee Remove this comment! // Remove C++-styled comments! // * * * * int pos = strLine.find("//"); if (pos != string::npos) strLine = strLine.substr(0,pos); */ // Divide string into smaller vectors, separators are unusual signs // * * * * for (i=0; i= 256) { Extract=false; } else { // Extract string after $ ! // break whenever we reach a sign that's not A-Z a-z if (_IsValueChar(strLine[i])) { Buffer[i-ExtractPos] = strLine[i]; } else { // Extraction is finished Extract=false; // strLine[i] is now a non-regular character // we'll jump back one step so that will // be included next loop --i; } // Check if string is complete if (i == strLine.size()-1) Extract=false; } // If extraction was finished! Input Buffer if (Extract == false) { Segments.push_back( string(Buffer) ); } } } // Try to find an appropriate CParserTaskType in parser // * * * * // Locals size_t Progress; // progress in Segments index size_t Lane=0; // Have many alternative routes we are in bool Match; // If a task-type match has been found // The vector of these three represents the different lanes // LastValidProgress[1] takes you back to lane 1 and how // the variables was set at that point vector LastValidProgress; // When diving into a dynamic argument store store // the last valid so you can go back to it vector LastValidArgCount; // If an alternative route turns out to fail, we // need to know the amount of arguments on the last // valid position, so we can remove them. 
vector LastValidMatch; // Match at that point bool BlockAltNode = false; // If this turns true, the alternative route // tested was not a success, and the settings // should be set back in order to test the // next node instead bool LookNoFurther = false; // If this turns true, it means a definite match has been // found and no further looking is required CParserTaskTypeNode *CurNode=NULL; // Current node on task type CParserTaskTypeNode *PrevNode=NULL; // Last node UNUSED2(PrevNode); // Iterate all different TaskType, and all TaskTypeElements... // start from left and go to the right (prog), comparing // the similarities. If enough // similarities are found, then we can declare progress as // that type and exit loop vector::const_iterator cit_tt; for (cit_tt = Parser.m_TaskTypes.begin(); cit_tt != Parser.m_TaskTypes.end(); ++cit_tt) { // Reset for this task-type Match = true; Progress = 0; ClearArguments(); // Previous failed can have filled this CurNode = cit_tt->m_BaseNode; // Start at base node LookNoFurther = false; BlockAltNode = false; // This loop will go through the whole tree until // it reaches an empty node while (!LookNoFurther) { // Check if node is valid // otherwise try to jump back to parent if (CurNode->m_NextNode == NULL && (CurNode->m_AltNode == NULL || BlockAltNode)) { // Jump back to valid //CurNode = PrevNode; // If the node has no children, it's the last, and we're // on lane 0, i.e. with no if (CurNode->m_NextNode == NULL && (CurNode->m_AltNode == NULL || BlockAltNode) && Lane == 0) { if (Progress != Segments.size()) Match = false; break; } else { CParserTaskTypeNode *OldNode = NULL; // Go back to regular route! 
for(;;) { OldNode = CurNode; CurNode = CurNode->m_ParentNode; if (CurNode->m_AltNode == OldNode) { break; } } // If the alternative route isn't repeatable, block alternative route for // next loop cycle if (!CurNode->m_AltNodeRepeatable) BlockAltNode = true; // Decrease lane --Lane; } } // Check alternative route // * * * * // Check if alternative route is present // note, if an alternative node has already failed // we don't want to force usage of the next node // therefore BlockAltNode has to be false if (!BlockAltNode) { if (CurNode->m_AltNode) { // Alternative route found, we'll test this first! CurNode = CurNode->m_AltNode; // --- New node is set! // Make sure they are large enough if ((int)LastValidProgress.size() < Lane+1) { LastValidProgress.resize(Lane+1); LastValidMatch.resize(Lane+1); LastValidArgCount.resize(Lane+1); } // Store last valid progress LastValidProgress[Lane] = Progress; LastValidMatch[Lane] = Match; LastValidArgCount[Lane] = (int)m_Arguments.size(); ++Lane; continue; } } else BlockAltNode = false; // Now check Regular Next Node // * * * * if (CurNode->m_NextNode) { // Important! // Change working node to the next node! CurNode = CurNode->m_NextNode; // --- New node is set! // CHECK IF LETTER IS CORRECT if (CurNode->m_Letter != '\0') { // OPTIONALLY SKIP BLANK SPACES if (CurNode->m_Letter == '_') { // Find blank space if any! // and jump to the next non-blankspace if (Progress < Segments.size()) { // Skip blankspaces AND tabs! 
while (Segments[Progress].size()==1 && (Segments[Progress][0]==' ' || Segments[Progress][0]=='\t')) { ++Progress; // Check length if (Progress >= (int)Segments.size()) { break; } } } } else // CHECK LETTER IF IT'S CORRECT { if (Progress < Segments.size()) { // This should be 1-Letter long if (Segments[Progress].size() != 1) Match = false; // Check Letter if (CurNode->m_Letter != Segments[Progress][0]) Match = false; // Update progress ++Progress; } else Match = false; } } else if (CurNode->m_Type == typeNull) { // Match without doing anything (leaving Match==true) } // CHECK NAME else { // Do this first, because we wan't to // avoid the Progress and Segments.size() // check for this if (CurNode->m_Type == typeAddArg) { // Input argument CParserValue value; value.m_String = CurNode->m_String; m_Arguments.push_back(value); } else { // Alright! An ident or const has been acquired, if we // can't find any or if the string has run out // that invalidates the match // String end? if (Progress >= (int)Segments.size()) { Match = false; } else { // Store argument in CParserValue! CParserValue value; size_t i; switch(CurNode->m_Type) { case typeIdent: // Check if this really is a string if (!_IsStrictNameChar(Segments[Progress][0])) { Match = false; break; } // Same as at typeValue, but this time // we won't allow strings like "this", just // like this if (Segments[Progress][0] == '\"') Match = false; else value.m_String = Segments[Progress]; // Input argument! m_Arguments.push_back(value); ++Progress; break; case typeValue: // Check if this really is a string if (!_IsValueChar(Segments[Progress][0]) && Segments[Progress][0] != '\"') { Match = false; break; } // Check if initial is -> " <-, because that means it was // stored from a "String like these with quotes" // We don't want to store that prefix if (Segments[Progress][0] == '\"') value.m_String = Segments[Progress].substr(1, Segments[Progress].size()-1); else value.m_String = Segments[Progress]; // Input argument! 
m_Arguments.push_back(value); ++Progress; break; case typeRest: // Extract the whole of the string // Reset, probably is but still value.m_String = string(); for (i=Progress; i " <=, add one to the end of it too if (Segments[i][0] == '"') value.m_String += "\""; } m_Arguments.push_back(value); // Now BREAK EVERYTHING ! // We're done, we found our match and let's get out LookNoFurther = true; //Match = true; break; default: break; } } } } } // Check if match is false! if it is, try returning to last valid state if (!Match && Lane > 0) { // The alternative route failed BlockAltNode = true; CParserTaskTypeNode *OldNode = NULL; // Go back to regular route! for(;;) { OldNode = CurNode; CurNode = CurNode->m_ParentNode; if (CurNode->m_AltNode == OldNode) { break; } } // Decrease lane --Lane; // Restore values as before Progress = LastValidProgress[Lane]; Match = LastValidMatch[Lane]; m_Arguments.resize(LastValidArgCount[Lane]); } } // Check if it was a match! if (Match) { // Before we celebrate the match, let's check if whole // of Segments has been used, and if so we have to // nullify the match //if (Progress == Segments.size()) { // *** REPORT MATCH WAS FOUND *** m_TaskTypeName = cit_tt->m_Name; m_ParseOK = true; break; } } } // POST-PROCESSING OF ARGUMENTS! 
// if _minus is found as argument, remove it and add "-" to the one after that // note, it's easier if std::iterator isn't used here for (i=1; i<(int)GetArgCount(); ++i) { if (m_Arguments[i-1].m_String == "_minus") { // Add "-" to next, and remove "_minus" m_Arguments[i].m_String = "-" + m_Arguments[i].m_String; m_Arguments.erase(m_Arguments.begin() + (i-1)); } } return m_ParseOK; } // CParser // ---------------------------------------------------------------------| Class // ctor CParser::CParser() { } // dtor CParser::~CParser() { // Delete all task type trees vector::iterator itTT; for (itTT = m_TaskTypes.begin(); itTT != m_TaskTypes.end(); ++itTT) { itTT->DeleteTree(); } } // InputTaskType // ------------------------------------------------------------------| Function // A task-type is a string representing the acquired syntax when parsing // This function converts that string into a binary tree, making it easier // and faster to later parse. bool CParser::InputTaskType(const string& strName, const string& strSyntax) { // Locals CParserTaskType TaskType; // Object we acquire to create char Buffer[REGULAR_MAX_LENGTH]; size_t ExtractPos = 0; bool Extract = false; bool Error = false; size_t i; bool ConstructNew = false; // If it's the first input, then don't // construct a new node, because we // we already have m_BaseNode // Construct base node TaskType.m_BaseNode = new CParserTaskTypeNode(); // Working node CParserTaskTypeNode *CurNode = TaskType.m_BaseNode; // Loop through the string and construct nodes in the binary tree // when applicable for (i=0; i" -- the <...> node has only // one slot for an optional [...] node. 
To avoid this problem, // typeNull nodes are used to indicate things that always // succeed but can have altnodes attached: /* parent parent \ ===> \ <...> ===> <...> <-- CurNode / \ / \ / \ / \ next [a] Null [a] <-- added NewNode / \ next [b] */ if (CurNode->m_AltNode) { // Rearrange the tree, as shown above: // Create NewNode CParserTaskTypeNode* NewNode = new CParserTaskTypeNode(); NewNode->m_ParentNode = CurNode; NewNode->m_Letter = '\0'; NewNode->m_Type = typeNull; // Copy 'next' into NewNode NewNode->m_NextNode = CurNode->m_NextNode; // Replace 'next' with NewNode inside CurNode CurNode->m_NextNode = NewNode; // Update CurNode, so the following code inserts into [b] CurNode = NewNode; } // Dive into the alternative node debug_assert(! CurNode->m_AltNode); CurNode->m_AltNode = new CParserTaskTypeNode(); CurNode->m_AltNode->m_ParentNode = CurNode; // It's repeatable CurNode->m_AltNodeRepeatable = bool(strSyntax[i]==START_DYNAMIC); // Set to current CurNode = CurNode->m_AltNode; ConstructNew = false; // We're done extracting for now continue; } else if (strSyntax[i] == END_DYNAMIC || strSyntax[i] == END_OPTIONAL) { CParserTaskTypeNode *OldNode = NULL; // Jump out of this alternative route for(;;) { OldNode = CurNode; CurNode = CurNode->m_ParentNode; if (CurNode == NULL) { // Syntax error Error = true; break; } if (CurNode->m_AltNode == OldNode) { break; } } if (Error)break; } else { // Check if this is the first input // CONSTRUCT A CHILD NODE debug_assert(! CurNode->m_NextNode); CurNode->m_NextNode = new CParserTaskTypeNode(); CurNode->m_NextNode->m_ParentNode = CurNode; // Jump into ! CurNode = CurNode->m_NextNode; // Set CurNode CurNode->m_Letter = strSyntax[i]; } } // Extact if (Extract) { // No type names are longer than REGULAR_MAX_LENGTH characters if (i-ExtractPos >= REGULAR_MAX_LENGTH) { Extract=false; } else { // Extract string after $ ! 
// break whenever we reach a sign that's not A-Z a-z if (_IsStrictNameChar(strSyntax[i])) { Buffer[i-ExtractPos] = strSyntax[i]; } else { // Extraction is finished Extract=false; // strLine[i] is now a non-regular character // we'll jump back one step so that will // be included next loop --i; } // Check if string is complete if (i == strSyntax.size()-1) Extract=false; } // If extraction was finished! Input Buffer if (Extract == false) { // CONSTRUCT A CHILD NODE debug_assert(! CurNode->m_NextNode); CurNode->m_NextNode = new CParserTaskTypeNode(); CurNode->m_NextNode->m_ParentNode = CurNode; // Jump into ! CurNode = CurNode->m_NextNode; CurNode->m_Letter = '\0'; string str = string(Buffer); // Check value and set up CurNode accordingly if (str == "value") CurNode->m_Type = typeValue; else if (str == "ident") CurNode->m_Type = typeIdent; else if (str == "rest") CurNode->m_Type = typeRest; else if (str == "rbracket") CurNode->m_Letter = '>'; else if (str == "lbracket") CurNode->m_Letter = '<'; else if (str == "rbrace") CurNode->m_Letter = ']'; else if (str == "lbrace") CurNode->m_Letter = '['; else if (str == "dollar") CurNode->m_Letter = '$'; else if (str == "arg") { // After $arg, you need a parenthesis, within that parenthesis is a string // that will be added as an argument when it's passed through CurNode->m_Type = typeAddArg; // Check length, it has to have place for at least a '(' and ')' after $arg if (ExtractPos+4 >= strSyntax.size()) { Error = true; break; } // We want to extract what's inside the parenthesis after $arg // if it's not there at all, it's a syntactical error if (strSyntax[ExtractPos+3] != '(') { Error = true; break; } // Now try finding the second ')' size_t Pos = strSyntax.find(")", ExtractPos+5); // Check if ')' exists at all if (Pos == string::npos) { Error = true; break; } // Now extract string within ( and ) CurNode->m_String = strSyntax.substr(ExtractPos+4, Pos-(ExtractPos+4)); // Now update position i = (int)Pos; } else { // TODO Gee 
report in log too Error = true; } } } } // Input TaskType if (!Error) { // Set name and input TaskType.m_Name = strName; m_TaskTypes.push_back(TaskType); } return !Error; } CParserCache::CacheType CParserCache::m_Cached; CParser& CParserCache::Get(const char* str) { CacheType::iterator it = m_Cached.find(str); if (it == m_Cached.end()) { CParser* parser = new CParser; parser->InputTaskType("", str); m_Cached[str] = parser; return *parser; } else { CParser* parser = it->second; return *parser; } } - - - - - -//---------------------------------------------------------------------------- -// built-in self test -//---------------------------------------------------------------------------- - -#if SELF_TEST_ENABLED -namespace test { - -static void test1() -{ - CParser Parser; - Parser.InputTaskType("test", "_$ident_=_$value_"); - - std::string str; - int i; - - CParserLine Line; - - TEST(Line.ParseString(Parser, "value=23")); - - TEST(Line.GetArgString(0, str) && str == "value"); - TEST(Line.GetArgInt(1, i) && i == 23); -} - - -static void test2() -{ - CParser Parser; - Parser.InputTaskType("test", "_$value_[$value]_"); - - std::string str; - - CParserLine Line; - - TEST(Line.ParseString(Parser, "12 34")); - TEST(Line.GetArgCount() == 2); - TEST(Line.GetArgString(0, str) && str == "12"); - TEST(Line.GetArgString(1, str) && str == "34"); - - TEST(Line.ParseString(Parser, "56")); - TEST(Line.GetArgCount() == 1); - TEST(Line.GetArgString(0, str) && str == "56"); - - TEST(! 
Line.ParseString(Parser, " ")); -} - - -static void test3() -{ - CParser Parser; - Parser.InputTaskType("test", "_[$value]_[$value]_[$value]_"); - - std::string str; - - CParserLine Line; - - TEST(Line.ParseString(Parser, "12 34 56")); - TEST(Line.GetArgCount() == 3); - TEST(Line.GetArgString(0, str) && str == "12"); - TEST(Line.GetArgString(1, str) && str == "34"); - TEST(Line.GetArgString(2, str) && str == "56"); - - TEST(Line.ParseString(Parser, "78 90")); - TEST(Line.GetArgCount() == 2); - TEST(Line.GetArgString(0, str) && str == "78"); - TEST(Line.GetArgString(1, str) && str == "90"); - - TEST(Line.ParseString(Parser, "ab")); - TEST(Line.GetArgCount() == 1); - TEST(Line.GetArgString(0, str) && str == "ab"); - - TEST(Line.ParseString(Parser, " ")); - TEST(Line.GetArgCount() == 0); -} - - -static void test4() -{ - CParser Parser; - Parser.InputTaskType("test", "<[_a_][_b_]_x_>"); - - std::string str; - - CParserLine Line; - TEST(Line.ParseString(Parser, "a b x a b x")); - TEST(Line.ParseString(Parser, "a x b x")); - TEST(Line.ParseString(Parser, "a x")); - TEST(Line.ParseString(Parser, "b x")); - TEST(Line.ParseString(Parser, "x")); - TEST(! Line.ParseString(Parser, "a x c x")); - TEST(! Line.ParseString(Parser, "a b a x")); - TEST(! Line.ParseString(Parser, "a")); - TEST(! Line.ParseString(Parser, "a a x")); - TEST(Line.ParseString(Parser, "a x a b x a x b x b x b x b x a x a x a b x a b x b x a x")); -} - -static void self_test() -{ - test1(); - test2(); - test3(); - test4(); -} - -SELF_TEST_RUN; - -} // namespace test -#endif // #if SELF_TEST_ENABLED Index: ps/trunk/source/lib/timer.h =================================================================== --- ps/trunk/source/lib/timer.h (revision 3910) +++ ps/trunk/source/lib/timer.h (revision 3911) @@ -1,277 +1,277 @@ /** * ========================================================================= * File : timer.h * Project : 0 A.D. 
* Description : platform-independent high resolution timer and * : FPS measuring code. * * @author Jan.Wassenberg@stud.uni-karlsruhe.de * ========================================================================= */ /* * Copyright (c) 2003-2005 Jan Wassenberg * * Redistribution and/or modification are also permitted under the * terms of the GNU General Public License as published by the * Free Software Foundation (version 2 or later, at your option). * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. */ #ifndef TIMER_H #define TIMER_H #include #include "debug.h" // debug_printf #ifdef __cplusplus extern "C" { #endif // high resolution (> 1 us) timestamp [s], starting at or near 0 s. extern double get_time(void); // return resolution (expressed in [s]) of the time source underlying // get_time. extern double timer_res(void); // calculate fps (call once per frame) // several smooth filters (tuned for ~100 FPS) // => less fluctuation, but rapid tracking extern int fps; // for user display extern float spf; // for time-since-last-frame use extern void calc_fps(void); // since TIMER_ACCRUE et al. are called so often, we try to keep // overhead to an absolute minimum. this flag allows storing -// raw tick counts (e.g. CPU cycles returned by rdtsc) instead of +// raw tick counts (e.g. CPU cycles returned by ia32_rdtsc) instead of // absolute time. there are two benefits: // - no need to convert from raw->time on every call // (instead, it's only done once when displaying the totals) // - possibly less overhead to querying the time itself // (get_time may be using slower time sources with ~3us overhead) // // however, the cycle count is not necessarily a measure of wall-clock time. // therefore, on systems with SpeedStep active, measurements of // I/O or other non-CPU bound activity may be skewed. 
this is ok because // the timer is only used for profiling; just be aware of the issue. // if it's a problem or no raw tick source is available, disable this. // // note that overflow isn't an issue either way (63 bit cycle counts // at 10 GHz cover intervals of 29 years). #define TIMER_USE_RAW_TICKS 1 #if TIMER_USE_RAW_TICKS typedef i64 TimerUnit; #else typedef double TimerUnit; #endif // // cumulative timer API // // this supplements in-game profiling by providing low-overhead, // high resolution time accounting. // opaque - do not access its fields! // note: must be defined here because clients instantiate them; // fields cannot be made private due to C compatibility requirement. struct TimerClient { TimerUnit sum; // total bill // only store a pointer for efficiency. const char* description; TimerClient* next; // how often timer_bill_client was called (helps measure relative // performance of something that is done indeterminately often). uint num_calls; }; // make the given TimerClient (usually instantiated as static data) // ready for use. returns its address for TIMER_ADD_CLIENT's convenience. // this client's total (added to by timer_bill_client) will be // displayed by timer_display_client_totals. // notes: // - may be called at any time; // - always succeeds (there's no fixed limit); // - free() is not needed nor possible. // - description must remain valid until exit; a string literal is safest. extern TimerClient* timer_add_client(TimerClient* tc, const char* description); // add
to the client's total. extern void timer_bill_client(TimerClient* tc, TimerUnit dt); // display all clients' totals; does not reset them. // typically called at exit. extern void timer_display_client_totals(); #ifdef __cplusplus } #endif // used via TIMER* macros below. class ScopeTimer { double t0; const char* description; public: ScopeTimer(const char* _description) { t0 = get_time(); description = _description; } ~ScopeTimer() { double t1 = get_time(); double dt = t1-t0; // determine scale factor for pretty display double scale = 1e6; const char* unit = "us"; if(dt > 1.0) scale = 1, unit = "s"; else if(dt > 1e-3) scale = 1e3, unit = "ms"; debug_printf("TIMER| %s: %g %s\n", description, dt*scale, unit); } // disallow copying (makes no sense) private: ScopeTimer& operator=(const ScopeTimer&); }; /* Measure the time taken to execute code up until end of the current scope; display it via debug_printf. Can safely be nested. Useful for measuring time spent in a function or basic block. must remain valid over the lifetime of this object; a string literal is safest. Example usage: void func() { TIMER("description"); // code to be measured } */ #define TIMER(description) ScopeTimer UID__(description) /* Measure the time taken to execute code between BEGIN and END markers; display it via debug_printf. Can safely be nested. Useful for measuring several pieces of code within the same function/block. must remain valid over the lifetime of this object; a string literal is safest. Caveats: - this wraps the code to be measured in a basic block, so any variables defined there are invisible to surrounding code. - the description passed to END isn't inspected; you are responsible for ensuring correct nesting! 
Example usage: void func2() { // uninteresting code TIMER_BEGIN("description2"); // code to be measured TIMER_END("description2"); // uninteresting code } */ #define TIMER_BEGIN(description) { ScopeTimer UID__(description) #define TIMER_END(description) } // used via TIMER_ACCRUE class ScopeTimerAccrue { TimerUnit t0; TimerClient* tc; public: ScopeTimerAccrue(TimerClient* tc_) { #if TIMER_USE_RAW_TICKS # if CPU_IA32 - t0 = rdtsc(); + t0 = ia32_rdtsc(); # else # error "port" # endif #else t0 = get_time(); #endif tc = tc_; } ~ScopeTimerAccrue() { #if TIMER_USE_RAW_TICKS # if CPU_IA32 - TimerUnit t1 = rdtsc(); + TimerUnit t1 = ia32_rdtsc(); # else # error "port" # endif #else TimerUnit t1 = get_time(); #endif TimerUnit dt = t1-t0; timer_bill_client(tc, dt); } // disallow copying (makes no sense) private: ScopeTimerAccrue& operator=(const ScopeTimerAccrue&); }; // "allocate" a new TimerClient that will keep track of the total time // billed to it, along with a description string. These are displayed when // timer_display_client_totals is called. // Invoke this at file or function scope; a (static) TimerClient pointer of // name will be defined, which should be passed to TIMER_ACCRUE. #define TIMER_ADD_CLIENT(id)\ static TimerClient UID__;\ static TimerClient* id = timer_add_client(&UID__, #id); /* Measure the time taken to execute code up until end of the current scope; bill it to the given TimerClient object. Can safely be nested. Useful for measuring total time spent in a function or basic block over the entire program. must remain valid over the lifetime of this object; a string literal is safest. 
Example usage: TIMER_ADD_CLIENT(identifier) void func() { TIMER_ACCRUE(name_of_pointer_to_client); // code to be measured } [at exit] timer_display_client_totals(); */ #define TIMER_ACCRUE(client) ScopeTimerAccrue UID__(client) #endif // #ifndef TIMER_H Index: ps/trunk/source/lib/sysdep/ia32.cpp =================================================================== --- ps/trunk/source/lib/sysdep/ia32.cpp (revision 3910) +++ ps/trunk/source/lib/sysdep/ia32.cpp (revision 3911) @@ -1,714 +1,675 @@ /** * ========================================================================= * File : ia32.cpp * Project : 0 A.D. * Description : C++ and inline asm implementations for IA-32. * * @author Jan.Wassenberg@stud.uni-karlsruhe.de * ========================================================================= */ /* * Copyright (c) 2003-2005 Jan Wassenberg * * Redistribution and/or modification are also permitted under the * terms of the GNU General Public License as published by the * Free Software Foundation (version 2 or later, at your option). * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. */ #include "precompiled.h" #include "lib.h" #include "posix.h" #include "ia32.h" #include "timer.h" // HACK (see call to wtime_reset_impl) #if OS_WIN #include "lib/sysdep/win/wtime.h" #endif #include #include #include #include #if !HAVE_MS_ASM && !HAVE_GNU_ASM #error ia32.cpp needs inline assembly support! #endif -#define SELF_TEST_ENABLED 1 -#include "self_test.h" - // set by ia32_init, referenced by ia32_memcpy (asm) extern "C" u32 ia32_memcpy_size_mask = 0; void ia32_init() { ia32_asm_init(); // memcpy init: set the mask that is applied to transfer size before // choosing copy technique. this is the mechanism for disabling // codepaths that aren't supported on all CPUs; see article for details. // .. check for PREFETCHNTA and MOVNTQ support. 
these are part of the SSE // instruction set, but also supported on older Athlons as part of // the extended AMD MMX set. - if(ia32_cap(SSE) || ia32_cap(AMD_MMX_EXT)) + if(ia32_cap(IA32_CAP_SSE) || ia32_cap(IA32_CAP_AMD_MMX_EXT)) ia32_memcpy_size_mask = ~0u; } //----------------------------------------------------------------------------- // fast implementations of some sysdep.h functions; see documentation there //----------------------------------------------------------------------------- #if HAVE_MS_ASM // notes: // - declspec naked is significantly faster: it avoids redundant // store/load, even though it prevents inlining. // - stupid VC7 gets arguments wrong when using __declspec(naked); // we need to use DWORD PTR and esp-relative addressing. // if on 64-bit systems, [esp+4] will have to change cassert(sizeof(int)*CHAR_BIT == 32); __declspec(naked) float ia32_rintf(float) { __asm fld [esp+4] __asm frndint __asm ret } __declspec(naked) double ia32_rint(double) { __asm fld QWORD PTR [esp+4] __asm frndint __asm ret } __declspec(naked) float ia32_fminf(float, float) { __asm { fld DWORD PTR [esp+4] fld DWORD PTR [esp+8] fcomi st(0), st(1) fcmovnb st(0), st(1) fxch fstp st(0) ret } } __declspec(naked) float ia32_fmaxf(float, float) { __asm { fld DWORD PTR [esp+4] fld DWORD PTR [esp+8] fcomi st(0), st(1) fcmovb st(0), st(1) fxch fstp st(0) ret } } #endif // HAVE_MS_ASM #if USE_IA32_FLOAT_TO_INT // implies HAVE_MS_ASM // notes: // - PTR is necessary because __declspec(naked) means the assembler // cannot refer to parameter argument type to get it right. // - to conform with the fallback implementation (a C cast), we need to // end up with truncate/"chop" rounding. subtracting does the trick, // assuming RC is the IA-32 default round-to-nearest mode. 
static const float round_bias = 0.4999999f; __declspec(naked) i32 ia32_i32_from_float(float f) { UNUSED2(f); __asm{ push eax fld DWORD PTR [esp+8] fsub [round_bias] fistp DWORD PTR [esp] pop eax ret }} __declspec(naked) i32 ia32_i32_from_double(double d) { UNUSED2(d); __asm{ push eax fld QWORD PTR [esp+8] fsub [round_bias] fistp DWORD PTR [esp] pop eax ret }} __declspec(naked) i64 ia32_i64_from_double(double d) { UNUSED2(d); __asm{ push edx push eax fld QWORD PTR [esp+12] fsub [round_bias] fistp QWORD PTR [esp] pop eax pop edx ret }} #endif // USE_IA32_FLOAT_TO_INT //----------------------------------------------------------------------------- // rationale: this function should return its output (instead of setting // out params) to simplify its callers. it is written in inline asm // (instead of moving to ia32.asm) to insulate from changing compiler // calling conventions. // MSC, ICC and GCC currently return 64 bits in edx:eax, which even // matches rdtsc output, but we play it safe and return a temporary. -u64 rdtsc() +u64 ia32_rdtsc() { u64 c; #if HAVE_MS_ASM __asm { cpuid rdtsc mov dword ptr [c], eax mov dword ptr [c+4], edx } #elif HAVE_GNU_ASM __asm__ __volatile__ ( "cpuid; rdtsc" : "=A" (c) : /* no input */ : "ebx", "ecx" /* cpuid clobbers ebx and ecx */); #endif return c; } void ia32_debug_break() { #if HAVE_MS_ASM __asm int 3 // note: this probably isn't necessary, since unix_debug_break // (SIGTRAP) is most probably available if HAVE_GNU_ASM. // we include it for completeness, though. #elif HAVE_GNU_ASM __asm__ __volatile__ ("mfence"); #endif } //----------------------------------------------------------------------------- // support code for lock-free primitives //----------------------------------------------------------------------------- // enforce strong memory ordering. 
void mfence() { // Pentium IV - if(ia32_cap(SSE2)) + if(ia32_cap(IA32_CAP_SSE2)) #if HAVE_MS_ASM __asm mfence #elif HAVE_GNU_ASM __asm__ __volatile__ ("mfence"); #endif } void serialize() { #if HAVE_MS_ASM __asm cpuid #elif HAVE_GNU_ASM __asm__ __volatile__ ("cpuid"); #endif } //----------------------------------------------------------------------------- // CPU / feature detect //----------------------------------------------------------------------------- -bool ia32_cap(CpuCap cap) +bool ia32_cap(IA32Cap cap) { // treated as 128 bit field; order: std ecx, std edx, ext ecx, ext edx // keep in sync with enum CpuCap! static u32 caps[4]; ONCE(\ u32 regs[4]; if(ia32_cpuid(1, regs))\ {\ caps[0] = regs[ECX];\ caps[1] = regs[EDX];\ }\ if(ia32_cpuid(0x80000001, regs))\ {\ caps[2] = regs[ECX];\ caps[3] = regs[EDX];\ }\ ); const uint tbl_idx = cap >> 5; const uint bit_idx = cap & 0x1f; if(tbl_idx > 3) { debug_warn("cap invalid"); return false; } return (caps[tbl_idx] & BIT(bit_idx)) != 0; } // we only store enum Vendor rather than the string because that // is easier to compare. enum Vendor { UNKNOWN, INTEL, AMD }; static Vendor vendor = UNKNOWN; enum MiscCpuCapBits { // AMD PowerNow! flags (returned in edx by CPUID 0x80000007) POWERNOW_FREQ_ID_CTRL = 2 }; static void get_cpu_vendor() { u32 regs[4]; if(!ia32_cpuid(0, regs)) return; // copy regs to string // note: 'strange' ebx,edx,ecx reg order is due to ModR/M encoding order. 
char vendor_str[13]; u32* vendor_str_u32 = (u32*)vendor_str; vendor_str_u32[0] = regs[EBX]; vendor_str_u32[1] = regs[EDX]; vendor_str_u32[2] = regs[ECX]; vendor_str[12] = '\0'; // 0-terminate if(!strcmp(vendor_str, "AuthenticAMD")) vendor = AMD; else if(!strcmp(vendor_str, "GenuineIntel")) vendor = INTEL; else debug_warn("unknown vendor"); } static void get_cpu_type() { // get processor signature u32 regs[4]; if(!ia32_cpuid(1, regs)) debug_warn("cpuid 1 failed"); const uint model = bits(regs[EAX], 4, 7); const uint family = bits(regs[EAX], 8, 11); // get brand string (if available) // note: ia32_cpuid writes 4 u32s directly to cpu_type - // be very careful with pointer arithmetic! u32* cpu_type_u32 = (u32*)cpu_type; bool have_brand_string = false; if(ia32_cpuid(0x80000002, cpu_type_u32+0 ) && ia32_cpuid(0x80000003, cpu_type_u32+4) && ia32_cpuid(0x80000004, cpu_type_u32+8)) have_brand_string = true; // note: cpu_type is guaranteed to hold 48+1 chars, since that's the // length of the CPU brand string => we can safely copy short literals. // (this macro hides us from 'unsafe string code' searches) #define SAFE_STRCPY str##cpy // fall back to manual detect of CPU type because either: // - CPU doesn't support brand string (we use a flag to indicate this // rather than comparing against a default value because it is safer); // - the brand string is useless, e.g. "Unknown". this happens on // some older boards whose BIOS reprograms the string for CPUs it // doesn't recognize. if(!have_brand_string || strncmp(cpu_type, "Unknow", 6) == 0) { if(vendor == AMD) { // everything else is either too old, or should have a brand string. 
if(family == 6) { if(model == 3 || model == 7) SAFE_STRCPY(cpu_type, "AMD Duron"); else if(model <= 5) SAFE_STRCPY(cpu_type, "AMD Athlon"); else { - if(ia32_cap(AMD_MP)) + if(ia32_cap(IA32_CAP_AMD_MP)) SAFE_STRCPY(cpu_type, "AMD Athlon MP"); else SAFE_STRCPY(cpu_type, "AMD Athlon XP"); } } } else if(vendor == INTEL) { // everything else is either too old, or should have a brand string. if(family == 6) { if(model == 1) SAFE_STRCPY(cpu_type, "Intel Pentium Pro"); else if(model == 3 || model == 5) SAFE_STRCPY(cpu_type, "Intel Pentium II"); else if(model == 6) SAFE_STRCPY(cpu_type, "Intel Celeron"); else SAFE_STRCPY(cpu_type, "Intel Pentium III"); } } } // cpu_type already holds a valid brand string; pretty it up. else { // strip (tm) from Athlon string if(!strncmp(cpu_type, "AMD Athlon(tm)", 14)) memmove(cpu_type+10, cpu_type+14, 35); // remove 2x (R) and CPU freq from P4 string float freq; // we can't use this because it isn't necessarily correct - the CPU // may be overclocked. a variable must be passed, though, since // scanf returns the number of fields actually stored. if(sscanf(cpu_type, " Intel(R) Pentium(R) 4 CPU %fGHz", &freq) == 1) SAFE_STRCPY(cpu_type, "Intel Pentium 4"); } } //----------------------------------------------------------------------------- static uint log_id_bits; // bit index; divides APIC ID into log and phys static const uint INVALID_ID = ~0u; static uint last_phys_id = INVALID_ID, last_log_id = INVALID_ID; static uint phys_ids = 0, log_ids = 0; // count # distinct physical and logical APIC IDs for get_cpu_count. // called on each OS-visible "CPU" by on_each_cpu. static void count_ids() { // get APIC id u32 regs[4]; if(!ia32_cpuid(1, regs)) debug_warn("cpuid 1 failed"); const uint id = bits(regs[EBX], 24, 31); // partition into physical and logical ID const uint phys_id = bits(id, 0, log_id_bits-1); const uint log_id = bits(id, log_id_bits, 7); // note: APIC IDs are assigned sequentially, so we compare against the // last one encountered. 
if(last_phys_id != INVALID_ID && last_phys_id != phys_id) cpus++; if(last_log_id != INVALID_ID && last_log_id != log_id ) cpus++; last_phys_id = phys_id; last_log_id = log_id; } // fix CPU count reported by OS (incorrect if HT active or multicore); // also separates it into cpu_ht_units and cpu_cores. static void get_cpu_count() { debug_assert(cpus > 0 && "must know # 'CPU's (call OS-specific detect first)"); // get # "logical CPUs" per package (uniform over all packages). // TFM is unclear but seems to imply this includes HT units *and* cores! u32 regs[4]; if(!ia32_cpuid(1, regs)) debug_warn("ia32_cpuid(1) failed"); const uint log_cpu_per_package = bits(regs[EBX], 16, 23); // .. and # cores if(ia32_cpuid(4, regs)) cpu_cores = bits(regs[EBX], 26, 31)+1; else cpu_cores = 1; // if HT is active (enabled in BIOS and OS), we have a problem: // OSes (Windows at least) report # CPUs as packages * cores * HT_units. // there is no direct way to determine if HT is actually enabled, // so if it is supported, we have to examine all APIC IDs and // figure out what kind of "CPU" each one is. *sigh* // // note: we don't check if it's Intel and P4 or above - HT may be // supported on other CPUs in future. all processors should set this // feature bit correctly, so it's not a problem. - if(ia32_cap(HT)) + if(ia32_cap(IA32_CAP_HT)) { log_id_bits = log2(log_cpu_per_package); // see above last_phys_id = last_log_id = INVALID_ID; phys_ids = log_ids = 0; if(sys_on_each_cpu(count_ids) == 0) { cpus = phys_ids; cpu_ht_units = log_ids / cpu_cores; return; // this is authoritative } // OS apparently doesn't support CPU affinity. // HT might be disabled, but return # units anyway. else cpu_ht_units = log_cpu_per_package / cpu_cores; } // not HT-capable; return 1 to allow total = cpus * HT_units * cores. 
else cpu_ht_units = 1; cpus /= cpu_cores; } static void check_for_speedstep() { if(vendor == INTEL) { - if(ia32_cap(EST)) + if(ia32_cap(IA32_CAP_EST)) cpu_speedstep = 1; } else if(vendor == AMD) { u32 regs[4]; if(ia32_cpuid(0x80000007, regs)) if(regs[EDX] & POWERNOW_FREQ_ID_CTRL) cpu_speedstep = 1; } } static void measure_cpu_freq() { // set max priority, to reduce interference while measuring. int old_policy; static sched_param old_param; // (static => 0-init) pthread_getschedparam(pthread_self(), &old_policy, &old_param); static sched_param max_param; max_param.sched_priority = sched_get_priority_max(SCHED_FIFO); pthread_setschedparam(pthread_self(), SCHED_FIFO, &max_param); // make sure the TSC is available, because we're going to // measure actual CPU clocks per known time interval. // counting loop iterations ("bogomips") is unreliable. - if(ia32_cap(TSC)) + if(ia32_cap(IA32_CAP_TSC)) { // note: no need to "warm up" cpuid - it will already have been // called several times by the time this code is reached. - // (background: it's used in rdtsc() to serialize instruction flow; + // (background: it's used in ia32_rdtsc() to serialize instruction flow; // the first call is documented to be slower on Intel CPUs) int num_samples = 16; // if clock is low-res, do less samples so it doesn't take too long. // balance measuring time (~ 10 ms) and accuracy (< 1 0/00 error - // ok for using the TSC as a time reference) if(timer_res() >= 1e-3) num_samples = 8; std::vector samples(num_samples); int i; for(i = 0; i < num_samples; i++) { double dt; i64 dc; // i64 because VC6 can't convert u64 -> double, // and we don't need all 64 bits. // count # of clocks in max{1 tick, 1 ms}: // .. wait for start of tick. const double t0 = get_time(); u64 c1; double t1; do { // note: get_time effectively has a long delay (up to 5 us) - // before returning the time. we call it before rdtsc to + // before returning the time. 
we call it before ia32_rdtsc to // minimize the delay between actually sampling time / TSC, // thus decreasing the chance for interference. // (if unavoidable background activity, e.g. interrupts, // delays the second reading, inaccuracy is introduced). t1 = get_time(); - c1 = rdtsc(); + c1 = ia32_rdtsc(); } while(t1 == t0); // .. wait until start of next tick and at least 1 ms elapsed. do { const double t2 = get_time(); - const u64 c2 = rdtsc(); + const u64 c2 = ia32_rdtsc(); dc = (i64)(c2 - c1); dt = t2 - t1; } while(dt < 1e-3); // .. freq = (delta_clocks) / (delta_seconds); - // cpuid/rdtsc/timer overhead is negligible. + // ia32_rdtsc/timer overhead is negligible. const double freq = dc / dt; samples[i] = freq; } std::sort(samples.begin(), samples.end()); // median filter (remove upper and lower 25% and average the rest). // note: don't just take the lowest value! it could conceivably be // too low, if background processing delays reading c1 (see above). double sum = 0.0; const int lo = num_samples/4, hi = 3*num_samples/4; for(i = lo; i < hi; i++) sum += samples[i]; cpu_freq = sum / (hi-lo); } // else: TSC not available, can't measure; cpu_freq remains unchanged. // restore previous policy and priority. pthread_setschedparam(pthread_self(), old_policy, &old_param); } void ia32_get_cpu_info() { get_cpu_vendor(); get_cpu_type(); get_cpu_count(); check_for_speedstep(); measure_cpu_freq(); // HACK: on Windows, the HRT makes its final implementation choice // in the first calibrate call where cpu info is available. // call wtime_reset_impl here to have that happen now, // so app code isn't surprised by a timer change, although the HRT // does try to keep the timer continuous. #if OS_WIN wtime_reset_impl(); #endif } //----------------------------------------------------------------------------- // checks if there is an IA-32 CALL instruction right before ret_addr. // returns ERR_OK if so and ERR_FAIL if not. // also attempts to determine the call target. 
if that is possible // (directly addressed relative or indirect jumps), it is stored in // target, which is otherwise 0. // // this is useful for walking the stack manually. LibError ia32_get_call_target(void* ret_addr, void** target) { *target = 0; // points to end of the CALL instruction (which is of unknown length) const u8* c = (const u8*)ret_addr; // this would allow for avoiding exceptions when accessing ret_addr // close to the beginning of the code segment. it's not currently set // because this is really unlikely and not worth the trouble. const size_t len = ~0u; // CALL rel32 (E8 cd) if(len >= 5 && c[-5] == 0xE8) { *target = (u8*)ret_addr + *(i32*)(c-4); return ERR_OK; } // CALL r/m32 (FF /2) // .. CALL [r32 + r32*s] => FF 14 SIB if(len >= 3 && c[-3] == 0xFF && c[-2] == 0x14) return ERR_OK; // .. CALL [disp32] => FF 15 disp32 if(len >= 6 && c[6] == 0xFF && c[-5] == 0x15) { void* addr_of_target = *(void**)(c-4); if(!debug_is_pointer_bogus(addr_of_target)) { *target = *(void**)addr_of_target; return ERR_OK; } } // .. CALL [r32] => FF 00-3F(!14/15) if(len >= 2 && c[-2] == 0xFF && c[-1] < 0x40 && c[-1] != 0x14 && c[-1] != 0x15) return ERR_OK; // .. CALL [r32 + r32*s + disp8] => FF 54 SIB disp8 if(len >= 4 && c[-4] == 0xFF && c[-3] == 0x54) return ERR_OK; // .. CALL [r32 + disp8] => FF 50-57(!54) disp8 if(len >= 3 && c[-3] == 0xFF && (c[-2] & 0xF8) == 0x50 && c[-2] != 0x54) return ERR_OK; // .. CALL [r32 + r32*s + disp32] => FF 94 SIB disp32 if(len >= 7 && c[-7] == 0xFF && c[-6] == 0x94) return ERR_OK; // .. CALL [r32 + disp32] => FF 90-97(!94) disp32 if(len >= 6 && c[-6] == 0xFF && (c[-5] & 0xF8) == 0x90 && c[-5] != 0x94) return ERR_OK; // .. 
CALL r32 => FF D0-D7 if(len >= 2 && c[-2] == 0xFF && (c[-1] & 0xF8) == 0xD0) return ERR_OK; WARN_RETURN(ERR_CPU_UNKNOWN_OPCODE); } - - -//---------------------------------------------------------------------------- -// built-in self test -//---------------------------------------------------------------------------- - -#if SELF_TEST_ENABLED -namespace test { - - static void test_float_int() - { - TEST(i32_from_float(0.99999f) == 0); - TEST(i32_from_float(1.0f) == 1); - TEST(i32_from_float(1.01f) == 1); - TEST(i32_from_float(5.6f) == 5); - - TEST(i32_from_double(0.99999) == 0); - TEST(i32_from_double(1.0) == 1); - TEST(i32_from_double(1.01) == 1); - TEST(i32_from_double(5.6) == 5); - - TEST(i64_from_double(0.99999) == 0LL); - TEST(i64_from_double(1.0) == 1LL); - TEST(i64_from_double(1.01) == 1LL); - TEST(i64_from_double(5.6) == 5LL); - } - - static void self_test() - { - test_float_int(); - } - - SELF_TEST_RUN; - -} // namespace test -#endif // #if SELF_TEST_ENABLED Index: ps/trunk/source/lib/sysdep/win/wdbg_sym.cpp =================================================================== --- ps/trunk/source/lib/sysdep/win/wdbg_sym.cpp (revision 3910) +++ ps/trunk/source/lib/sysdep/win/wdbg_sym.cpp (revision 3911) @@ -1,2194 +1,1951 @@ /** * ========================================================================= * File : wdbg_sym.cpp * Project : 0 A.D. * Description : Win32 stack trace and symbol engine. * * @author Jan.Wassenberg@stud.uni-karlsruhe.de * ========================================================================= */ /* * Copyright (c) 2002-2005 Jan Wassenberg * * Redistribution and/or modification are also permitted under the * terms of the GNU General Public License as published by the * Free Software Foundation (version 2 or later, at your option). * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
*/ #include "precompiled.h" #include #include #include #include "lib.h" #include "win_internal.h" #define _NO_CVCONST_H // request SymTagEnum be defined #include "dbghelp.h" #include // VARIANT #include "posix.h" #include "sysdep/cpu.h" #include "wdbg.h" #include "debug_stl.h" #include "app_hooks.h" #include "lib/path_util.h" #if CPU_IA32 # include "lib/sysdep/ia32.h" #endif -// raises an an annoying exception, so disable unless needed -#undef SELF_TEST_ENABLED -#define SELF_TEST_ENABLED 0 - - #if MSC_VERSION #pragma comment(lib, "dbghelp.lib") #pragma comment(lib, "oleaut32.lib") // VariantChangeType #endif // automatic module shutdown (before process termination) #pragma data_seg(WIN_CALLBACK_POST_ATEXIT(b)) WIN_REGISTER_FUNC(wdbg_sym_shutdown); #pragma data_seg() // note: it is safe to use debug_assert/debug_warn/CHECK_ERR even during a // stack trace (which is triggered by debug_assert et al. in app code) because // nested stack traces are ignored and only the error is displayed. // protects dbghelp (which isn't thread-safe) and // parameter passing to the breakpoint helper thread. static void lock() { win_lock(WDBG_CS); } static void unlock() { win_unlock(WDBG_CS); } //---------------------------------------------------------------------------- // dbghelp //---------------------------------------------------------------------------- // passed to all dbghelp symbol query functions. we're not interested in // resolving symbols in other processes; the purpose here is only to // generate a stack trace. if that changes, we need to init a local copy // of these in dump_sym_cb and pass them to all subsequent dump_*. static HANDLE hProcess; static ULONG64 mod_base; // for StackWalk64; taken from PE header by wdbg_init. static WORD machine; // call on-demand (allows handling exceptions raised before win.cpp // init functions are called); no effect if already initialized. static LibError sym_init() { // bail if already initialized (there's nothing to do). 
// don't use pthread_once because we need to return success/error code. static uintptr_t already_initialized = 0; if(!CAS(&already_initialized, 0, 1)) return ERR_OK; hProcess = GetCurrentProcess(); // set options // notes: // - can be done before SymInitialize; we do so in case // any of the options affect it. // - do not set directly - that would zero any existing flags. DWORD opts = SymGetOptions(); opts |= SYMOPT_DEFERRED_LOADS; // the "fastest, most efficient way" //opts |= SYMOPT_DEBUG; // lots of debug spew in output window SymSetOptions(opts); // initialize dbghelp. // .. request symbols from all currently active modules be loaded. const BOOL fInvadeProcess = TRUE; // .. use default *symbol* search path. we don't use this to locate // our PDB file because its absolute path is stored inside the EXE. PCSTR UserSearchPath = 0; BOOL ok = SymInitialize(hProcess, UserSearchPath, fInvadeProcess); WARN_IF_FALSE(ok); mod_base = SymGetModuleBase64(hProcess, (u64)&sym_init); IMAGE_NT_HEADERS* header = ImageNtHeader((void*)mod_base); machine = header->FileHeader.Machine; return ERR_OK; } // called from wdbg_sym_shutdown. static LibError sym_shutdown() { SymCleanup(hProcess); return ERR_OK; } struct SYMBOL_INFO_PACKAGEW2 : public SYMBOL_INFO_PACKAGEW { SYMBOL_INFO_PACKAGEW2() { si.SizeOfStruct = sizeof(si); si.MaxNameLen = MAX_SYM_NAME; } }; // note: we can't derive from TI_FINDCHILDREN_PARAMS because its members // aren't guaranteed to precede ours (although they do in practice). struct TI_FINDCHILDREN_PARAMS2 { TI_FINDCHILDREN_PARAMS2(DWORD num_children) { p.Start = 0; p.Count = MIN(num_children, MAX_CHILDREN); } static const size_t MAX_CHILDREN = 400; TI_FINDCHILDREN_PARAMS p; DWORD additional_children[MAX_CHILDREN-1]; }; // read and return symbol information for the given address. all of the // output parameters are optional; we pass back as much information as is // available and desired. return 0 iff any information was successfully // retrieved and stored. 
// sym_name and file must hold at least the number of chars above; // file is the base name only, not path (see rationale in wdbg_sym). // the PDB implementation is rather slow (~500µs). LibError debug_resolve_symbol(void* ptr_of_interest, char* sym_name, char* file, int* line) { sym_init(); const DWORD64 addr = (DWORD64)ptr_of_interest; int successes = 0; lock(); // get symbol name (if requested) if(sym_name) { sym_name[0] = '\0'; SYMBOL_INFO_PACKAGEW2 sp; SYMBOL_INFOW* sym = &sp.si; if(SymFromAddrW(hProcess, addr, 0, sym)) { snprintf(sym_name, DBG_SYMBOL_LEN, "%ws", sym->Name); successes++; } } // get source file and/or line number (if requested) if(file || line) { file[0] = '\0'; *line = 0; IMAGEHLP_LINE64 line_info = { sizeof(IMAGEHLP_LINE64) }; DWORD displacement; // unused but required by SymGetLineFromAddr64! if(SymGetLineFromAddr64(hProcess, addr, &displacement, &line_info)) { if(file) { // strip full path down to base name only. // this loses information, but that isn't expected to be a // problem and is balanced by not having to do this from every // call site (full path is too long to display nicely). const char* base_name = path_name_only(line_info.FileName); snprintf(file, DBG_FILE_LEN, "%s", base_name); successes++; } if(line) { *line = line_info.LineNumber; successes++; } } } unlock(); return (successes != 0)? ERR_OK : ERR_FAIL; } //---------------------------------------------------------------------------- // stack walk //---------------------------------------------------------------------------- /* Subroutine linkage example code: push param2 push param1 call func ret_addr: [..] func: push ebp mov ebp, esp sub esp, local_size [..] 
Stack contents (down = decreasing address)
	[param2]
	[param1]
	ret_addr
	prev_ebp   (<- current ebp points at this value)
	[local_variables]
*/

/*
call func1
ret1:

func1:
	push	ebp
	mov		ebp, esp
	call	func2
ret2:

func2:
	push	ebp
	mov		ebp, esp
STARTHERE
*/

#if CPU_IA32 && !CONFIG_OMIT_FP

// manually walk one level up the stack by following the saved-EBP chain
// (see linkage diagram above). requires frame pointers to be present,
// i.e. the code must not have been built with FP omission.
// returns INFO_ALL_COMPLETE when the chain ends (saved FP == 0), an error
// if any pointer fails validation, or ERR_OK with <sf> updated in-place
// to describe the caller's frame.
static LibError ia32_walk_stack(STACKFRAME64* sf)
{
	// read previous values from STACKFRAME64
	void* prev_fp  = (void*)sf->AddrFrame .Offset;
	void* prev_ip  = (void*)sf->AddrPC    .Offset;
	void* prev_ret = (void*)sf->AddrReturn.Offset;
	// validate the incoming values before dereferencing prev_fp below.
	if(!debug_is_stack_ptr(prev_fp))
		WARN_RETURN(ERR_11);
	if(prev_ip && !debug_is_code_ptr(prev_ip))
		WARN_RETURN(ERR_12);
	if(prev_ret && !debug_is_code_ptr(prev_ret))
		WARN_RETURN(ERR_13);

	// read stack frame: saved FP is at [ebp+0], return address at [ebp+4]
	// (per the prolog shown in the diagram above).
	void* fp       = ((void**)prev_fp)[0];
	void* ret_addr = ((void**)prev_fp)[1];
	if(!fp)
		return INFO_ALL_COMPLETE;
	if(!debug_is_stack_ptr(fp))
		WARN_RETURN(ERR_14);
	if(!debug_is_code_ptr(ret_addr))
		WARN_RETURN(ERR_15);

	// try to determine the address of the called function by decoding the
	// CALL instruction that precedes ret_addr; 0 if indeterminable.
	void* target;
	LibError err = ia32_get_call_target(ret_addr, &target);
	RETURN_ERR(err);
	if(target)	// were able to determine it from the call instruction
		debug_assert(debug_is_code_ptr(target));

	sf->AddrFrame .Offset = (DWORD64)fp;
	sf->AddrPC    .Offset = (DWORD64)target;
	sf->AddrReturn.Offset = (DWORD64)ret_addr;

	return ERR_OK;
}

#endif	// #if CPU_IA32 && !CONFIG_OMIT_FP


// called for each stack frame found by walk_stack, passing information
// about the frame and <user_arg>.
// return INFO_CB_CONTINUE to continue, anything else to stop immediately
// and return that value to walk_stack's caller.
//
// rationale: we can't just pass function's address to the callback -
// dump_frame_cb needs the frame pointer for reg-relative variables.
typedef LibError (*StackFrameCallback)(const STACKFRAME64*, void*);

// iterate over a call stack, calling back for each frame encountered.
// if <pcontext> != 0, we start there; otherwise, at the current context.
// return an error if callback never succeeded (returned 0).
//
// lock must be held.
// walk the stack, invoking <cb> (with <user_arg>) for each frame after
// skipping <skip> of them. <pcontext> optionally supplies the starting
// CPU context (e.g. from an exception record); otherwise it is captured
// here. lock must be held by the caller.
static LibError walk_stack(StackFrameCallback cb, void* user_arg = 0, uint skip = 0, const CONTEXT* pcontext = 0)
{
	// to function properly, StackWalk64 requires a CONTEXT on
	// non-x86 systems (documented) or when in release mode (observed).
	// exception handlers can call walk_stack with their context record;
	// otherwise (e.g. dump_stack from debug_assert), we need to query it.
	CONTEXT context;
	// .. caller knows the context (most likely from an exception);
	//    since StackWalk64 may modify it, copy to a local variable.
	if(pcontext)
		context = *pcontext;
	// .. need to determine context ourselves.
	else
	{
		skip++;	// skip this frame

		// there are 4 ways to do so, in order of preference:
		// - asm (easy to use but currently only implemented on IA32)
		// - RtlCaptureContext (only available on WinXP or above)
		// - intentionally raise an SEH exception and capture its context
		//   (spams us with "first chance exception")
		// - GetThreadContext while suspended* (a bit tricky + slow).
		//
		// * it used to be common practice to query the current thread's
		//   context, but WinXP SP2 and above require it be suspended.
		//
		// this MUST be done inline and not in an external function because
		// compiler-generated prolog code trashes some registers.

#if CPU_IA32
		ia32_get_current_context(&context);
#else
		// try to import RtlCaptureContext (available on WinXP and later)
		// .. note: kernel32 is always loaded into every process, so we
		//    don't need LoadLibrary/FreeLibrary.
		HMODULE hKernel32Dll = GetModuleHandle("kernel32.dll");
		// NOTE(review): the documented signature is VOID RtlCaptureContext(PCONTEXT);
		// this declaration says PCONTEXT*, but the call below passes &context
		// (a CONTEXT*), which matches the real API - confirm before changing.
		VOID (*pRtlCaptureContext)(PCONTEXT*);
		*(void**)&pRtlCaptureContext = GetProcAddress(hKernel32Dll, "RtlCaptureContext");
		if(pRtlCaptureContext)
			pRtlCaptureContext(&context);
		// not available: raise+handle an exception; grab the reported context.
		else
		{
			__try
			{
				RaiseException(0xF001, 0, 0, 0);
			}
			// the filter expression both copies the context and resumes
			// execution, so the handler body is never entered.
			__except(context = (GetExceptionInformation())->ContextRecord, EXCEPTION_CONTINUE_EXECUTION)
			{
			}
		}
#endif
	}
	pcontext = &context;

	STACKFRAME64 sf;
	memset(&sf, 0, sizeof(sf));
	// PC_/FP_/SP_ are register-name macros defined elsewhere for the
	// target architecture (on IA-32: Eip/Ebp/Esp).
	sf.AddrPC.Offset    = pcontext->PC_;
	sf.AddrPC.Mode      = AddrModeFlat;
	sf.AddrFrame.Offset = pcontext->FP_;
	sf.AddrFrame.Mode   = AddrModeFlat;
	sf.AddrStack.Offset = pcontext->SP_;
	sf.AddrStack.Mode   = AddrModeFlat;

	// for each stack frame found:
	LibError ret = ERR_SYM_NO_STACK_FRAMES_FOUND;
	for(;;)
	{
		// rationale:
		// - provide a separate ia32 implementation so that simple
		//   stack walks (e.g. to determine callers of malloc) do not
		//   require firing up dbghelp. that takes tens of seconds when
		//   OS symbols are installed (because symserv is wanting to access
		//   inet), which is entirely unacceptable.
		// - VC7.1 sometimes generates stack frames despite /Oy ;
		//   ia32_walk_stack may appear to work, but it isn't reliable in
		//   this case and therefore must not be used!
		// - don't switch between ia32_stack_walk and StackWalk64 when one
		//   of them fails: this needlessly complicates things. the ia32
		//   code is authoritative provided its prerequisite (FP not omitted)
		//   is met, otherwise totally unusable.
		LibError err;
#if CPU_IA32 && !CONFIG_OMIT_FP
		err = ia32_walk_stack(&sf);
#else
		sym_init();
		// note: unfortunately StackWalk64 doesn't always SetLastError,
		// so we have to reset it and check for 0. *sigh*
		SetLastError(0);
		const HANDLE hThread = GetCurrentThread();
		BOOL ok = StackWalk64(machine, hProcess, hThread, &sf, (PVOID)pcontext, 0, SymFunctionTableAccess64, SymGetModuleBase64, 0);
		// note: don't use LibError_from_win32 because it raises a warning,
		// and this "fails" commonly (when no stack frames are left).
		err = ok? ERR_OK : ERR_FAIL;
#endif

		// no more frames found - abort. note: also test FP because
		// StackWalk64 sometimes erroneously reports success.
		void* fp = (void*)(uintptr_t)sf.AddrFrame .Offset;
		if(err != ERR_OK || !fp)
			return ret;

		if(skip)
		{
			skip--;
			continue;
		}

		ret = cb(&sf, user_arg);
		// callback reports it's done; stop calling it and return that value.
		// (can be either success or failure)
		if(ret != INFO_CB_CONTINUE)
		{
			debug_assert(ret <= 0);	// shouldn't return > 0
			return ret;
		}
	}
}


//
// get address of Nth function above us on the call stack (uses walk_stack)
//

// called by walk_stack for each stack frame.
// note: returns ERR_OK (not INFO_CB_CONTINUE), so walking stops at the
// first frame that wasn't skipped - exactly the Nth caller.
static LibError nth_caller_cb(const STACKFRAME64* sf, void* user_arg)
{
	void** pfunc = (void**)user_arg;

	// return its address
	*pfunc = (void*)sf->AddrPC.Offset;
	return ERR_OK;
}

// return address of the Nth function on the call stack.
// if <pcontext> is nonzero, it is assumed to be a platform-specific
// representation of execution state (e.g. Win32 CONTEXT) and tracing
// starts there; this is useful for exceptions.
// otherwise, tracing starts at the current stack position, and the given
// number of stack frames (i.e. functions) above the caller are skipped.
// used by mmgr to determine what function requested each allocation;
// this is fast enough to allow that.
void* debug_get_nth_caller(uint skip, void* pcontext)
{
	if(!pcontext)
		skip++;	// skip this frame

	lock();
	void* func;
	LibError err = walk_stack(nth_caller_cb, &func, skip, (const CONTEXT*)pcontext);
	unlock();
	return (err == ERR_OK)? func : 0;
}


//////////////////////////////////////////////////////////////////////////////
//
// helper routines for symbol value dump
//
//////////////////////////////////////////////////////////////////////////////

// overflow is impossible in practice, but check for robustness.
// keep in sync with DumpState.
// limits on pointer-chasing depth and UDT nesting while dumping symbols;
// both are stored in 8-bit DumpState fields below.
static const uint MAX_INDIRECTION = 255;
static const uint MAX_LEVEL = 255;

// per-symbol dump state, passed (by value) down the dump_sym* recursion.
struct DumpState
{
	// keep in sync with MAX_* above
	uint level : 8;        // current UDT/sequence nesting depth
	uint indirection : 8;  // number of pointers dereferenced so far

	DumpState()
	{
		level = 0;
		indirection = 0;
	}
};

//----------------------------------------------------------------------------

// cursor into the caller-provided output buffer; see out_init/out.
static size_t out_chars_left;
static bool out_have_warned_of_overflow;
	// only do so once until next out_init to avoid flood of messages.
static wchar_t* out_pos;

// some top-level (*) symbols cause tons of output - so much that they may
// single-handedly overflow the buffer (e.g. pointer to a tree of huge UDTs).
// we can't have that, so there is a limit in place as to how much a
// single top-level symbol can output. after that is reached, dumping is
// aborted for that symbol but continues for the subsequent top-level symbols.
//
// this is implemented as follows: dump_sym_cb latches the current output
// position; each dump_sym (through which all symbols go) checks if the
// new position exceeds the limit and aborts if so.
// slight wrinkle: since we don't want each level of UDTs to successively
// realize the limit has been hit and display the error message, we
// return ERR_SYM_SINGLE_SYMBOL_LIMIT once and thereafter
// ERR_SYM_SUPPRESS_OUTPUT.
//
// * example: local variables, as opposed to child symbols in a UDT.
static wchar_t* out_latched_pos;
static bool out_have_warned_of_limit;

// reset the output cursor and warning latches for a fresh dump into <buf>.
static void out_init(wchar_t* buf, size_t max_chars)
{
	out_pos = buf;
	out_chars_left = max_chars;
	out_have_warned_of_overflow = false;
	out_have_warned_of_limit = false;
}


// printf-style append to the output buffer; silently stops (after one
// in-buffer warning) once the buffer is full.
static void out(const wchar_t* fmt, ...)
{
	va_list args;
	va_start(args, fmt);
	int len = _vsnwprintf(out_pos, out_chars_left, fmt, args);
	va_end(args);

	// success
	if(len >= 0)
	{
		out_pos += len;
		// make sure out_chars_left remains nonnegative
		if((size_t)len > out_chars_left)
		{
			debug_warn("apparently wrote more than out_chars_left");
			len = (int)out_chars_left;
		}
		out_chars_left -= len;
	}
	// no more room left
	else
	{
		// the buffer really is full yet out_chars_left may not be 0
		// (since it isn't updated if _vsnwprintf returns -1).
		// must be set so subsequent calls don't try to squeeze stuff in.
		out_chars_left = 0;

		// write a warning into the output buffer (once) so it isn't
		// abruptly cut off (which looks like an error)
		if(!out_have_warned_of_overflow)
		{
			out_have_warned_of_overflow = true;

			// with the current out_pos / out_chars_left variables, there's
			// no way of knowing where the buffer actually ends. no matter;
			// we'll just put the warning before out_pos and eat into the
			// second newest text.
			const wchar_t text[] = L"(no more room in buffer)";
			wcscpy(out_pos-ARRAY_SIZE(text), text);	// safe
		}
	}
}


// remove the last <num_chars> characters of output (used by UNINDENT and
// by callers that need to retract a separator).
static void out_erase(size_t num_chars)
{
	// don't do anything if end of buffer was hit (prevents repeatedly
	// scribbling over the last few bytes).
	if(out_have_warned_of_overflow)
		return;

	out_chars_left += (ssize_t)num_chars;
	out_pos -= num_chars;
	*out_pos = '\0';
		// make sure it's 0-terminated in case there is no further output.
}


// (see above)
static void out_latch_pos()
{
	out_have_warned_of_limit = false;
	out_latched_pos = out_pos;
}


// (see above)
static LibError out_check_limit()
{
	if(out_have_warned_of_limit)
		return ERR_SYM_SUPPRESS_OUTPUT;	// NOWARN
	if(out_pos - out_latched_pos > 3000)	// ~30 lines
	{
		out_have_warned_of_limit = true;
		return ERR_SYM_SINGLE_SYMBOL_LIMIT;	// NOWARN
	}

	// no limit hit, proceed normally
	return ERR_OK;
}

//----------------------------------------------------------------------------

// indentation helpers; UNINDENT's 4 must match the width INDENT emits
// per level.
#define INDENT STMT(for(uint i = 0; i <= state.level; i++) out(L"    ");)
#define UNINDENT STMT(out_erase((state.level+1)*4);)


// does it look like an ASCII string is located at <p>?
// set <stride> to 2 to search for WCS-2 strings (of western characters!).
// called by dump_sequence for its string special-case.
//
// algorithm: scan the "string" and count # text chars vs. garbage.
static bool is_string(const u8* p, size_t stride)
{
	// note: access violations are caught by dump_sym; output is "?".
	int score = 0;
	for(;;)
	{
		// current character is:
		const int c = *p & 0xff;	// prevent sign extension
		p += stride;
		// .. text
		if(isalnum(c))
			score += 5;
		// .. end of string
		else if(!c)
			break;
		// .. garbage
		else if(!isprint(c))
			score -= 4;

		// got enough information either way => done.
		// (we don't want to unnecessarily scan huge binary arrays)
		if(abs(score) >= 10)
			break;
	}

	return (score > 0);
}


// forward decl; called by dump_sequence and some of dump_sym_*.
static LibError dump_sym(DWORD id, const u8* p, DumpState state);

// from cvconst.h
//
// rationale: we don't provide a get_register routine, since only the
// value of FP is known to dump_frame_cb (via STACKFRAME64).
// displaying variables stored in registers is out of the question;
// all we can do is display FP-relative variables.
// CodeView register identifiers for the IA-32 GPRs (subset; from cvconst.h).
enum CV_HREG_e
{
	CV_REG_EAX = 17,
	CV_REG_ECX = 18,
	CV_REG_EDX = 19,
	CV_REG_EBX = 20,
	CV_REG_ESP = 21,
	CV_REG_EBP = 22,
	CV_REG_ESI = 23,
	CV_REG_EDI = 24
};

// return a human-readable name for <reg>; unknown values are formatted as
// hex into a static buffer. NOTE(review): the static buffer makes this
// non-reentrant - presumably only called while the dbghelp lock is held;
// confirm before using elsewhere.
static const wchar_t* string_for_register(CV_HREG_e reg)
{
	switch(reg)
	{
	case CV_REG_EAX: return L"eax";
	case CV_REG_ECX: return L"ecx";
	case CV_REG_EDX: return L"edx";
	case CV_REG_EBX: return L"ebx";
	case CV_REG_ESP: return L"esp";
	case CV_REG_EBP: return L"ebp";
	case CV_REG_ESI: return L"esi";
	case CV_REG_EDI: return L"edi";
	default:
		{
		static wchar_t buf[19];
		swprintf(buf, ARRAY_SIZE(buf), L"0x%x", reg);
		return buf;
		}
	}
}


// append a parenthesized explanation of <err> to the output.
// ERR_OK and ERR_SYM_SUPPRESS_OUTPUT produce no output; for
// ERR_SYM_UNRETRIEVABLE_REG, <p> carries the register number
// (cast to a pointer by the producer of that error).
static void dump_error(LibError err, const u8* p)
{
	switch(err)
	{
	case 0:
		// no error => no output
		break;
	case ERR_SYM_SINGLE_SYMBOL_LIMIT:
		out(L"(too much output; skipping to next top-level symbol)");
		break;
	case ERR_SYM_UNRETRIEVABLE_STATIC:
		out(L"(unavailable - located in another module)");
		break;
	case ERR_SYM_UNRETRIEVABLE_REG:
		out(L"(unavailable - stored in register %s)", string_for_register((CV_HREG_e)(uintptr_t)p));
		break;
	case ERR_SYM_TYPE_INFO_UNAVAILABLE:
		out(L"(unavailable - type info request failed (GLE=%d))", GetLastError());
		break;
	case ERR_SYM_INTERNAL_ERROR:
		out(L"(unavailable - internal error)\r\n");
		break;
	case ERR_SYM_SUPPRESS_OUTPUT:
		// not an error; do not output anything. handled by caller.
		break;
	default:
		out(L"(unavailable - unspecified error 0x%X (%d))", err, err);
		break;
	}
}


// split out of dump_sequence: if <p> looks like a char/wchar_t string,
// print it (quoted) and return ERR_OK; otherwise return
// INFO_CANNOT_HANDLE so the caller dumps it as a normal sequence.
static LibError dump_string(const u8* p, size_t el_size)
{
	// not char or wchar_t string
	if(el_size != sizeof(char) && el_size != sizeof(wchar_t))
		return INFO_CANNOT_HANDLE;
	// not text
	if(!is_string(p, el_size))
		return INFO_CANNOT_HANDLE;

	wchar_t buf[512];
	if(el_size == sizeof(wchar_t))
		// NOTE(review): wcscpy_s does not truncate - a source string of
		// >= 512 chars triggers the invalid-parameter handler. presumably
		// is_string's early-out keeps inputs short; confirm.
		wcscpy_s(buf, ARRAY_SIZE(buf), (const wchar_t*)p);
	// convert to wchar_t
	else
	{
		size_t i;
		for(i = 0; i < ARRAY_SIZE(buf)-1; i++)
		{
			buf[i] = (wchar_t)p[i];
			if(buf[i] == '\0')
				break;
		}
		buf[i] = '\0';
	}

	out(L"\"%s\"", buf);
	return ERR_OK;
}

// split out of dump_sequence.
static void seq_determine_formatting(size_t el_size, size_t el_count, bool* fits_on_one_line, size_t* num_elements_to_show) { if(el_size == sizeof(char)) { *fits_on_one_line = el_count <= 16; *num_elements_to_show = MIN(16, el_count); } else if(el_size <= sizeof(int)) { *fits_on_one_line = el_count <= 8; *num_elements_to_show = MIN(12, el_count); } else { *fits_on_one_line = false; *num_elements_to_show = MIN(8, el_count); } // make sure empty containers are displayed with [0] {}, otherwise // the lack of output looks like an error. if(!el_count) *fits_on_one_line = true; } static LibError dump_sequence(DebugIterator el_iterator, void* internal, size_t el_count, DWORD el_type_id, size_t el_size, DumpState state) { const u8* el_p = 0; // avoid "uninitialized" warning // special case: display as a string if the sequence looks to be text. // do this only if container isn't empty because the otherwise the // iterator may crash. if(el_count) { el_p = el_iterator(internal, el_size); LibError ret = dump_string(el_p, el_size); if(ret == ERR_OK) return ret; } // choose formatting based on element size and count bool fits_on_one_line; size_t num_elements_to_show; seq_determine_formatting(el_size, el_count, &fits_on_one_line, &num_elements_to_show); out(L"[%d] ", el_count); state.level++; out(fits_on_one_line? L"{ " : L"\r\n"); for(size_t i = 0; i < num_elements_to_show; i++) { if(!fits_on_one_line) INDENT; LibError err = dump_sym(el_type_id, el_p, state); el_p = el_iterator(internal, el_size); // there was no output for this child; undo its indentation (if any), // skip everything below and proceed with the next child. if(err == ERR_SYM_SUPPRESS_OUTPUT) { if(!fits_on_one_line) UNINDENT; continue; } dump_error(err, el_p); // nop if err == ERR_OK // add separator unless this is the last element (can't just // erase below due to additional "..."). if(i != num_elements_to_show-1) out(fits_on_one_line? 
L", " : L"\r\n"); if(err == ERR_SYM_SINGLE_SYMBOL_LIMIT) break; } // for each child // indicate some elements were skipped if(el_count != num_elements_to_show) out(L" ..."); state.level--; if(fits_on_one_line) out(L" }"); return ERR_OK; } static const u8* array_iterator(void* internal, size_t el_size) { const u8*& pos = *(const u8**)internal; const u8* cur_pos = pos; pos += el_size; return cur_pos; } static LibError dump_array(const u8* p, size_t el_count, DWORD el_type_id, size_t el_size, DumpState state) { const u8* iterator_internal_pos = p; return dump_sequence(array_iterator, &iterator_internal_pos, el_count, el_type_id, el_size, state); } static const STACKFRAME64* current_stackframe64; static LibError determine_symbol_address(DWORD id, DWORD UNUSED(type_id), const u8** pp) { const STACKFRAME64* sf = current_stackframe64; DWORD data_kind; if(!SymGetTypeInfo(hProcess, mod_base, id, TI_GET_DATAKIND, &data_kind)) WARN_RETURN(ERR_SYM_TYPE_INFO_UNAVAILABLE); switch(data_kind) { // SymFromIndex will fail case DataIsMember: // pp is already correct (udt_dump_normal retrieved the offset; // we do it that way so we can check it against the total // UDT size for safety). return ERR_OK; // this symbol is defined as static in another module => // there's nothing we can do. 
case DataIsStaticMember: return ERR_SYM_UNRETRIEVABLE_STATIC; // NOWARN // ok; will handle below case DataIsLocal: case DataIsStaticLocal: case DataIsParam: case DataIsObjectPtr: case DataIsFileStatic: case DataIsGlobal: break; default: debug_warn("unexpected data_kind"); //case DataIsConstant } // get SYMBOL_INFO (we need .Flags) SYMBOL_INFO_PACKAGEW2 sp; SYMBOL_INFOW* sym = &sp.si; if(!SymFromIndexW(hProcess, mod_base, id, sym)) WARN_RETURN(ERR_SYM_TYPE_INFO_UNAVAILABLE); DWORD addrofs = 0; ULONG64 addr2 = 0; DWORD ofs2 = 0; SymGetTypeInfo(hProcess, mod_base, id, TI_GET_ADDRESSOFFSET, &addrofs); SymGetTypeInfo(hProcess, mod_base, id, TI_GET_ADDRESS, &addr2); SymGetTypeInfo(hProcess, mod_base, id, TI_GET_OFFSET, &ofs2); // get address ULONG64 addr = sym->Address; // .. relative to a register // note: we only have the FP (not SP) if(sym->Flags & SYMFLAG_REGREL) { if(sym->Register == CV_REG_EBP) goto fp_rel; else goto in_register; } // .. relative to FP (appears to be obsolete) else if(sym->Flags & SYMFLAG_FRAMEREL) { fp_rel: addr += sf->AddrFrame.Offset; // HACK: reg-relative symbols (params and locals, but not // static) appear to be off by 4 bytes in release builds. // no idea as to the cause, but this "fixes" it. #ifdef NDEBUG addr += sizeof(void*); #endif } // .. 
in register (this happens when optimization is enabled, // but we can't do anything; see SymbolInfoRegister) else if(sym->Flags & SYMFLAG_REGISTER) { in_register: *pp = (const u8*)(uintptr_t)sym->Register; return ERR_SYM_UNRETRIEVABLE_REG; // NOWARN } *pp = (const u8*)addr; debug_printf("SYM| %ws at %p flags=%X dk=%d sym->addr=%I64X addrofs=%X addr2=%I64X ofs2=%X\n", sym->Name, *pp, sym->Flags, data_kind, sym->Address, addrofs, addr2, ofs2); return ERR_OK; } //----------------------------------------------------------------------------- // dump routines for each dbghelp symbol type //----------------------------------------------------------------------------- // these functions return != 0 if they're not able to produce any // reasonable output at all; the caller (dump_sym_data, dump_sequence, etc.) // will display the appropriate error message via dump_error. // called by dump_sym; lock is held. static LibError dump_sym_array(DWORD type_id, const u8* p, DumpState state) { ULONG64 size_ = 0; if(!SymGetTypeInfo(hProcess, mod_base, type_id, TI_GET_LENGTH, &size_)) WARN_RETURN(ERR_SYM_TYPE_INFO_UNAVAILABLE); const size_t size = (size_t)size_; // get element count and size DWORD el_type_id = 0; if(!SymGetTypeInfo(hProcess, mod_base, type_id, TI_GET_TYPEID, &el_type_id)) WARN_RETURN(ERR_SYM_TYPE_INFO_UNAVAILABLE); // .. workaround: TI_GET_COUNT returns total struct size for // arrays-of-struct. therefore, calculate as size / el_size. 
ULONG64 el_size_; if(!SymGetTypeInfo(hProcess, mod_base, el_type_id, TI_GET_LENGTH, &el_size_)) WARN_RETURN(ERR_SYM_TYPE_INFO_UNAVAILABLE); const size_t el_size = (size_t)el_size_; debug_assert(el_size != 0); const size_t num_elements = size/el_size; debug_assert(num_elements != 0); return dump_array(p, num_elements, el_type_id, el_size, state); } //----------------------------------------------------------------------------- static LibError dump_sym_base_type(DWORD type_id, const u8* p, DumpState state) { DWORD base_type; if(!SymGetTypeInfo(hProcess, mod_base, type_id, TI_GET_BASETYPE, &base_type)) WARN_RETURN(ERR_SYM_TYPE_INFO_UNAVAILABLE); ULONG64 size_ = 0; if(!SymGetTypeInfo(hProcess, mod_base, type_id, TI_GET_LENGTH, &size_)) WARN_RETURN(ERR_SYM_TYPE_INFO_UNAVAILABLE); const size_t size = (size_t)size_; // single out() call. note: we pass a single u64 for all sizes, // which will only work on little-endian systems. // must be declared before goto to avoid W4 warning. const wchar_t* fmt = L""; u64 data = movzx_64le(p, size); // if value is 0xCC..CC (uninitialized mem), we display as hex. // the output would otherwise be garbage; this makes it obvious. // note: be very careful to correctly handle size=0 (e.g. void*). for(size_t i = 0; i < size; i++) { if(p[i] != 0xCC) break; if(i == size-1) goto display_as_hex; } switch(base_type) { // boolean case btBool: debug_assert(size == sizeof(bool)); fmt = L"%hs"; data = (u64)(data? "true " : "false"); break; // floating-point case btFloat: // C calling convention casts float params to doubles, so printf // expects one when we indicate %g. there are no width flags, // so we have to manually convert the float data to double. 
if(size == sizeof(float)) *(double*)&data = (double)*(float*)&data; else if(size != sizeof(double)) debug_warn("dump_sym_base_type: invalid float size"); fmt = L"%g"; break; // signed integers (displayed as decimal) case btInt: case btLong: if(size != 1 && size != 2 && size != 4 && size != 8) debug_warn("dump_sym_base_type: invalid int size"); // need to re-load and sign-extend, because we output 64 bits. data = movsx_64le(p, size); fmt = L"%I64d"; break; // unsigned integers (displayed as hex) // note: 0x00000000 can get annoying (0 would be nicer), // but it indicates the variable size and makes for consistently // formatted structs/arrays. (0x1234 0 0x5678 is ugly) case btUInt: case btULong: display_as_hex: if(size == 1) { // _TUCHAR if(state.indirection) { state.indirection = 0; return dump_array(p, 8, type_id, size, state); } fmt = L"0x%02X"; } else if(size == 2) fmt = L"0x%04X"; else if(size == 4) fmt = L"0x%08X"; else if(size == 8) fmt = L"0x%016I64X"; else debug_warn("dump_sym_base_type: invalid uint size"); break; // character case btChar: case btWChar: debug_assert(size == sizeof(char) || size == sizeof(wchar_t)); // char*, wchar_t* if(state.indirection) { state.indirection = 0; return dump_array(p, 8, type_id, size, state); } // either integer or character; // if printable, the character will be appended below. fmt = L"%d"; break; // note: void* is sometimes indicated as (pointer, btNoType). case btVoid: case btNoType: // void* - cannot display what it's pointing to (type unknown). 
if(state.indirection) { out_erase(4); // " -> " fmt = L""; } else debug_warn("dump_sym_base_type: non-pointer btVoid or btNoType"); break; default: debug_warn("dump_sym_base_type: unknown type"); //-fallthrough // unsupported complex types case btBCD: case btCurrency: case btDate: case btVariant: case btComplex: case btBit: case btBSTR: case btHresult: return ERR_SYM_UNSUPPORTED; // NOWARN } out(fmt, data); // if the current value is a printable character, display in that form. // this isn't only done in btChar because sometimes ints store characters. if(data < 0x100) { int c = (int)data; if(isprint(c)) out(L" ('%hc')", c); } return ERR_OK; } //----------------------------------------------------------------------------- static LibError dump_sym_base_class(DWORD type_id, const u8* p, DumpState state) { DWORD base_class_type_id; if(!SymGetTypeInfo(hProcess, mod_base, type_id, TI_GET_TYPEID, &base_class_type_id)) WARN_RETURN(ERR_SYM_TYPE_INFO_UNAVAILABLE); // this is a virtual base class. we can't display those because it'd // require reading the VTbl, which is difficult given lack of documentation // and just not worth it. 
DWORD vptr_ofs; if(SymGetTypeInfo(hProcess, mod_base, type_id, TI_GET_VIRTUALBASEPOINTEROFFSET, &vptr_ofs)) return ERR_SYM_UNSUPPORTED; // NOWARN return dump_sym(base_class_type_id, p, state); } //----------------------------------------------------------------------------- static LibError dump_sym_data(DWORD id, const u8* p, DumpState state) { // display name (of variable/member) const wchar_t* name; if(!SymGetTypeInfo(hProcess, mod_base, id, TI_GET_SYMNAME, &name)) WARN_RETURN(ERR_SYM_TYPE_INFO_UNAVAILABLE); out(L"%s = ", name); LocalFree((HLOCAL)name); __try { // get type_id and address DWORD type_id; if(!SymGetTypeInfo(hProcess, mod_base, id, TI_GET_TYPEID, &type_id)) WARN_RETURN(ERR_SYM_TYPE_INFO_UNAVAILABLE); LibError ret = determine_symbol_address(id, type_id, &p); if(ret != 0) return ret; // display value recursively return dump_sym(type_id, p, state); } __except(EXCEPTION_EXECUTE_HANDLER) { return ERR_SYM_INTERNAL_ERROR; // NOWARN } } //----------------------------------------------------------------------------- static LibError dump_sym_enum(DWORD type_id, const u8* p, DumpState UNUSED(state)) { ULONG64 size_ = 0; if(!SymGetTypeInfo(hProcess, mod_base, type_id, TI_GET_LENGTH, &size_)) WARN_RETURN(ERR_SYM_TYPE_INFO_UNAVAILABLE); const size_t size = (size_t)size_; const i64 enum_value = movsx_64le(p, size); // get array of child symbols (enumerants). DWORD num_children; if(!SymGetTypeInfo(hProcess, mod_base, type_id, TI_GET_CHILDRENCOUNT, &num_children)) WARN_RETURN(ERR_SYM_TYPE_INFO_UNAVAILABLE); TI_FINDCHILDREN_PARAMS2 fcp(num_children); if(!SymGetTypeInfo(hProcess, mod_base, type_id, TI_FINDCHILDREN, &fcp)) WARN_RETURN(ERR_SYM_TYPE_INFO_UNAVAILABLE); num_children = fcp.p.Count; // was truncated to MAX_CHILDREN const DWORD* children = fcp.p.ChildId; // for each child (enumerant): for(uint i = 0; i < num_children; i++) { DWORD child_data_id = children[i]; // get this enumerant's value. 
we can't make any assumptions about // the variant's type or size - no restriction is documented. // rationale: VariantChangeType is much less tedious than doing // it manually and guarantees we cover everything. the OLE DLL is // already pulled in by e.g. OpenGL anyway. VARIANT v; if(!SymGetTypeInfo(hProcess, mod_base, child_data_id, TI_GET_VALUE, &v)) WARN_RETURN(ERR_SYM_TYPE_INFO_UNAVAILABLE); if(VariantChangeType(&v, &v, 0, VT_I8) != S_OK) continue; // it's the one we want - output its name. if(enum_value == v.llVal) { const wchar_t* name; if(!SymGetTypeInfo(hProcess, mod_base, child_data_id, TI_GET_SYMNAME, &name)) WARN_RETURN(ERR_SYM_TYPE_INFO_UNAVAILABLE); out(L"%s", name); LocalFree((HLOCAL)name); return ERR_OK; } } // we weren't able to retrieve a matching enum value, but can still // produce reasonable output (the numeric value). // note: could goto here after a SGTI fails, but we fail instead // to make sure those errors are noticed. out(L"%I64d", enum_value); return ERR_OK; } //----------------------------------------------------------------------------- static LibError dump_sym_function(DWORD UNUSED(type_id), const u8* UNUSED(p), DumpState UNUSED(state)) { return ERR_SYM_SUPPRESS_OUTPUT; // NOWARN } //----------------------------------------------------------------------------- static LibError dump_sym_function_type(DWORD UNUSED(type_id), const u8* p, DumpState UNUSED(state)) { // this symbol gives class parent, return type, and parameter count. // unfortunately the one thing we care about, its name, // isn't exposed via TI_GET_SYMNAME, so we resolve it ourselves. unlock(); // prevent recursive lock char name[DBG_SYMBOL_LEN]; LibError err = debug_resolve_symbol((void*)p, name, 0, 0); lock(); out(L"0x%p", p); if(err == ERR_OK) out(L" (%hs)", name); return ERR_OK; } //----------------------------------------------------------------------------- // do not follow pointers that we have already displayed. 
this reduces // clutter a bit and prevents infinite recursion for cyclical references // (e.g. via struct S { S* p; } s; s.p = &s;) typedef std::set PtrSet; static PtrSet* already_visited_ptrs; // allocated on-demand by ptr_already_visited. this cannot be a NLSO // because it may be used before _cinit. // if we put it in a function, construction still fails on VC7 because // the atexit table will not have been initialized yet. // called by debug_dump_stack and wdbg_sym_shutdown static void ptr_reset_visited() { delete already_visited_ptrs; already_visited_ptrs = 0; } static bool ptr_already_visited(const u8* p) { if(!already_visited_ptrs) already_visited_ptrs = new PtrSet; std::pair ret = already_visited_ptrs->insert(p); return !ret.second; } static LibError dump_sym_pointer(DWORD type_id, const u8* p, DumpState state) { ULONG64 size_ = 0; if(!SymGetTypeInfo(hProcess, mod_base, type_id, TI_GET_LENGTH, &size_)) WARN_RETURN(ERR_SYM_TYPE_INFO_UNAVAILABLE); const size_t size = (size_t)size_; // read+output pointer's value. p = (const u8*)movzx_64le(p, size); out(L"0x%p", p); // bail if it's obvious the pointer is bogus // (=> can't display what it's pointing to) if(debug_is_pointer_bogus(p)) return ERR_OK; // avoid duplicates and circular references if(ptr_already_visited(p)) { out(L" (see above)"); return ERR_OK; } // display what the pointer is pointing to. // if the pointer is invalid (despite "bogus" check above), // dump_data_sym recovers via SEH and prints an error message. // if the pointed-to value turns out to uninteresting (e.g. void*), // the responsible dump_sym* will erase "->", leaving only address. 
out(L" -> "); if(!SymGetTypeInfo(hProcess, mod_base, type_id, TI_GET_TYPEID, &type_id)) WARN_RETURN(ERR_SYM_TYPE_INFO_UNAVAILABLE); // prevent infinite recursion just to be safe (shouldn't happen) if(state.indirection >= MAX_INDIRECTION) WARN_RETURN(ERR_SYM_NESTING_LIMIT); state.indirection++; return dump_sym(type_id, p, state); } //----------------------------------------------------------------------------- static LibError dump_sym_typedef(DWORD type_id, const u8* p, DumpState state) { if(!SymGetTypeInfo(hProcess, mod_base, type_id, TI_GET_TYPEID, &type_id)) WARN_RETURN(ERR_SYM_TYPE_INFO_UNAVAILABLE); return dump_sym(type_id, p, state); } //----------------------------------------------------------------------------- // determine type and size of the given child in a UDT. // useful for UDTs that contain typedefs describing their contents, // e.g. value_type in STL containers. static LibError udt_get_child_type(const wchar_t* child_name, ULONG num_children, const DWORD* children, DWORD* el_type_id, size_t* el_size) { *el_type_id = 0; *el_size = 0; for(ULONG i = 0; i < num_children; i++) { DWORD child_id = children[i]; // find the desired child wchar_t* this_child_name; if(!SymGetTypeInfo(hProcess, mod_base, child_id, TI_GET_SYMNAME, &this_child_name)) continue; const bool found_it = !wcscmp(this_child_name, child_name); LocalFree(this_child_name); if(!found_it) continue; // .. its type information is what we want. 
DWORD type_id; if(!SymGetTypeInfo(hProcess, mod_base, child_id, TI_GET_TYPEID, &type_id)) WARN_RETURN(ERR_SYM_TYPE_INFO_UNAVAILABLE); ULONG64 size; if(!SymGetTypeInfo(hProcess, mod_base, child_id, TI_GET_LENGTH, &size)) WARN_RETURN(ERR_SYM_TYPE_INFO_UNAVAILABLE); *el_type_id = type_id; *el_size = (size_t)size; return ERR_OK; } // (happens if called for containers that are treated as STL but are not) return ERR_SYM_CHILD_NOT_FOUND; // NOWARN } static LibError udt_dump_std(const wchar_t* wtype_name, const u8* p, size_t size, DumpState state, ULONG num_children, const DWORD* children) { LibError err; // not a C++ standard library object; can't handle it. if(wcsncmp(wtype_name, L"std::", 5) != 0) return INFO_CANNOT_HANDLE; // check for C++ objects that should be displayed via udt_dump_normal. // STL containers are special-cased and the rest (apart from those here) // are ignored, because for the most part they are spew. if(!wcsncmp(wtype_name, L"std::pair", 9)) return INFO_CANNOT_HANDLE; // convert to char since debug_stl doesn't support wchar_t. char ctype_name[DBG_SYMBOL_LEN]; snprintf(ctype_name, ARRAY_SIZE(ctype_name), "%ws", wtype_name); // display contents of STL containers // .. get element type DWORD el_type_id; size_t el_size; err = udt_get_child_type(L"value_type", num_children, children, &el_type_id, &el_size); if(err != ERR_OK) goto not_valid_container; // .. get iterator and # elements size_t el_count; DebugIterator el_iterator; u8 it_mem[DEBUG_STL_MAX_ITERATOR_SIZE]; err = stl_get_container_info(ctype_name, p, size, el_size, &el_count, &el_iterator, it_mem); if(err != ERR_OK) goto not_valid_container; return dump_sequence(el_iterator, it_mem, el_count, el_type_id, el_size, state); not_valid_container: // build and display detailed "error" message. char buf[100]; const char* text; // .. object named std::* but doesn't include a "value_type" child => // it's a non-STL C++ stdlib object. 
wasn't handled by the // special case above, so we just display its simplified type name // (the contents are usually spew). if(err == ERR_SYM_CHILD_NOT_FOUND) text = ""; // .. not one of the containers we can analyse. if(err == ERR_STL_CNT_UNKNOWN) text = "unsupported "; // .. container of a known type but contents are invalid. if(err == ERR_STL_CNT_INVALID) text = "uninitialized/invalid "; // .. some other error encountered else { snprintf(buf, ARRAY_SIZE(buf), "error %d while analyzing ", err); text = buf; } out(L"(%hs%hs)", text, stl_simplify_name(ctype_name)); return ERR_OK; } static bool udt_should_suppress(const wchar_t* type_name) { // specialized HANDLEs are defined as pointers to structs by // DECLARE_HANDLE. we only want the numerical value (pointer address), // so prevent these structs from being displayed. // note: no need to check for indirection; these are only found in // HANDLEs (which are pointers). // removed obsolete defs: HEVENT, HFILE, HUMPD if(type_name[0] != 'H') goto not_handle; #define SUPPRESS_HANDLE(name) if(!wcscmp(type_name, L#name L"__")) return true; SUPPRESS_HANDLE(HACCEL); SUPPRESS_HANDLE(HBITMAP); SUPPRESS_HANDLE(HBRUSH); SUPPRESS_HANDLE(HCOLORSPACE); SUPPRESS_HANDLE(HCURSOR); SUPPRESS_HANDLE(HDC); SUPPRESS_HANDLE(HENHMETAFILE); SUPPRESS_HANDLE(HFONT); SUPPRESS_HANDLE(HGDIOBJ); SUPPRESS_HANDLE(HGLOBAL); SUPPRESS_HANDLE(HGLRC); SUPPRESS_HANDLE(HHOOK); SUPPRESS_HANDLE(HICON); SUPPRESS_HANDLE(HIMAGELIST); SUPPRESS_HANDLE(HIMC); SUPPRESS_HANDLE(HINSTANCE); SUPPRESS_HANDLE(HKEY); SUPPRESS_HANDLE(HKL); SUPPRESS_HANDLE(HKLOCAL); SUPPRESS_HANDLE(HMENU); SUPPRESS_HANDLE(HMETAFILE); SUPPRESS_HANDLE(HMODULE); SUPPRESS_HANDLE(HMONITOR); SUPPRESS_HANDLE(HPALETTE); SUPPRESS_HANDLE(HPEN); SUPPRESS_HANDLE(HRGN); SUPPRESS_HANDLE(HRSRC); SUPPRESS_HANDLE(HSTR); SUPPRESS_HANDLE(HTASK); SUPPRESS_HANDLE(HWINEVENTHOOK); SUPPRESS_HANDLE(HWINSTA); SUPPRESS_HANDLE(HWND); not_handle: return false; } static LibError udt_dump_suppressed(const wchar_t* 
type_name, const u8* UNUSED(p), size_t UNUSED(size), DumpState state, ULONG UNUSED(num_children), const DWORD* UNUSED(children)) { if(!udt_should_suppress(type_name)) return INFO_CANNOT_HANDLE; // the data symbol is pointer-to-UDT. since we won't display its // contents, leave only the pointer's value. if(state.indirection) out_erase(4); // " -> " // indicate something was deliberately left out // (otherwise, lack of output may be taken for an error) out(L" (..)"); return ERR_OK; } // (by now) non-trivial heuristic to determine if a UDT should be // displayed on one line or several. split out of udt_dump_normal. static bool udt_fits_on_one_line(const wchar_t* type_name, size_t child_count, size_t total_size) { // special case: always put CStr* on one line // (std::*string are displayed directly, but these go through // udt_dump_normal. we want to avoid the ensuing 3-line output) if(!wcscmp(type_name, L"CStr") || !wcscmp(type_name, L"CStr8") || !wcscmp(type_name, L"CStrW")) return true; // try to get actual number of relevant children // (typedefs etc. are never displayed, but are included in child_count. // we have to balance that vs. tons of static members, which aren't // reflected in total_size). // .. prevent division by 0. if(child_count == 0) child_count = 1; // special-case a few types that would otherwise be classified incorrectly // (due to having more or less than expected relevant children) if(!wcsncmp(type_name, L"std::pair", 9)) child_count = 2; const size_t avg_size = total_size / child_count; // (if 0, no worries - child_count will probably be large and // we return false, which is a safe default) // small UDT with a few (small) members: fits on one line. 
if(child_count <= 3 && avg_size <= sizeof(int)) return true; return false; } static LibError udt_dump_normal(const wchar_t* type_name, const u8* p, size_t size, DumpState state, ULONG num_children, const DWORD* children) { const bool fits_on_one_line = udt_fits_on_one_line(type_name, num_children, size); // prevent infinite recursion just to be safe (shouldn't happen) if(state.level >= MAX_LEVEL) WARN_RETURN(ERR_SYM_NESTING_LIMIT); state.level++; out(fits_on_one_line? L"{ " : L"\r\n"); bool displayed_anything = false; for(ULONG i = 0; i < num_children; i++) { const DWORD child_id = children[i]; // get offset. if not available, skip this child // (we only display data here, not e.g. typedefs) DWORD ofs = 0; if(!SymGetTypeInfo(hProcess, mod_base, child_id, TI_GET_OFFSET, &ofs)) continue; debug_assert(ofs < size); if(!fits_on_one_line) INDENT; const u8* el_p = p+ofs; LibError err = dump_sym(child_id, el_p, state); // there was no output for this child; undo its indentation (if any), // skip everything below and proceed with the next child. if(err == ERR_SYM_SUPPRESS_OUTPUT) { if(!fits_on_one_line) UNINDENT; continue; } displayed_anything = true; dump_error(err, el_p); // nop if err == ERR_OK out(fits_on_one_line? L", " : L"\r\n"); if(err == ERR_SYM_SINGLE_SYMBOL_LIMIT) break; } // for each child state.level--; if(!displayed_anything) { out_erase(2); // "{ " or "\r\n" out(L"(%s)", type_name); return ERR_OK; } // remove trailing comma separator // note: we can't avoid writing it by checking if i == num_children-1: // each child might be the last valid data member. if(fits_on_one_line) { out_erase(2); // ", " out(L" }"); } return ERR_OK; } static LibError dump_sym_udt(DWORD type_id, const u8* p, DumpState state) { ULONG64 size_ = 0; if(!SymGetTypeInfo(hProcess, mod_base, type_id, TI_GET_LENGTH, &size_)) WARN_RETURN(ERR_SYM_TYPE_INFO_UNAVAILABLE); const size_t size = (size_t)size_; // get array of child symbols (members/functions/base classes). 
DWORD num_children; if(!SymGetTypeInfo(hProcess, mod_base, type_id, TI_GET_CHILDRENCOUNT, &num_children)) WARN_RETURN(ERR_SYM_TYPE_INFO_UNAVAILABLE); TI_FINDCHILDREN_PARAMS2 fcp(num_children); if(!SymGetTypeInfo(hProcess, mod_base, type_id, TI_FINDCHILDREN, &fcp)) WARN_RETURN(ERR_SYM_TYPE_INFO_UNAVAILABLE); num_children = fcp.p.Count; // was truncated to MAX_CHILDREN const DWORD* children = fcp.p.ChildId; const wchar_t* type_name; if(!SymGetTypeInfo(hProcess, mod_base, type_id, TI_GET_SYMNAME, &type_name)) WARN_RETURN(ERR_SYM_TYPE_INFO_UNAVAILABLE); LibError ret; // note: order is important (e.g. STL special-case must come before // suppressing UDTs, which tosses out most other C++ stdlib classes) ret = udt_dump_std (type_name, p, size, state, num_children, children); if(ret != INFO_CANNOT_HANDLE) goto done; ret = udt_dump_suppressed(type_name, p, size, state, num_children, children); if(ret != INFO_CANNOT_HANDLE) goto done; ret = udt_dump_normal (type_name, p, size, state, num_children, children); if(ret != INFO_CANNOT_HANDLE) goto done; done: LocalFree((HLOCAL)type_name); return ret; } //----------------------------------------------------------------------------- static LibError dump_sym_vtable(DWORD UNUSED(type_id), const u8* UNUSED(p), DumpState UNUSED(state)) { // unsupported (vtable internals are undocumented; too much work). return ERR_SYM_SUPPRESS_OUTPUT; // NOWARN } //----------------------------------------------------------------------------- static LibError dump_sym_unknown(DWORD type_id, const u8* UNUSED(p), DumpState UNUSED(state)) { // redundant (already done in dump_sym), but this is rare. 
DWORD type_tag; if(!SymGetTypeInfo(hProcess, mod_base, type_id, TI_GET_SYMTAG, &type_tag)) WARN_RETURN(ERR_SYM_TYPE_INFO_UNAVAILABLE); debug_printf("SYM| unknown tag: %d\n", type_tag); out(L"(unknown symbol type)"); return ERR_OK; } //----------------------------------------------------------------------------- // write name and value of the symbol to the output buffer. // delegates to dump_sym_* depending on the symbol's tag. static LibError dump_sym(DWORD type_id, const u8* p, DumpState state) { RETURN_ERR(out_check_limit()); DWORD type_tag; if(!SymGetTypeInfo(hProcess, mod_base, type_id, TI_GET_SYMTAG, &type_tag)) WARN_RETURN(ERR_SYM_TYPE_INFO_UNAVAILABLE); switch(type_tag) { case SymTagArrayType: return dump_sym_array (type_id, p, state); case SymTagBaseType: return dump_sym_base_type (type_id, p, state); case SymTagBaseClass: return dump_sym_base_class (type_id, p, state); case SymTagData: return dump_sym_data (type_id, p, state); case SymTagEnum: return dump_sym_enum (type_id, p, state); case SymTagFunction: return dump_sym_function (type_id, p, state); case SymTagFunctionType: return dump_sym_function_type (type_id, p, state); case SymTagPointerType: return dump_sym_pointer (type_id, p, state); case SymTagTypedef: return dump_sym_typedef (type_id, p, state); case SymTagUDT: return dump_sym_udt (type_id, p, state); case SymTagVTable: return dump_sym_vtable (type_id, p, state); default: return dump_sym_unknown (type_id, p, state); } } ////////////////////////////////////////////////////////////////////////////// // // stack trace // ////////////////////////////////////////////////////////////////////////////// // output the symbol's name and value via dump_sym*. // called from dump_frame_cb for each local symbol; lock is held. 
static BOOL CALLBACK dump_sym_cb(SYMBOL_INFO* sym, ULONG UNUSED(size), void* UNUSED(ctx)) { out_latch_pos(); // see decl mod_base = sym->ModBase; const u8* p = (const u8*)sym->Address; DumpState state; INDENT; LibError err = dump_sym(sym->Index, p, state); dump_error(err, p); if(err == ERR_SYM_SUPPRESS_OUTPUT) UNINDENT; else out(L"\r\n"); return TRUE; // continue } ////////////////////////////////////////////////////////////////////////////// struct IMAGEHLP_STACK_FRAME2 : public IMAGEHLP_STACK_FRAME { IMAGEHLP_STACK_FRAME2(const STACKFRAME64* sf) { // apparently only PC, FP and SP are necessary, but // we go whole-hog to be safe. memset(this, 0, sizeof(IMAGEHLP_STACK_FRAME2)); InstructionOffset = sf->AddrPC.Offset; ReturnOffset = sf->AddrReturn.Offset; FrameOffset = sf->AddrFrame.Offset; StackOffset = sf->AddrStack.Offset; BackingStoreOffset = sf->AddrBStore.Offset; FuncTableEntry = (ULONG64)sf->FuncTableEntry; Virtual = sf->Virtual; // (note: array of different types, can't copy directly) for(int i = 0; i < 4; i++) Params[i] = sf->Params[i]; } }; // called by walk_stack for each stack frame static LibError dump_frame_cb(const STACKFRAME64* sf, void* UNUSED(user_arg)) { current_stackframe64 = sf; void* func = (void*)sf->AddrPC.Offset; char func_name[DBG_SYMBOL_LEN]; char file[DBG_FILE_LEN]; int line; if(debug_resolve_symbol(func, func_name, file, &line) == 0) { // don't trace back further than the app's entry point // (noone wants to see this frame). checking for the // function name isn't future-proof, but not stopping is no big deal. // an alternative would be to check if module=kernel32, but // that would cut off callbacks as well. if(!strcmp(func_name, "_BaseProcessStart@4")) return ERR_OK; out(L"%hs (%hs:%d)\r\n", func_name, file, line); } else out(L"%p\r\n", func); // only enumerate symbols for this stack frame // (i.e. its locals and parameters) // problem: debug info is scope-aware, so we won't see any variables // declared in sub-blocks. 
we'd have to pass an address in that block, // which isn't worth the trouble. since IMAGEHLP_STACK_FRAME2 imghlp_frame(sf); SymSetContext(hProcess, &imghlp_frame, 0); // last param is ignored SymEnumSymbols(hProcess, 0, 0, dump_sym_cb, 0); // 2nd and 3rd params indicate scope set by SymSetContext // should be used. out(L"\r\n"); return INFO_CB_CONTINUE; } // write a complete stack trace (including values of local variables) into // the specified buffer. if is nonzero, it is assumed to be a // platform-specific representation of execution state (e.g. Win32 CONTEXT) // and tracing starts there; this is useful for exceptions. // otherwise, tracing starts at the current stack position, and the given // number of stack frames (i.e. functions) above the caller are skipped. // this prevents functions like debug_assert_failed from // cluttering up the trace. returns the buffer for convenience. const wchar_t* debug_dump_stack(wchar_t* buf, size_t max_chars, uint skip, void* pcontext) { static uintptr_t already_in_progress; if(!CAS(&already_in_progress, 0, 1)) { wcscpy_s(buf, max_chars, L"(cannot start a nested stack trace; what probably happened is that " L"an debug_assert/debug_warn/CHECK_ERR fired during the current trace.)" ); return buf; } if(!pcontext) skip++; // skip this frame lock(); out_init(buf, max_chars); ptr_reset_visited(); LibError err = walk_stack(dump_frame_cb, 0, skip, (const CONTEXT*)pcontext); if(err < 0) out(L"(error while building stack trace: %d)", err); unlock(); already_in_progress = 0; return buf; } // write out a "minidump" containing register and stack state; this enables // examining the crash in a debugger. called by wdbg_exception_filter. // heavily modified from http://www.codeproject.com/debug/XCrashReportPt3.asp // lock must be held. void wdbg_write_minidump(EXCEPTION_POINTERS* exception_pointers) { lock(); // note: we go through some gyrations here (strcpy+strcat) to avoid // dependency on file code (path_append). 
char N_path[PATH_MAX]; strcpy_s(N_path, ARRAY_SIZE(N_path), ah_get_log_dir()); strcat_s(N_path, ARRAY_SIZE(N_path), "crashlog.dmp"); HANDLE hFile = CreateFile(N_path, GENERIC_WRITE, FILE_SHARE_WRITE, 0, CREATE_ALWAYS, 0, 0); if(hFile == INVALID_HANDLE_VALUE) goto fail; MINIDUMP_EXCEPTION_INFORMATION mei; mei.ThreadId = GetCurrentThreadId(); mei.ExceptionPointers = exception_pointers; mei.ClientPointers = FALSE; // exception_pointers is not in our address space. // note: we don't store other crashlog info within the dump file // (UserStreamParam), since we will need to generate a plain text file on // non-Windows platforms. users will just have to send us both files. HANDLE hProcess = GetCurrentProcess(); DWORD pid = GetCurrentProcessId(); if(!MiniDumpWriteDump(hProcess, pid, hFile, MiniDumpNormal, &mei, 0, 0)) { fail: DISPLAY_ERROR(L"Unable to generate minidump."); } CloseHandle(hFile); unlock(); } static LibError wdbg_sym_shutdown() { ptr_reset_visited(); return sym_shutdown(); } - - -//---------------------------------------------------------------------------- -// built-in self test -//---------------------------------------------------------------------------- - -#if SELF_TEST_ENABLED -namespace test { -#pragma optimize("", off) - - - -static void test_array() -{ - struct Small - { - int i1; - int i2; - }; - - struct Large - { - double d1; - double d2; - double d3; - double d4; - }; - - Large large_array_of_large_structs[8] = { { 0.0,0.0,0.0,0.0 } }; UNUSED2(large_array_of_large_structs); - Large small_array_of_large_structs[2] = { { 0.0,0.0,0.0,0.0 } }; UNUSED2(small_array_of_large_structs); - Small large_array_of_small_structs[8] = { { 1,2 } }; UNUSED2(large_array_of_small_structs); - Small small_array_of_small_structs[2] = { { 1,2 } }; UNUSED2(small_array_of_small_structs); - - int ints[] = { 1,2,3,4,5 }; UNUSED2(ints); - wchar_t chars[] = { 'w','c','h','a','r','s',0 }; UNUSED2(chars); - - // note: prefer simple error (which also generates stack trace) to - 
// exception, because it is guaranteed to work (no issues with the - // debugger swallowing exceptions). - DISPLAY_ERROR(L"wdbg_sym self test: check if stack trace below is ok."); - //RaiseException(0xf001,0,0,0); -} - -// also used by test_stl as an element type -struct Nested -{ - int nested_member; - struct Nested* self_ptr; -}; - -static void test_udt() -{ - Nested nested = { 123 }; nested.self_ptr = &nested; - - typedef struct - { - u8 s1; - u8 s2; - char s3; - } - Small; - Small small__ = { 0x55, 0xaa, -1 }; UNUSED2(small__); - - struct Large - { - u8 large_member_u8; - std::string large_member_string; - double large_member_double; - } - large = { 0xff, "large struct string", 123456.0 }; UNUSED2(large); - - - class Base - { - int base_int; - std::wstring base_wstring; - public: - Base() - : base_int(123), base_wstring(L"base wstring") - { - } - }; - class Derived : private Base - { - double derived_double; - public: - Derived() - : derived_double(-1.0) - { - } - } - derived; - - test_array(); -} - -// STL containers and their contents -static void test_stl() -{ - std::vector v_wstring; - v_wstring.push_back(L"ws1"); v_wstring.push_back(L"ws2"); - - std::deque d_int; - d_int.push_back(1); d_int.push_back(2); d_int.push_back(3); - std::deque d_string; - d_string.push_back("a"); d_string.push_back("b"); d_string.push_back("c"); - - std::list l_float; - l_float.push_back(0.1f); l_float.push_back(0.2f); l_float.push_back(0.3f); l_float.push_back(0.4f); - - std::map m_string_int; - m_string_int.insert(std::make_pair("s5", 5)); - m_string_int.insert(std::make_pair("s6", 6)); - m_string_int.insert(std::make_pair("s7", 7)); - std::map m_int_string; - m_int_string.insert(std::make_pair(1, "s1")); - m_int_string.insert(std::make_pair(2, "s2")); - m_int_string.insert(std::make_pair(3, "s3")); - std::map m_int_int; - m_int_int.insert(std::make_pair(1, 1)); - m_int_int.insert(std::make_pair(2, 2)); - m_int_int.insert(std::make_pair(3, 3)); - - STL_HASH_MAP hm_string_int; - 
hm_string_int.insert(std::make_pair("s5", 5)); - hm_string_int.insert(std::make_pair("s6", 6)); - hm_string_int.insert(std::make_pair("s7", 7)); - STL_HASH_MAP hm_int_string; - hm_int_string.insert(std::make_pair(1, "s1")); - hm_int_string.insert(std::make_pair(2, "s2")); - hm_int_string.insert(std::make_pair(3, "s3")); - STL_HASH_MAP hm_int_int; - hm_int_int.insert(std::make_pair(1, 1)); - hm_int_int.insert(std::make_pair(2, 2)); - hm_int_int.insert(std::make_pair(3, 3)); - - - std::set s_uintptr; - s_uintptr.insert(0x123); s_uintptr.insert(0x456); - - // empty - std::deque d_u8_empty; - std::list l_nested_empty; - std::map m_double_empty; - std::multimap mm_int_empty; - std::set s_uint_empty; - std::multiset ms_char_empty; - std::vector v_double_empty; - std::queue q_double_empty; - std::stack st_double_empty; -#if HAVE_STL_HASH - STL_HASH_MAP hm_double_empty; - STL_HASH_MULTIMAP hmm_double_empty; - STL_HASH_SET hs_double_empty; - STL_HASH_MULTISET hms_double_empty; -#endif -#if HAVE_STL_SLIST - STL_SLIST sl_double_empty; -#endif - std::string str_empty; - std::wstring wstr_empty; - - test_udt(); - - // uninitialized - std::deque d_u8_uninit; - std::list l_nested_uninit; - std::map m_double_uninit; - std::multimap mm_int_uninit; - std::set s_uint_uninit; - std::multiset ms_char_uninit; - std::vector v_double_uninit; - std::queue q_double_uninit; - std::stack st_double_uninit; -#if HAVE_STL_HASH - STL_HASH_MAP hm_double_uninit; - STL_HASH_MULTIMAP hmm_double_uninit; - STL_HASH_SET hs_double_uninit; - STL_HASH_MULTISET hms_double_uninit; -#endif -#if HAVE_STL_SLIST - STL_SLIST sl_double_uninit; -#endif - std::string str_uninit; - std::wstring wstr_uninit; -} - - -// also exercises all basic types because we need to display some values -// anyway (to see at a glance whether symbol engine addrs are correct) -static void test_addrs(int p_int, double p_double, char* p_pchar, uintptr_t p_uintptr) -{ - debug_printf("\nTEST_ADDRS\n"); - - uint l_uint = 0x1234; - bool 
l_bool = true; UNUSED2(l_bool); - wchar_t l_wchars[] = L"wchar string"; - enum TestEnum { VAL1=1, VAL2=2 } l_enum = VAL1; - u8 l_u8s[] = { 1,2,3,4 }; - void (*l_funcptr)(void) = test_stl; - - static double s_double = -2.718; - static char s_chars[] = {'c','h','a','r','s',0}; - static void (*s_funcptr)(int, double, char*, uintptr_t) = test_addrs; - static void* s_ptr = (void*)(uintptr_t)0x87654321; - static HDC s_hdc = (HDC)0xff0; - - debug_printf("p_int addr=%p val=%d\n", &p_int, p_int); - debug_printf("p_double addr=%p val=%g\n", &p_double, p_double); - debug_printf("p_pchar addr=%p val=%s\n", &p_pchar, p_pchar); - debug_printf("p_uintptr addr=%p val=%lu\n", &p_uintptr, p_uintptr); - - debug_printf("l_uint addr=%p val=%u\n", &l_uint, l_uint); - debug_printf("l_wchars addr=%p val=%ws\n", &l_wchars, l_wchars); - debug_printf("l_enum addr=%p val=%d\n", &l_enum, l_enum); - debug_printf("l_u8s addr=%p val=%d\n", &l_u8s, l_u8s); - debug_printf("l_funcptr addr=%p val=%p\n", &l_funcptr, l_funcptr); - - test_stl(); - - int uninit_int; UNUSED2(uninit_int); - float uninit_float; UNUSED2(uninit_float); - double uninit_double; UNUSED2(uninit_double); - bool uninit_bool; UNUSED2(uninit_bool); - HWND uninit_hwnd; UNUSED2(uninit_hwnd); -} - - -static void self_test() -{ - test_addrs(123, 3.1415926535897932384626, "pchar string", 0xf00d); -} - -SELF_TEST_RUN; - -#pragma optimize("", on) -} // namespace test -#endif // #if SELF_TEST_ENABLED Index: ps/trunk/source/lib/path_util.h =================================================================== --- ps/trunk/source/lib/path_util.h (revision 3910) +++ ps/trunk/source/lib/path_util.h (revision 3911) @@ -1,153 +1,154 @@ /** * ========================================================================= * File : path_util.h * Project : 0 A.D. * Description : helper functions for path strings. 
* * @author Jan.Wassenberg@stud.uni-karlsruhe.de * ========================================================================= */ /* * Copyright (c) 2004-2006 Jan Wassenberg * * Redistribution and/or modification are also permitted under the * terms of the GNU General Public License as published by the * Free Software Foundation (version 2 or later, at your option). * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. */ // notes: // - this module is split out of lib/res/file so that it can be used from // other code without pulling in the entire file manager. // - there is no restriction on buffer lengths except the underlying OS. // input buffers must not exceed PATH_MAX chars, while outputs // must hold at least that much. // - unless otherwise mentioned, all functions are intended to work with // native and portable and VFS paths. // when reading, both '/' and DIR_SEP are accepted; '/' is written. #ifndef PATH_UTIL_H__ #define PATH_UTIL_H__ #include "posix.h" // PATH_MAX // if path is invalid (see source for criteria), return a // descriptive error code, otherwise ERR_OK. extern LibError path_validate(const char* path); #define CHECK_PATH(path) RETURN_ERR(path_validate(path)) // if name is invalid, (see source for criteria), return a // descriptive error code, otherwise ERR_OK. extern LibError path_component_validate(const char* name); // is s2 a subpath of s1, or vice versa? +// (equal counts as subpath) extern bool path_is_subpath(const char* s1, const char* s2); // if path is invalid, return a descriptive error code, otherwise ERR_OK. extern LibError path_validate(const char* path); // if name is invalid, return a descriptive error code, otherwise ERR_OK. // (name is a path component, i.e. 
that between directory separators) extern LibError path_component_validate(const char* name); // copy path strings (provided for convenience). extern void path_copy(char* dst, const char* src); enum PathAppendFlags { // make sure ends up with a trailing slash. this is useful for // VFS directory paths, which have that requirement. PATH_APPEND_SLASH = 1 }; // combine and into one path, and write to . // if necessary, a directory separator is added between the paths. // each may be empty, filenames, or full paths. // total path length (including '\0') must not exceed PATH_MAX. extern LibError path_append(char* dst, const char* path1, const char* path2, uint flags = 0); // strip from the start of , prepend , // and write to . // returns ERR_FAIL (without warning!) if the beginning of doesn't // match . extern LibError path_replace(char* dst, const char* src, const char* remove, const char* replace); // return pointer to the name component within path (i.e. skips over all // characters up to the last dir separator, if any). extern const char* path_name_only(const char* path); // return last component within path. this is similar to path_name_only, // but correctly handles VFS paths, which must end with '/'. // (path_name_only would return "") extern const char* path_last_component(const char* path); // if contains a name component, it is stripped away. extern void path_strip_fn(char* path); // fill with the directory path portion of // ("" if root dir, otherwise ending with '/'). // note: copies to and proceeds to path_strip_fn it. extern void path_dir_only(const char* path, char* dir); // return extension of , or "" if there is none. // NOTE: does not include the period; e.g. "a.bmp" yields "bmp". extern const char* path_extension(const char* fn); // called for each component in a path string, indicating if it's // a directory (i.e. is followed by a slash in the original // path). // if path is empty (i.e. ""), this is not called. 
// // component: 0-terminated name of the component (does not include any // trailing slash!) // ctx: context parameter that was passed to path_foreach_component. // return: INFO_CB_CONTINUE to continue operation normally; anything else // will cause path_foreach_component to abort immediately and return that. // no need to 'abort' (e.g. return ERR_OK) after a filename is encountered - // that's taken care of automatically. // // rationale: // - we indicate if it's a directory via bool. this isn't as nice as a // flag or enum, but vfs_tree already has TNodeType and we don't want // to expose that or create a new one. typedef LibError (*PathComponentCb)(const char* component, bool is_dir, void* ctx); // call with for each component in . extern LibError path_foreach_component(const char* path, PathComponentCb cb, void* ctx); //----------------------------------------------------------------------------- // convenience "class" that simplifies successively appending a filename to // its parent directory. this avoids needing to allocate memory and calling // strlen/strcat. used by wdll_ver and dir_next_ent. // we want to maintain C compatibility, so this isn't a C++ class. struct PathPackage { char* end; size_t chars_left; char path[PATH_MAX]; }; // write the given directory path into our buffer and set end/chars_left // accordingly. need not but can end with a directory separator. // // note: and the filename set via path_package_append_file are separated by // '/'. this is to allow use on portable paths; the function otherwise // does not care if paths are relative/portable/absolute. extern LibError path_package_set_dir(PathPackage* pp, const char* dir); // append the given filename to the directory established by the last // path_package_set_dir on this package. the whole path is accessible at pp->path. 
extern LibError path_package_append_file(PathPackage* pp, const char* path); #endif // #ifndef PATH_UTIL_H__ Index: ps/trunk/source/lib/lockfree.cpp =================================================================== --- ps/trunk/source/lib/lockfree.cpp (revision 3910) +++ ps/trunk/source/lib/lockfree.cpp (revision 3911) @@ -1,968 +1,741 @@ /** * ========================================================================= * File : lockfree.cpp * Project : 0 A.D. * Description : lock-free synchronized data structures. * * @author Jan.Wassenberg@stud.uni-karlsruhe.de * ========================================================================= */ /* * Copyright (c) 2005 Jan Wassenberg * * Redistribution and/or modification are also permitted under the * terms of the GNU General Public License as published by the * Free Software Foundation (version 2 or later, at your option). * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. */ #include "precompiled.h" #include #include #include "lib.h" #include "posix.h" #include "sysdep/cpu.h" #include "lockfree.h" #include "timer.h" -// known to fail on P4 due to mem reordering and lack of membars. -#undef SELF_TEST_ENABLED -#define SELF_TEST_ENABLED 0 - /* liberties taken: - R(H) will remain constant (since TLS rlist is fixed-size, and we don't care about O(1) amortization proofs) lacking from pseudocode: - mark HPRec as active when allocated questions: - does hp0 ("private, static") need to be in TLS? or is per-"find()" ok? - memory barriers where? todo: -make sure retired node array doesn't overflow. add padding (i.e. "Scan" if half-full?) -see why SMR had algo extension of HelpScan -simple iteration is safe? */ // total number of hazard pointers needed by each thread. // determined by the algorithms using SMR; the LF list requires 2. 
static const uint NUM_HPS = 2; // number of slots for the per-thread node freelist. // this is a reasonable size and pads struct TLS to 64 bytes. static const size_t MAX_RETIRED = 11; // used to allocate a flat array of all hazard pointers. // changed via atomic_add by TLS when a thread first calls us / exits. static intptr_t active_threads; // basically module refcount; we can't shut down before it's 0. // changed via atomic_add by each data structure's init/free. static intptr_t active_data_structures; // Nodes are internal to this module. having callers pass them in would // be more convenient but risky, since they might change and , // or not allocate via malloc (necessary since Nodes are garbage-collected // and allowing user-specified destructors would be more work). // // to still allow storing arbitrary user data without requiring an // additional memory alloc per node, we append bytes to the // end of the Node structure; this is what is returned by find. struct Node { Node* next; uintptr_t key; // are allocated here at the caller's discretion. }; static inline Node* node_alloc(size_t additional_bytes) { return (Node*)calloc(1, sizeof(Node) + additional_bytes); } static inline void node_free(Node* n) { free(n); } static inline void* node_user_data(Node* n) { return (u8*)n + sizeof(Node); } ////////////////////////////////////////////////////////////////////////////// // // thread-local storage for SMR // ////////////////////////////////////////////////////////////////////////////// static pthread_key_t tls_key; static pthread_once_t tls_once = PTHREAD_ONCE_INIT; struct TLS { TLS* next; void* hp[NUM_HPS]; uintptr_t active; // used as bool, but set by CAS Node* retired_nodes[MAX_RETIRED]; size_t num_retired_nodes; }; static TLS* tls_list = 0; // mark a participating thread's slot as unused; clear its hazard pointers. // called during smr_try_shutdown and when a thread exits // (by pthread dtor, which is registered in tls_init). 
static void tls_retire(void* tls_) { TLS* tls = (TLS*)tls_; // our hazard pointers are no longer in use for(size_t i = 0; i < NUM_HPS; i++) tls->hp[i] = 0; // successfully marked as unused (must only decrement once) if(CAS(&tls->active, 1, 0)) { atomic_add(&active_threads, -1); debug_assert(active_threads >= 0); } } // (called via pthread_once from tls_get) static void tls_init() { WARN_ERR(pthread_key_create(&tls_key, tls_retire)); } // free all TLS info. called by smr_try_shutdown. static void tls_shutdown() { WARN_ERR(pthread_key_delete(tls_key)); tls_key = 0; while(tls_list) { TLS* tls = tls_list; tls_list = tls->next; free(tls); } } // return a new TLS struct ready for use; either a previously // retired slot, or if none are available, a newly allocated one. // if out of memory, return (TLS*)-1; see fail path. // called from tls_get after tls_init. static TLS* tls_alloc() { // make sure we weren't shut down in the meantime - re-init isn't // possible since pthread_once (which can't be reset) calls tls_init. debug_assert(tls_key != 0); TLS* tls; // try to reuse a retired TLS slot for(tls = tls_list; tls; tls = tls->next) // .. succeeded in reactivating one. if(CAS(&tls->active, 0, 1)) goto have_tls; // no unused slots available - allocate another { tls = (TLS*)calloc(1, sizeof(TLS)); // .. not enough memory. poison the thread's TLS value to // prevent a later tls_get from succeeding, because that // would potentially break the user's LF data structure. if(!tls) { tls = (TLS*)-1; WARN_ERR(pthread_setspecific(tls_key, tls)); return tls; } tls->active = 1; // insert at front of list (wait free since # threads is finite). TLS* old_tls_list; do { old_tls_list = tls_list; tls->next = old_tls_list; } while(!CAS(&tls_list, old_tls_list, tls)); } have_tls: atomic_add(&active_threads, 1); WARN_ERR(pthread_setspecific(tls_key, tls)); return tls; } // return this thread's struct TLS, or (TLS*)-1 if tls_alloc failed. 
// called from each lfl_* function, so don't waste any time. static TLS* tls_get() { WARN_ERR(pthread_once(&tls_once, tls_init)); // already allocated or tls_alloc failed. TLS* tls = (TLS*)pthread_getspecific(tls_key); if(tls) return tls; // first call: return a newly allocated slot. return tls_alloc(); } ////////////////////////////////////////////////////////////////////////////// // // "Safe Memory Reclamation for Lock-Free Objects" via hazard pointers // ////////////////////////////////////////////////////////////////////////////// // is one of the hazard pointers in pointing at ? static bool is_node_referenced(Node* node, void** hps, size_t num_hps) { for(size_t i = 0; i < num_hps; i++) if(hps[i] == node) return true; return false; } // "Scan" // run through all retired nodes in this thread's freelist; any of them // not currently referenced are released (their memory freed). static void smr_release_unreferenced_nodes(TLS* tls) { // nothing to do, and taking address of array[-1] isn't portable. // we're called from smr_try_shutdown, if(tls->num_retired_nodes == 0) return; // required for head/tail below; guaranteed by callers. debug_assert(tls->num_retired_nodes != 0); // // build array of all active (non-NULL) hazard pointers (more efficient // than walking through tls_list on every is_node_referenced call) // retry: const size_t max_hps = (active_threads+3) * NUM_HPS; // allow for creating a few additional threads during the loop void** hps = (void**)alloca(max_hps * sizeof(void*)); size_t num_hps = 0; // for each participating thread: for(TLS* t = tls_list; t; t = t->next) // for each of its non-NULL hazard pointers: for(int i = 0; i < NUM_HPS-1; i++) { void* hp = t->hp[i]; if(!hp) continue; // many threads were created after choosing max_hps => // start over. this won't realistically happen, though. 
if(num_hps >= max_hps) { debug_warn("max_hps overrun - why?"); goto retry; } hps[num_hps++] = hp; } // // free all discarded nodes that are no longer referenced // (i.e. no element in hps[] points to them). no need to lock or // clone the retired_nodes list since it's in TLS. // Node** head = tls->retired_nodes; Node** tail = head + tls->num_retired_nodes-1; while(head <= tail) { Node* node = *head; // still in use - just skip to the next if(is_node_referenced(node, hps, num_hps)) head++; else { node_free(node); // to avoid holes in the freelist, replace with last entry. // this is easier than building a new list. *head = *tail; // if last element, no-op tail--; tls->num_retired_nodes--; } } } // note: we don't implement "HelpScan" - it is sufficient for the // freelists in retired but never-reused TLS slots to be emptied at exit, // since huge spikes of active threads are unrealistic. static void smr_retire_node(Node* node) { TLS* tls = tls_get(); debug_assert(tls != (void*)-1); // if this triggers, tls_alloc called from lfl_init failed due to // lack of memory and the caller didn't check its return value. tls->retired_nodes[tls->num_retired_nodes++] = node; if(tls->num_retired_nodes >= MAX_RETIRED) smr_release_unreferenced_nodes(tls); } // // shutdown // // although not strictly necessary (the OS will free resources at exit), // we want to free all nodes and TLS to avoid spamming leak detectors. // that can only happen after our users indicate all data structures are // no longer in use (i.e. active_data_structures == 0). // // problem: if the first user of a data structure is finished before // program termination, we'd shut down and not be able to reinitialize // (see tls_alloc). therefore, we don't shut down before // static destructors are called, i.e. end of program is at hand. static bool is_static_dtor_time = false; // call when a data structure is freed (i.e. no longer in use); // we shut down if it is time to do so. 
static void smr_try_shutdown() { // shouldn't or can't shut down yet. if(!is_static_dtor_time || active_data_structures != 0) return; for(TLS* t = tls_list; t; t = t->next) { tls_retire(t); // wipe out hazard pointers so that everything can be freed. smr_release_unreferenced_nodes(t); } tls_shutdown(); } // non-local static object - its destructor being called indicates // program end is at hand. could use atexit for this, but registering // that would be a bit more work. static struct NLSO { NLSO() { } ~NLSO() { is_static_dtor_time = true; // trigger shutdown in case all data structures have // already been freed. smr_try_shutdown(); } } nlso; ////////////////////////////////////////////////////////////////////////////// // // lock-free singly linked list // ////////////////////////////////////////////////////////////////////////////// // output of lfl_lookup struct ListPos { Node** pprev; Node* cur; Node* next; }; // we 'mark' the next pointer of a retired node to prevent linking // to it in concurrent inserts. since all pointers returned by malloc are // at least 2-byte aligned, we can use the least significant bit. static inline bool is_marked_as_deleted(Node* p) { const uintptr_t u = (uintptr_t)p; return (u & BIT(0)) != 0; } static inline Node* with_mark(Node* p) { debug_assert(!is_marked_as_deleted(p)); // paranoia return p+1; } static inline Node* without_mark(Node* p) { debug_assert(is_marked_as_deleted(p)); // paranoia return p-1; } // make ready a previously unused(!) list object. if a negative error // code (currently only ERR_NO_MEM) is returned, the list can't be used. LibError lfl_init(LFList* list) { // make sure a TLS slot has been allocated for this thread. // if not (out of memory), the list object must not be used - // other calls don't have a "tls=0" failure path. 
// (it doesn't make sense to allow some calls to fail until more // memory is available, since that might leave the list in an // invalid state or leak memory) TLS* tls = tls_get(); if(!tls) { list->head = (void*)-1; // 'poison' prevents further use return ERR_NO_MEM; } list->head = 0; atomic_add(&active_data_structures, 1); return ERR_OK; } // call when list is no longer needed; should no longer hold any references. void lfl_free(LFList* list) { // TODO: is this iteration safe? Node* cur = (Node*)list->head; while(cur) { Node* next = cur->next; // must latch before smr_retire_node, since that may // actually free the memory. smr_retire_node(cur); cur = next; } atomic_add(&active_data_structures, -1); debug_assert(active_data_structures >= 0); smr_try_shutdown(); } // "Find" // look for a given key in the list; return true iff found. // pos points to the last inspected node and its successor and predecessor. static bool list_lookup(LFList* list, uintptr_t key, ListPos* pos) { TLS* tls = tls_get(); debug_assert(tls != (void*)-1); // if this triggers, tls_alloc called from lfl_init failed due to // lack of memory and the caller didn't check its return value. void** hp0 = &tls->hp[0]; // protects cur void** hp1 = &tls->hp[1]; // protects *pprev retry: pos->pprev = (Node**)&list->head; // linearization point of erase and find if list is empty. // already protected by virtue of being the root node. pos->cur = *pos->pprev; // until end of list: while(pos->cur) { *hp0 = pos->cur; // pprev changed (<==> *pprev or cur was removed) => start over. // lock-free, since other threads thereby make progress. if(*pos->pprev != pos->cur) goto retry; pos->next = pos->cur->next; // linearization point of the following if list is not empty: // unsuccessful insert or erase; find. // this node has been removed from the list; retire it before // continuing (we don't want to add references to it). 
if(is_marked_as_deleted(pos->next)) { Node* next = without_mark(pos->next); if(!CAS(pos->pprev, pos->cur, next)) goto retry; smr_retire_node(pos->cur); pos->cur = next; } else { // (see above goto) if(*pos->pprev != pos->cur) goto retry; // the nodes are sorted in ascending key order, so we've either // found , or it's not in the list. const uintptr_t cur_key = pos->cur->key; if(cur_key >= key) return (cur_key == key); pos->pprev = &pos->cur->next; pos->cur = pos->next; // protect pprev in the subsequent iteration; it has assumed an // arithmetic variation of cur (adding offsetof(Node, next)). // note that we don't need to validate *pprev, since *hp0 is // already protecting cur. std::swap(hp0, hp1); } } // hit end of list => not found. return false; } // return pointer to "user data" attached to , // or 0 if not found in the list. void* lfl_find(LFList* list, uintptr_t key) { ListPos* pos = (ListPos*)alloca(sizeof(ListPos)); if(!list_lookup(list, key, pos)) return 0; return node_user_data(pos->cur); } // insert into list in order of increasing key. ensures items are unique // by first checking if already in the list. returns 0 if out of memory, // otherwise a pointer to "user data" attached to . the optional // return variable indicates whether was added. void* lfl_insert(LFList* list, uintptr_t key, size_t additional_bytes, int* was_inserted) { TLS* tls = tls_get(); debug_assert(tls != (void*)-1); // if this triggers, tls_alloc called from lfl_init failed due to // lack of memory and the caller didn't check its return value. ListPos* pos = (ListPos*)alloca(sizeof(ListPos)); Node* node = 0; if(was_inserted) *was_inserted = 0; retry: // already in list - return it and leave 'false' if(list_lookup(list, key, pos)) { // free in case we allocated below, but CAS failed; // no-op if node == 0, i.e. it wasn't allocated. node_free(node); node = pos->cur; goto have_node; } // else: not yet in list, so allocate a new Node if we haven't already. 
// doing that after list_lookup avoids needless alloc/free. if(!node) { node = node_alloc(additional_bytes); // .. out of memory if(!node) return 0; } node->key = key; node->next = pos->cur; // atomic insert immediately before pos->cur. failure implies // at least of the following happened after list_lookup; we try again. // - *pprev was removed (i.e. it's 'marked') // - cur was retired (i.e. no longer reachable from *phead) // - a new node was inserted immediately before cur if(!CAS(pos->pprev, pos->cur, node)) goto retry; // else: successfully inserted; linearization point if(was_inserted) *was_inserted = 1; have_node: return node_user_data(node); } // remove from list; return -1 if not found, or 0 on success. LibError lfl_erase(LFList* list, uintptr_t key) { TLS* tls = tls_get(); debug_assert(tls != (void*)-1); // if this triggers, tls_alloc called from lfl_init failed due to // lack of memory and the caller didn't check its return value. ListPos* pos = (ListPos*)alloca(sizeof(ListPos)); retry: // not found in list - abort. if(!list_lookup(list, key, pos)) return ERR_FAIL; // mark as removed (avoids subsequent linking to it). failure implies // at least of the following happened after list_lookup; we try again. // - next was removed // - cur was retired (i.e. no longer reachable from *phead) // - a new node was inserted immediately after cur if(!CAS(&pos->cur->next, pos->next, with_mark(pos->next))) goto retry; // remove from list; if successful, this is the // linearization point and *pprev isn't marked. if(CAS(pos->pprev, pos->cur, pos->next)) smr_retire_node(pos->cur); // failed: another thread removed cur after it was marked above. // call list_lookup to ensure # non-released nodes < # threads. 
else list_lookup(list, key, pos); return ERR_OK; } ////////////////////////////////////////////////////////////////////////////// // // lock-free hash table // ////////////////////////////////////////////////////////////////////////////// // note: implemented via lfl, so we don't need to track // active_data_structures or call smr_try_shutdown here. static void validate(LFHash* hash) { debug_assert(hash->tbl); debug_assert(is_pow2(hash->mask+1)); } // return hash "chain" (i.e. linked list) that is assigned to . static LFList* chain(LFHash* hash, uintptr_t key) { validate(hash); return &hash->tbl[key & hash->mask]; } // make ready a previously unused(!) hash object. table size will be // ; this cannot currently be expanded. if a negative error // code (currently only ERR_NO_MEM) is returned, the hash can't be used. LibError lfh_init(LFHash* hash, size_t num_entries) { hash->tbl = 0; hash->mask = ~0u; if(!is_pow2((long)num_entries)) { debug_warn("lfh_init: size must be power of 2"); return ERR_INVALID_PARAM; } hash->tbl = (LFList*)malloc(sizeof(LFList) * num_entries); if(!hash->tbl) return ERR_NO_MEM; hash->mask = (uint)num_entries-1; for(int i = 0; i < (int)num_entries; i++) { int err = lfl_init(&hash->tbl[i]); if(err < 0) { // failed - free all and bail for(int j = 0; j < i; j++) lfl_free(&hash->tbl[j]); return ERR_NO_MEM; } } return ERR_OK; } // call when hash is no longer needed; should no longer hold any references. void lfh_free(LFHash* hash) { validate(hash); // free all chains for(size_t i = 0; i < hash->mask+1; i++) lfl_free(&hash->tbl[i]); free(hash->tbl); hash->tbl = 0; hash->mask = 0; } // return pointer to "user data" attached to , // or 0 if not found in the hash. void* lfh_find(LFHash* hash, uintptr_t key) { return lfl_find(chain(hash,key), key); } // insert into hash if not already present. returns 0 if out of memory, // otherwise a pointer to "user data" attached to . the optional // return variable indicates whether was added. 
void* lfh_insert(LFHash* hash, uintptr_t key, size_t additional_bytes, int* was_inserted) { return lfl_insert(chain(hash,key), key, additional_bytes, was_inserted); } // remove from hash; return -1 if not found, or 0 on success. LibError lfh_erase(LFHash* hash, uintptr_t key) { return lfl_erase(chain(hash,key), key); } - - -////////////////////////////////////////////////////////////////////////////// -// -// built-in self test -// -////////////////////////////////////////////////////////////////////////////// - -#if SELF_TEST_ENABLED -namespace test { - -#define TEST_CALL(expr) TEST(expr == 0) - -// make sure the data structures work at all; doesn't test thread-safety. -static void basic_single_threaded_test() -{ - void* user_data; - - const uint ENTRIES = 50; - // should be more than max # retired nodes to test release..() code - uintptr_t key = 0x1000; - uint sig = 10; - - LFList list; - TEST_CALL(lfl_init(&list)); - - LFHash hash; - TEST_CALL(lfh_init(&hash, 8)); - - // add some entries; store "signatures" (ascending int values) - for(uint i = 0; i < ENTRIES; i++) - { - int was_inserted; - - user_data = lfl_insert(&list, key+i, sizeof(int), &was_inserted); - TEST(user_data != 0 && was_inserted); - *(uint*)user_data = sig+i; - - user_data = lfh_insert(&hash, key+i, sizeof(int), &was_inserted); - TEST(user_data != 0 && was_inserted); - *(uint*)user_data = sig+i; - } - - // make sure all "signatures" are present in list - for(uint i = 0; i < ENTRIES; i++) - { - user_data = lfl_find(&list, key+i); - TEST(user_data != 0); - TEST(*(uint*)user_data == sig+i); - - user_data = lfh_find(&hash, key+i); - TEST(user_data != 0); - TEST(*(uint*)user_data == sig+i); - - } - - lfl_free(&list); - lfh_free(&hash); -} - - -// -// multithreaded torture test -// - -// poor man's synchronization "barrier" -static bool is_complete; -static intptr_t num_active_threads; - -static LFList list; -static LFHash hash; - -typedef std::set KeySet; -typedef KeySet::const_iterator KeySetIt; 
-static KeySet keys; -static pthread_mutex_t mutex; // protects - - -static void* thread_func(void* arg) -{ - debug_set_thread_name("LF_test"); - - const uintptr_t thread_number = (uintptr_t)arg; - - atomic_add(&num_active_threads, 1); - - // chosen randomly every iteration (int_value % 4) - enum TestAction - { - TA_FIND = 0, - TA_INSERT = 1, - TA_ERASE = 2, - TA_SLEEP = 3 - }; - static const char* const action_strings[] = - { - "find", "insert", "erase", "sleep" - }; - - while(!is_complete) - { - void* user_data; - - const int action = rand(0, 4); - const uintptr_t key = rand(0, 100); - const int sleep_duration_ms = rand(0, 100); - debug_printf("thread %d: %s\n", thread_number, action_strings[action]); - - // - pthread_mutex_lock(&mutex); - const bool was_in_set = keys.find(key) != keys.end(); - if(action == TA_INSERT) - keys.insert(key); - else if(action == TA_ERASE) - keys.erase(key); - pthread_mutex_unlock(&mutex); - - switch(action) - { - case TA_FIND: - { - user_data = lfl_find(&list, key); - TEST(was_in_set == (user_data != 0)); - if(user_data) - TEST(*(uintptr_t*)user_data == ~key); - - user_data = lfh_find(&hash, key); - // typical failure site if lockfree data structure has bugs. 
- TEST(was_in_set == (user_data != 0)); - if(user_data) - TEST(*(uintptr_t*)user_data == ~key); - } - break; - - case TA_INSERT: - { - int was_inserted; - - user_data = lfl_insert(&list, key, sizeof(uintptr_t), &was_inserted); - TEST(user_data != 0); // only triggers if out of memory - *(uintptr_t*)user_data = ~key; // checked above - TEST(was_in_set == !was_inserted); - - user_data = lfh_insert(&hash, key, sizeof(uintptr_t), &was_inserted); - TEST(user_data != 0); // only triggers if out of memory - *(uintptr_t*)user_data = ~key; // checked above - TEST(was_in_set == !was_inserted); - } - break; - - case TA_ERASE: - { - int err; - - err = lfl_erase(&list, key); - TEST(was_in_set == (err == ERR_OK)); - - err = lfh_erase(&hash, key); - TEST(was_in_set == (err == ERR_OK)); - } - break; - - case TA_SLEEP: - usleep(sleep_duration_ms*1000); - break; - - default: - DISPLAY_ERROR(L"invalid TA_* action"); - break; - } // switch - } // while !is_complete - - atomic_add(&num_active_threads, -1); - TEST(num_active_threads >= 0); - - return 0; -} - - -static void multithreaded_torture_test() -{ - // this test is randomized; we need deterministic results. - srand(1); - - static const double TEST_LENGTH = 30.; // [seconds] - const double end_time = get_time() + TEST_LENGTH; - is_complete = false; - - WARN_ERR(lfl_init(&list)); - WARN_ERR(lfh_init(&hash, 128)); - WARN_ERR(pthread_mutex_init(&mutex, 0)); - - // spin off test threads (many, to force preemption) - const uint NUM_THREADS = 16; - for(uintptr_t i = 0; i < NUM_THREADS; i++) - pthread_create(0, 0, thread_func, (void*)i); - - // wait until time interval elapsed (if we get that far, all is well). - while(get_time() < end_time) - usleep(10*1000); - - // signal and wait for all threads to complete (poor man's barrier - - // those aren't currently implemented in wpthread). 
- is_complete = true; - while(num_active_threads > 0) - usleep(5*1000); - - lfl_free(&list); - lfh_free(&hash); - WARN_ERR(pthread_mutex_destroy(&mutex)); -} - - -static void self_test() -{ - basic_single_threaded_test(); - multithreaded_torture_test(); -} - -SELF_TEST_RUN; - -} // namespace test -#endif // #if SELF_TEST_ENABLED Index: ps/trunk/source/maths/Matrix3D.cpp =================================================================== --- ps/trunk/source/maths/Matrix3D.cpp (revision 3910) +++ ps/trunk/source/maths/Matrix3D.cpp (revision 3911) @@ -1,597 +1,526 @@ //*********************************************************** // // Name: Matrix3D.Cpp // Last Update: 31/1/02 // Author: Poya Manouchehri // // Description: A Matrix class used for holding and // manipulating transformation info. // //*********************************************************** #include "precompiled.h" #include "Matrix3D.h" #include "Quaternion.h" #include "self_test.h" CMatrix3D::CMatrix3D () { } CMatrix3D::CMatrix3D(float a11,float a12,float a13,float a14,float a21,float a22,float a23,float a24, float a31,float a32,float a33,float a34,float a41,float a42,float a43,float a44) { _11=a11; _12=a12; _13=a13; _14=a14; _21=a21; _22=a22; _23=a23; _24=a24; _31=a31; _32=a32; _33=a33; _34=a34; _41=a41; _42=a42; _43=a43; _44=a44; } CMatrix3D::CMatrix3D(float data[]) { for(int i=0; i<16; i++) { _data[i] = data[i]; } } //Matrix multiplication CMatrix3D CMatrix3D::operator*(const CMatrix3D& matrix) const { return CMatrix3D( _11*matrix._11 + _12*matrix._21 + _13*matrix._31 + _14*matrix._41, _11*matrix._12 + _12*matrix._22 + _13*matrix._32 + _14*matrix._42, _11*matrix._13 + _12*matrix._23 + _13*matrix._33 + _14*matrix._43, _11*matrix._14 + _12*matrix._24 + _13*matrix._34 + _14*matrix._44, _21*matrix._11 + _22*matrix._21 + _23*matrix._31 + _24*matrix._41, _21*matrix._12 + _22*matrix._22 + _23*matrix._32 + _24*matrix._42, _21*matrix._13 + _22*matrix._23 + _23*matrix._33 + _24*matrix._43, _21*matrix._14 + 
_22*matrix._24 + _23*matrix._34 + _24*matrix._44, _31*matrix._11 + _32*matrix._21 + _33*matrix._31 + _34*matrix._41, _31*matrix._12 + _32*matrix._22 + _33*matrix._32 + _34*matrix._42, _31*matrix._13 + _32*matrix._23 + _33*matrix._33 + _34*matrix._43, _31*matrix._14 + _32*matrix._24 + _33*matrix._34 + _34*matrix._44, _41*matrix._11 + _42*matrix._21 + _43*matrix._31 + _44*matrix._41, _41*matrix._12 + _42*matrix._22 + _43*matrix._32 + _44*matrix._42, _41*matrix._13 + _42*matrix._23 + _43*matrix._33 + _44*matrix._43, _41*matrix._14 + _42*matrix._24 + _43*matrix._34 + _44*matrix._44 ); } //Matrix multiplication/assignment CMatrix3D& CMatrix3D::operator*=(const CMatrix3D& matrix) { Concatenate(matrix); return *this; } //Matrix scaling CMatrix3D CMatrix3D::operator*(float f) const { CMatrix3D tmp; for (int i=0;i<16;i++) { tmp._data[i]=_data[i]*f; } return tmp; } //Matrix scaling/assignment CMatrix3D& CMatrix3D::operator*=(float f) { for (int i=0;i<16;i++) { _data[i]*=f; } return *this; } //Matrix addition CMatrix3D CMatrix3D::operator+(const CMatrix3D& m) const { CMatrix3D tmp; for (int i=0;i<16;i++) { tmp._data[i]=_data[i]+m._data[i]; } return tmp; } //Matrix addition/assignment CMatrix3D& CMatrix3D::operator+=(const CMatrix3D& m) { for (int i=0;i<16;i++) { _data[i]+=m._data[i]; } return *this; } //Sets the identity matrix void CMatrix3D::SetIdentity () { _11=1.0f; _12=0.0f; _13=0.0f; _14=0.0f; _21=0.0f; _22=1.0f; _23=0.0f; _24=0.0f; _31=0.0f; _32=0.0f; _33=1.0f; _34=0.0f; _41=0.0f; _42=0.0f; _43=0.0f; _44=1.0f; } //Sets the zero matrix void CMatrix3D::SetZero () { _11=0.0f; _12=0.0f; _13=0.0f; _14=0.0f; _21=0.0f; _22=0.0f; _23=0.0f; _24=0.0f; _31=0.0f; _32=0.0f; _33=0.0f; _34=0.0f; _41=0.0f; _42=0.0f; _43=0.0f; _44=0.0f; } //The following clear the matrix and set the //rotation of each of the 3 axes void CMatrix3D::SetXRotation (float angle) { float Cos = cosf (angle); float Sin = sinf (angle); _11=1.0f; _12=0.0f; _13=0.0f; _14=0.0f; _21=0.0f; _22=Cos; _23=-Sin; 
_24=0.0f; _31=0.0f; _32=Sin; _33=Cos; _34=0.0f; _41=0.0f; _42=0.0f; _43=0.0f; _44=1.0f; } void CMatrix3D::SetYRotation (float angle) { float Cos = cosf (angle); float Sin = sinf (angle); _11=Cos; _12=0.0f; _13=Sin; _14=0.0f; _21=0.0f; _22=1.0f; _23=0.0f; _24=0.0f; _31=-Sin; _32=0.0f; _33=Cos; _34=0.0f; _41=0.0f; _42=0.0f; _43=0.0f; _44=1.0f; } void CMatrix3D::SetZRotation (float angle) { float Cos = cosf (angle); float Sin = sinf (angle); _11=Cos; _12=-Sin; _13=0.0f; _14=0.0f; _21=Sin; _22=Cos; _23=0.0f; _24=0.0f; _31=0.0f; _32=0.0f; _33=1.0f; _34=0.0f; _41=0.0f; _42=0.0f; _43=0.0f; _44=1.0f; } //The following apply a rotation to the matrix //about each of the axes; void CMatrix3D::RotateX (float angle) { CMatrix3D Temp; Temp.SetXRotation (angle); Concatenate(Temp); } void CMatrix3D::RotateY (float angle) { CMatrix3D Temp; Temp.SetYRotation (angle); Concatenate(Temp); } void CMatrix3D::RotateZ (float angle) { CMatrix3D Temp; Temp.SetZRotation(angle); Concatenate(Temp); } //Sets the translation of the matrix void CMatrix3D::SetTranslation (float x, float y, float z) { _11=1.0f; _12=0.0f; _13=0.0f; _14=x; _21=0.0f; _22=1.0f; _23=0.0f; _24=y; _31=0.0f; _32=0.0f; _33=1.0f; _34=z; _41=0.0f; _42=0.0f; _43=0.0f; _44=1.0f; } void CMatrix3D::SetTranslation(const CVector3D& vector) { SetTranslation(vector.X, vector.Y, vector.Z); } //Applies a translation to the matrix void CMatrix3D::Translate(float x, float y, float z) { CMatrix3D Temp; Temp.SetTranslation(x,y,z); Concatenate(Temp); } void CMatrix3D::Translate(const CVector3D &vector) { Translate(vector.X,vector.Y,vector.Z); } void CMatrix3D::Concatenate(const CMatrix3D& m) { (*this)=m*(*this); } CVector3D CMatrix3D::GetTranslation() const { CVector3D Temp; Temp.X = _14; Temp.Y = _24; Temp.Z = _34; return Temp; } //Clears and sets the scaling of the matrix void CMatrix3D::SetScaling (float x_scale, float y_scale, float z_scale) { _11=x_scale; _12=0.0f; _13=0.0f; _14=0.0f; _21=0.0f; _22=y_scale; _23=0.0f; _24=0.0f; _31=0.0f; 
_32=0.0f; _33=z_scale; _34=0.0f; _41=0.0f; _42=0.0f; _43=0.0f; _44=1.0f; } //Scales the matrix void CMatrix3D::Scale (float x_scale, float y_scale, float z_scale) { CMatrix3D Temp; Temp.SetScaling(x_scale,y_scale,z_scale); Concatenate(Temp); } //Returns the transpose of the matrix. For orthonormal //matrices, this is the same is the inverse matrix void CMatrix3D::GetTranspose(CMatrix3D& result) const { result._11 = _11; result._21 = _12; result._31 = _13; result._41 = _14; result._12 = _21; result._22 = _22; result._32 = _23; result._42 = _24; result._13 = _31; result._23 = _32; result._33 = _33; result._43 = _34; result._14 = _41; result._24 = _42; result._34 = _43; result._44 = _44; } //Get a vector which points to the left of the matrix CVector3D CMatrix3D::GetLeft () const { CVector3D Temp; Temp.X = -_11; Temp.Y = -_21; Temp.Z = -_31; return Temp; } //Get a vector which points up from the matrix CVector3D CMatrix3D::GetUp () const { CVector3D Temp; Temp.X = _12; Temp.Y = _22; Temp.Z = _32; return Temp; } //Get a vector which points to front of the matrix CVector3D CMatrix3D::GetIn () const { CVector3D Temp; Temp.X = _13; Temp.Y = _23; Temp.Z = _33; return Temp; } //Transform a vector by this matrix CVector3D CMatrix3D::Transform (const CVector3D &vector) const { CVector3D result; Transform(vector,result); return result; } void CMatrix3D::Transform(const CVector3D& vector,CVector3D& result) const { result.X = _11*vector.X + _12*vector.Y + _13*vector.Z + _14; result.Y = _21*vector.X + _22*vector.Y + _23*vector.Z + _24; result.Z = _31*vector.X + _32*vector.Y + _33*vector.Z + _34; } //Transform a vector by this matrix CVector4D CMatrix3D::Transform(const CVector4D &vector) const { CVector4D result; Transform(vector,result); return result; } void CMatrix3D::Transform(const CVector4D& vector,CVector4D& result) const { result[0] = _11*vector[0] + _12*vector[1] + _13*vector[2] + _14*vector[3]; result[1] = _21*vector[0] + _22*vector[1] + _23*vector[2] + _24*vector[3]; 
result[2] = _31*vector[0] + _32*vector[1] + _33*vector[2] + _34*vector[3]; result[3] = _41*vector[0] + _42*vector[1] + _43*vector[2] + _44*vector[3]; } //Only rotate (not translate) a vector by this matrix CVector3D CMatrix3D::Rotate(const CVector3D& vector) const { CVector3D result; Rotate(vector,result); return result; } void CMatrix3D::Rotate(const CVector3D& vector,CVector3D& result) const { result.X = _11*vector.X + _12*vector.Y + _13*vector.Z; result.Y = _21*vector.X + _22*vector.Y + _23*vector.Z; result.Z = _31*vector.X + _32*vector.Y + _33*vector.Z; } /////////////////////////////////////////////////////////////////////////////// // RotateTransposed: rotate a vector by the transpose of this matrix CVector3D CMatrix3D::RotateTransposed(const CVector3D& vector) const { CVector3D result; RotateTransposed(vector,result); return result; } /////////////////////////////////////////////////////////////////////////////// // RotateTransposed: rotate a vector by the transpose of this matrix void CMatrix3D::RotateTransposed(const CVector3D& vector,CVector3D& result) const { result.X = _11*vector.X + _21*vector.Y + _31*vector.Z; result.Y = _12*vector.X + _22*vector.Y + _32*vector.Z; result.Z = _13*vector.X + _23*vector.Y + _33*vector.Z; } void CMatrix3D::GetInverse(CMatrix3D& dst) const { float tmp[12]; // temp array for pairs float src[16]; // array of transpose source matrix float det; // determinant // transpose matrix for (int i = 0; i < 4; ++i) { src[i] = _data[i*4]; src[i + 4] = _data[i*4 + 1]; src[i + 8] = _data[i*4 + 2]; src[i + 12] = _data[i*4 + 3]; } // calculate pairs for first 8 elements (cofactors) tmp[0] = src[10] * src[15]; tmp[1] = src[11] * src[14]; tmp[2] = src[9] * src[15]; tmp[3] = src[11] * src[13]; tmp[4] = src[9] * src[14]; tmp[5] = src[10] * src[13]; tmp[6] = src[8] * src[15]; tmp[7] = src[11] * src[12]; tmp[8] = src[8] * src[14]; tmp[9] = src[10] * src[12]; tmp[10] = src[8] * src[13]; tmp[11] = src[9] * src[12]; // calculate first 8 elements 
(cofactors) dst._data[0] = tmp[0]*src[5] + tmp[3]*src[6] + tmp[4]*src[7]; dst._data[0] -= tmp[1]*src[5] + tmp[2]*src[6] + tmp[5]*src[7]; dst._data[1] = tmp[1]*src[4] + tmp[6]*src[6] + tmp[9]*src[7]; dst._data[1] -= tmp[0]*src[4] + tmp[7]*src[6] + tmp[8]*src[7]; dst._data[2] = tmp[2]*src[4] + tmp[7]*src[5] + tmp[10]*src[7]; dst._data[2] -= tmp[3]*src[4] + tmp[6]*src[5] + tmp[11]*src[7]; dst._data[3] = tmp[5]*src[4] + tmp[8]*src[5] + tmp[11]*src[6]; dst._data[3] -= tmp[4]*src[4] + tmp[9]*src[5] + tmp[10]*src[6]; dst._data[4] = tmp[1]*src[1] + tmp[2]*src[2] + tmp[5]*src[3]; dst._data[4] -= tmp[0]*src[1] + tmp[3]*src[2] + tmp[4]*src[3]; dst._data[5] = tmp[0]*src[0] + tmp[7]*src[2] + tmp[8]*src[3]; dst._data[5] -= tmp[1]*src[0] + tmp[6]*src[2] + tmp[9]*src[3]; dst._data[6] = tmp[3]*src[0] + tmp[6]*src[1] + tmp[11]*src[3]; dst._data[6] -= tmp[2]*src[0] + tmp[7]*src[1] + tmp[10]*src[3]; dst._data[7] = tmp[4]*src[0] + tmp[9]*src[1] + tmp[10]*src[2]; dst._data[7] -= tmp[5]*src[0] + tmp[8]*src[1] + tmp[11]*src[2]; // calculate pairs for second 8 elements (cofactors) tmp[0] = src[2]*src[7]; tmp[1] = src[3]*src[6]; tmp[2] = src[1]*src[7]; tmp[3] = src[3]*src[5]; tmp[4] = src[1]*src[6]; tmp[5] = src[2]*src[5]; tmp[6] = src[0]*src[7]; tmp[7] = src[3]*src[4]; tmp[8] = src[0]*src[6]; tmp[9] = src[2]*src[4]; tmp[10] = src[0]*src[5]; tmp[11] = src[1]*src[4]; // calculate second 8 elements (cofactors) dst._data[8] = tmp[0]*src[13] + tmp[3]*src[14] + tmp[4]*src[15]; dst._data[8] -= tmp[1]*src[13] + tmp[2]*src[14] + tmp[5]*src[15]; dst._data[9] = tmp[1]*src[12] + tmp[6]*src[14] + tmp[9]*src[15]; dst._data[9] -= tmp[0]*src[12] + tmp[7]*src[14] + tmp[8]*src[15]; dst._data[10] = tmp[2]*src[12] + tmp[7]*src[13] + tmp[10]*src[15]; dst._data[10]-= tmp[3]*src[12] + tmp[6]*src[13] + tmp[11]*src[15]; dst._data[11] = tmp[5]*src[12] + tmp[8]*src[13] + tmp[11]*src[14]; dst._data[11]-= tmp[4]*src[12] + tmp[9]*src[13] + tmp[10]*src[14]; dst._data[12] = tmp[2]*src[10] + tmp[5]*src[11] + 
tmp[1]*src[9]; dst._data[12]-= tmp[4]*src[11] + tmp[0]*src[9] + tmp[3]*src[10]; dst._data[13] = tmp[8]*src[11] + tmp[0]*src[8] + tmp[7]*src[10]; dst._data[13]-= tmp[6]*src[10] + tmp[9]*src[11] + tmp[1]*src[8]; dst._data[14] = tmp[6]*src[9] + tmp[11]*src[11] + tmp[3]*src[8]; dst._data[14]-= tmp[10]*src[11] + tmp[2]*src[8] + tmp[7]*src[9]; dst._data[15] = tmp[10]*src[10] + tmp[4]*src[8] + tmp[9]*src[9]; dst._data[15]-= tmp[8]*src[9] + tmp[11]*src[10] + tmp[5]*src[8]; // calculate matrix inverse det=src[0]*dst._data[0]+src[1]*dst._data[1]+src[2]*dst._data[2]+src[3]*dst._data[3]; det = 1/det; for ( int j = 0; j < 16; j++) { dst._data[j] *= det; } } void CMatrix3D::Rotate(const CQuaternion& quat) { CMatrix3D rotationMatrix=quat.ToMatrix(); Concatenate(rotationMatrix); } CQuaternion CMatrix3D::GetRotation() const { float tr = _data2d[0][0] + _data2d[1][1] + _data2d[2][2]; int next[] = { 1, 2, 0 }; float quat[4]; if (tr > 0.f) { float s = sqrtf(tr + 1.f); quat[3] = s * 0.5f; s = 0.5f / s; quat[0] = (_data2d[1][2] - _data2d[2][1]) * s; quat[1] = (_data2d[2][0] - _data2d[0][2]) * s; quat[2] = (_data2d[0][1] - _data2d[1][0]) * s; } else { int i = 0; if (_data2d[1][1] > _data2d[0][0]) i = 1; if (_data2d[2][2] > _data2d[i][i]) i = 2; int j = next[i]; int k = next[j]; float s = sqrtf((_data2d[i][i] - (_data2d[j][j] + _data2d[k][k])) + 1.f); quat[i] = s * 0.5f; if (s != 0.f) s = 0.5f / s; quat[3] = (_data2d[j][k] - _data2d[k][j]) * s; quat[j] = (_data2d[i][j] + _data2d[j][i]) * s; quat[k] = (_data2d[i][k] + _data2d[k][i]) * s; } return CQuaternion(quat[0], quat[1], quat[2], quat[3]); } void CMatrix3D::SetRotation(const CQuaternion& quat) { quat.ToMatrix(*this); } - - -//---------------------------------------------------------------------------- -// built-in self test -//---------------------------------------------------------------------------- - -#if SELF_TEST_ENABLED -namespace test { - -static void test_inverse() -{ - CMatrix3D m; - srand(0); - for (int i = 0; i < 4; ++i) - 
{ - for (int j = 0; j < 16; ++j) - m._data[j] = -1.0f + 2.0f*(rand()/(float)RAND_MAX); - CMatrix3D n; - m.GetInverse(n); - m *= n; - for (int x = 0; x < 4; ++x) - for (int y = 0; y < 4; ++y) - { - float expected = (x==y)? 1.0f : 0.0f; // identity should have 1s on diagonal - TEST(feq(m(x,y), expected)); - } - } -} - -static void test_quats() -{ - srand(0); - for (int i = 0; i < 4; ++i) - { - CQuaternion q; - q.FromEulerAngles( - -6.28f + 12.56f*(rand()/(float)RAND_MAX), - -6.28f + 12.56f*(rand()/(float)RAND_MAX), - -6.28f + 12.56f*(rand()/(float)RAND_MAX) - ); - CMatrix3D m; - q.ToMatrix(m); - CQuaternion q2 = m.GetRotation(); - - // I hope there's a good reason why they're sometimes negated, and - // it's not just a bug... - bool ok_oneway = - feq(q2.m_W, q.m_W) && - feq(q2.m_V.X, q.m_V.X) && - feq(q2.m_V.Y, q.m_V.Y) && - feq(q2.m_V.Z, q.m_V.Z); - bool ok_otherway = - feq(q2.m_W, -q.m_W) && - feq(q2.m_V.X, -q.m_V.X) && - feq(q2.m_V.Y, -q.m_V.Y) && - feq(q2.m_V.Z, -q.m_V.Z); - TEST(ok_oneway ^ ok_otherway); - } -} - - -static void self_test() -{ - test_inverse(); - test_quats(); -} - -SELF_TEST_RUN; - -} // namespace test -#endif // #if SELF_TEST_ENABLED Index: ps/trunk/source/lib/lib.cpp =================================================================== --- ps/trunk/source/lib/lib.cpp (revision 3910) +++ ps/trunk/source/lib/lib.cpp (revision 3911) @@ -1,629 +1,562 @@ /** * ========================================================================= * File : lib.cpp * Project : 0 A.D. * Description : various utility functions. * * @author Jan.Wassenberg@stud.uni-karlsruhe.de * ========================================================================= */ /* * Copyright (c) 2003-2005 Jan Wassenberg * * Redistribution and/or modification are also permitted under the * terms of the GNU General Public License as published by the * Free Software Foundation (version 2 or later, at your option). 
* * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. */ #include "precompiled.h" #include #include #include #include "lib/types.h" #include "lib.h" #include "lib/app_hooks.h" #include "sysdep/sysdep.h" -#ifndef SELF_TEST_ENABLED -#define SELF_TEST_ENABLED 0 -#endif - - -// FNV1-A hash - good for strings. -// if len = 0 (default), treat buf as a C-string; -// otherwise, hash bytes of buf. -u32 fnv_hash(const void* buf, size_t len) -{ - u32 h = 0x811c9dc5u; - // give distinct values for different length 0 buffers. - // value taken from FNV; it has no special significance. - - const u8* p = (const u8*)buf; - - // expected case: string - if(!len) - { - while(*p) - { - h ^= *p++; - h *= 0x01000193u; - } - } - else - { - size_t bytes_left = len; - while(bytes_left != 0) - { - h ^= *p++; - h *= 0x01000193u; - - bytes_left--; - } - } - - return h; -} - - -// FNV1-A hash - good for strings. -// if len = 0 (default), treat buf as a C-string; -// otherwise, hash bytes of buf. -u64 fnv_hash64(const void* buf, size_t len) -{ - u64 h = 0xCBF29CE484222325ull; - // give distinct values for different length 0 buffers. - // value taken from FNV; it has no special significance. - - const u8* p = (const u8*)buf; - - // expected case: string - if(!len) - { - while(*p) - { - h ^= *p++; - h *= 0x100000001B3ull; - } - } - else - { - size_t bytes_left = len; - while(bytes_left != 0) - { - h ^= *p++; - h *= 0x100000001B3ull; - - bytes_left--; - } - } - - return h; -} - - -// special version for strings: first converts to lowercase -// (useful for comparing mixed-case filenames). -// note: still need , e.g. to support non-0-terminated strings -u32 fnv_lc_hash(const char* str, size_t len) -{ - u32 h = 0x811c9dc5u; - // give distinct values for different length 0 buffers. - // value taken from FNV; it has no special significance. 
- - // expected case: string - if(!len) - { - while(*str) - { - h ^= tolower(*str++); - h *= 0x01000193u; - } - } - else - { - size_t bytes_left = len; - while(bytes_left != 0) - { - h ^= tolower(*str++); - h *= 0x01000193u; - - bytes_left--; - } - } - - return h; -} - - - - +//----------------------------------------------------------------------------- +// bit bashing +//----------------------------------------------------------------------------- bool is_pow2(uint n) { // 0 would pass the test below but isn't a POT. if(n == 0) return false; return (n & (n-1l)) == 0; } // return -1 if not an integral power of 2, // otherwise the base2 logarithm int ilog2(uint n) { int bit_index; // return value #if CPU_IA32 && HAVE_MS_ASM __asm { mov ecx, [n] or eax, -1 // return value if not a POT test ecx, ecx jz not_pot lea edx, [ecx-1] test ecx, edx jnz not_pot bsf eax, ecx not_pot: mov [bit_index], eax } #else if(!is_pow2(n)) return -1; bit_index = 0; // note: compare against n directly because it is known to be a POT. for(uint bit_value = 1; bit_value != n; bit_value *= 2) bit_index++; #endif debug_assert(-1 <= bit_index && bit_index < (int)sizeof(int)*CHAR_BIT); debug_assert(bit_index == -1 || n == (1u << bit_index)); return bit_index; } - // return log base 2, rounded up. uint log2(uint x) { uint bit = 1; uint l = 0; while(bit < x) { l++; bit += bit; } return l; } +int ilog2(const float x) +{ + const u32 i = *(u32*)&x; + u32 biased_exp = (i >> 23) & 0xff; + return (int)biased_exp - 127; +} + -cassert(sizeof(int)*CHAR_BIT == 32); // otherwise change round_up_to_pow2 +// round_up_to_pow2 implementation assumes 32-bit int. +// if 64, add "x |= (x >> 32);" +cassert(sizeof(int)*CHAR_BIT == 32); uint round_up_to_pow2(uint x) { // fold upper bit into lower bits; leaves same MSB set but // everything below it 1. adding 1 yields next POT. 
x |= (x >> 1); x |= (x >> 2); x |= (x >> 4); x |= (x >> 8); x |= (x >> 16); return x+1; } -int ilog2(const float x) -{ - const u32 i = *(u32*)&x; - u32 biased_exp = (i >> 23) & 0xff; - return (int)biased_exp - 127; -} - +//----------------------------------------------------------------------------- +// misc arithmetic // multiple must be a power of two. uintptr_t round_up(const uintptr_t n, const uintptr_t multiple) { debug_assert(is_pow2((long)multiple)); const uintptr_t result = (n + multiple-1) & ~(multiple-1); debug_assert(n <= result && result < n+multiple); return result; } // multiple must be a power of two. uintptr_t round_down(const uintptr_t n, const uintptr_t multiple) { debug_assert(is_pow2((long)multiple)); const uintptr_t result = n & ~(multiple-1); debug_assert(result <= n && n < result+multiple); return result; } u16 addusw(u16 x, u16 y) { u32 t = x; return (u16)MIN(t+y, 0xffffu); } - u16 subusw(u16 x, u16 y) { long t = x; return (u16)(MAX(t-y, 0)); } -// zero-extend (truncated to 8) bytes of little-endian data to u64, -// starting at address

(need not be aligned). -u64 movzx_64le(const u8* p, size_t size) -{ - if(size > 8) - size = 8; - u64 data = 0; - for(u64 i = 0; i < MIN(size,8); i++) - data |= ((u64)p[i]) << (i*8); - - return data; -} +//----------------------------------------------------------------------------- +// rand +// return random integer in [min, max). +// avoids several common pitfalls; see discussion at +// http://www.azillionmonkeys.com/qed/random.html -// sign-extend (truncated to 8) bytes of little-endian data to i64, -// starting at address

(need not be aligned). -i64 movsx_64le(const u8* p, size_t size) +// rand() is poorly implemented (e.g. in VC7) and only returns < 16 bits; +// double that amount by concatenating 2 random numbers. +// this is not to fix poor rand() randomness - the number returned will be +// folded down to a much smaller interval anyway. instead, a larger XRAND_MAX +// decreases the probability of having to repeat the loop. +#if RAND_MAX < 65536 +static const uint XRAND_MAX = (RAND_MAX+1)*(RAND_MAX+1) - 1; +static uint xrand() { - if(size > 8) - size = 8; - - u64 data = movzx_64le(p, size); + return rand()*(RAND_MAX+1) + rand(); +} +// rand() is already ok; no need to do anything. +#else +static const uint XRAND_MAX = RAND_MAX; +static uint xrand() +{ + return rand(); +} +#endif - // no point in sign-extending if >= 8 bytes were requested - if(size < 8) +uint rand(uint min_inclusive, uint max_exclusive) +{ + const uint range = (max_exclusive-min_inclusive); + // huge interval or min >= max + if(range == 0 || range > XRAND_MAX) { - u64 sign_bit = 1; - sign_bit <<= (size*8)-1; - // be sure that we don't shift more than variable's bit width - - // number would be negative in the smaller type, - // so sign-extend, i.e. set all more significant bits. - if(data & sign_bit) - { - const u64 size_mask = (sign_bit+sign_bit)-1; - data |= ~size_mask; - } + WARN_ERR(ERR_INVALID_PARAM); + return 0; } - return (i64)data; + const uint inv_range = XRAND_MAX / range; + + // generate random number in [0, range) + // idea: avoid skewed distributions when doesn't evenly divide + // XRAND_MAX by simply discarding values in the "remainder". + // not expected to run often since XRAND_MAX is large. 
+ uint x; + do + x = xrand(); + while(x >= range * inv_range); + x /= inv_range; + + x += min_inclusive; + debug_assert(x < max_exclusive); + return x; } +//----------------------------------------------------------------------------- +// type conversion + // these avoid a common mistake in using >> (ANSI requires shift count be // less than the bit width of the type). u32 u64_hi(u64 x) { return (u32)(x >> 32); } u32 u64_lo(u64 x) { return (u32)(x & 0xFFFFFFFF); } u16 u32_hi(u32 x) { return (u16)(x >> 16); } u16 u32_lo(u32 x) { return (u16)(x & 0xFFFF); } - u64 u64_from_u32(u32 hi, u32 lo) { u64 x = (u64)hi; x <<= 32; x |= lo; return x; } u32 u32_from_u16(u16 hi, u16 lo) { u32 x = (u32)hi; x <<= 16; x |= lo; return x; } +// zero-extend (truncated to 8) bytes of little-endian data to u64, +// starting at address

(need not be aligned). +u64 movzx_64le(const u8* p, size_t size) +{ + size = MIN(size, 8); + + u64 data = 0; + for(u64 i = 0; i < size; i++) + data |= ((u64)p[i]) << (i*8); + + return data; +} + +// sign-extend (truncated to 8) bytes of little-endian data to i64, +// starting at address

(need not be aligned). +i64 movsx_64le(const u8* p, size_t size) +{ + size = MIN(size, 8); + + u64 data = movzx_64le(p, size); + + // no point in sign-extending if >= 8 bytes were requested + if(size < 8) + { + u64 sign_bit = 1; + sign_bit <<= (size*8)-1; + // be sure that we don't shift more than variable's bit width + + // number would be negative in the smaller type, + // so sign-extend, i.e. set all more significant bits. + if(data & sign_bit) + { + const u64 size_mask = (sign_bit+sign_bit)-1; + data |= ~size_mask; + } + } + + return (i64)data; +} // input in [0, 1); convert to u8 range u8 fp_to_u8(double in) { if(!(0.0 <= in && in < 1.0)) { debug_warn("clampf not in [0,1)"); return 255; } int l = (int)(in * 255.0); debug_assert((unsigned int)l <= 255u); return (u8)l; } - // input in [0, 1); convert to u16 range u16 fp_to_u16(double in) { if(!(0.0 <= in && in < 1.0)) { debug_warn("clampf not in [0,1)"); return 65535; } long l = (long)(in * 65535.0); debug_assert((unsigned long)l <= 65535u); return (u16)l; } - - +//----------------------------------------------------------------------------- +// string processing // big endian! -void base32(const int len, const u8* in, u8* out) +void base32(const size_t len, const u8* in, u8* out) { - int bits = 0; - u32 pool = 0; + u32 pool = 0; // of bits from buffer + uint bits = 0; // # bits currently in buffer - static u8 tbl[33] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567"; + static const u8 tbl[33] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567"; - for(int i = 0; i < len; i++) + for(size_t i = 0; i < len; i++) { if(bits < 5) { pool <<= 8; pool |= *in++; bits += 8; } bits -= 5; - int c = (pool >> bits) & 31; + uint c = (pool >> bits) & 31; *out++ = tbl[c]; } } - - - - -// case-insensitive check if string matches the pattern , -// which may contain '?' or '*' wildcards. if so, return 1, otherwise 0. -// idea from http://www.codeproject.com/string/wildcmp.asp . -// note: NULL wildcard pattern matches everything! 
int match_wildcard(const char* s, const char* w) { if(!w) return 1; // saved position in both strings, used to expand '*': // s2 is advanced until match. // initially 0 - we abort on mismatch before the first '*'. const char* s2 = 0; const char* w2 = 0; while(*s) { const int wc = *w; if(wc == '*') { // wildcard string ended with * => match. if(*++w == '\0') return 1; w2 = w; s2 = s+1; } // match one character else if(toupper(wc) == toupper(*s) || wc == '?') { w++; s++; } // mismatched character else { // no '*' found yet => mismatch. if(!s2) return 0; // resume at previous position+1 w = w2; s = s2++; } } // strip trailing * in wildcard string while(*w == '*') w++; return (*w == '\0'); } int match_wildcardw(const wchar_t* s, const wchar_t* w) { if(!w) return 1; // saved position in both strings, used to expand '*': // s2 is advanced until match. // initially 0 - we abort on mismatch before the first '*'. const wchar_t* s2 = 0; const wchar_t* w2 = 0; while(*s) { const wchar_t wc = *w; if(wc == '*') { // wildcard string ended with * => match. if(*++w == '\0') return 1; w2 = w; s2 = s+1; } // match one character else if(towupper(wc) == towupper(*s) || wc == '?') { w++; s++; } // mismatched character else { // no '*' found yet => mismatch. if(!s2) return 0; // resume at previous position+1 w = w2; s = s2++; } } // strip trailing * in wildcard string while(*w == '*') w++; return (*w == '\0'); } - - -// return random integer in [min, max). -// avoids several common pitfalls; see discussion at -// http://www.azillionmonkeys.com/qed/random.html - -// rand() is poorly implemented (e.g. in VC7) and only returns < 16 bits; -// double that amount by concatenating 2 random numbers. -// this is not to fix poor rand() randomness - the number returned will be -// folded down to a much smaller interval anyway. instead, a larger XRAND_MAX -// decreases the probability of having to repeat the loop. 
-#if RAND_MAX < 65536 -static const uint XRAND_MAX = (RAND_MAX+1)*(RAND_MAX+1) - 1; -static uint xrand() -{ - return rand()*(RAND_MAX+1) + rand(); -} -// rand() is already ok; no need to do anything. -#else -static const uint XRAND_MAX = RAND_MAX; -static uint xrand() +// FNV1-A hash - good for strings. +// if len = 0 (default), treat buf as a C-string; +// otherwise, hash bytes of buf. +u32 fnv_hash(const void* buf, size_t len) { - return rand(); -} -#endif + u32 h = 0x811c9dc5u; + // give distinct values for different length 0 buffers. + // value taken from FNV; it has no special significance. -uint rand(uint min_inclusive, uint max_exclusive) -{ - const uint range = (max_exclusive-min_inclusive); - // huge interval or min >= max - if(range == 0 || range > XRAND_MAX) + const u8* p = (const u8*)buf; + + // expected case: string + if(!len) { - WARN_ERR(ERR_INVALID_PARAM); - return 0; + while(*p) + { + h ^= *p++; + h *= 0x01000193u; + } } + else + { + size_t bytes_left = len; + while(bytes_left != 0) + { + h ^= *p++; + h *= 0x01000193u; - const uint inv_range = XRAND_MAX / range; - - // generate random number in [0, range) - // idea: avoid skewed distributions when doesn't evenly divide - // XRAND_MAX by simply discarding values in the "remainder". - // not expected to run often since XRAND_MAX is large. - uint x; - do - x = xrand(); - while(x >= range * inv_range); - x /= inv_range; + bytes_left--; + } + } - x += min_inclusive; - debug_assert(x < max_exclusive); - return x; + return h; } -//----------------------------------------------------------------------------- -// built-in self test -//----------------------------------------------------------------------------- - -#if SELF_TEST_ENABLED -namespace test { - -static void test_log2() +// FNV1-A hash - good for strings. +// if len = 0 (default), treat buf as a C-string; +// otherwise, hash bytes of buf. 
+u64 fnv_hash64(const void* buf, size_t len) { - TEST(ilog2(0u) == -1); - TEST(ilog2(3u) == -1); - TEST(ilog2(0xffffffffu) == -1); - TEST(ilog2(1u) == 0); - TEST(ilog2(256u) == 8); - TEST(ilog2(0x80000000u) == 31); -} + u64 h = 0xCBF29CE484222325ull; + // give distinct values for different length 0 buffers. + // value taken from FNV; it has no special significance. -static void test_rand() -{ - // complain if huge interval or min > max - TEST(rand(1, 0) == 0); - TEST(rand(2, ~0u) == 0); + const u8* p = (const u8*)buf; - // returned number must be in [min, max) - for(int i = 0; i < 100; i++) + // expected case: string + if(!len) { - uint min = rand(), max = min+rand(); - uint x = rand(min, max); - TEST(min <= x && x < max); + while(*p) + { + h ^= *p++; + h *= 0x100000001B3ull; + } } - - // make sure both possible values are hit - uint ones = 0, twos = 0; - for(int i = 0; i < 100; i++) + else { - uint x = rand(1, 3); - // paranoia: don't use array (x might not be 1 or 2 - checked below) - if(x == 1) ones++; if(x == 2) twos++; + size_t bytes_left = len; + while(bytes_left != 0) + { + h ^= *p++; + h *= 0x100000001B3ull; + + bytes_left--; + } } - TEST(ones+twos == 100); - TEST(ones > 10 && twos > 10); + + return h; } -static void self_test() + +// special version for strings: first converts to lowercase +// (useful for comparing mixed-case filenames). +// note: still need , e.g. to support non-0-terminated strings +u32 fnv_lc_hash(const char* str, size_t len) { - test_log2(); - test_rand(); -} + u32 h = 0x811c9dc5u; + // give distinct values for different length 0 buffers. + // value taken from FNV; it has no special significance. 
-SELF_TEST_RUN; + // expected case: string + if(!len) + { + while(*str) + { + h ^= tolower(*str++); + h *= 0x01000193u; + } + } + else + { + size_t bytes_left = len; + while(bytes_left != 0) + { + h ^= tolower(*str++); + h *= 0x01000193u; -} // namespace test -#endif // #if SELF_TEST_ENABLED + bytes_left--; + } + } + + return h; +} Index: ps/trunk/source/lib/string_s.cpp =================================================================== --- ps/trunk/source/lib/string_s.cpp (revision 3910) +++ ps/trunk/source/lib/string_s.cpp (revision 3911) @@ -1,402 +1,205 @@ /** * ========================================================================= * File : string_s.cpp * Project : 0 A.D. * Description : implementation of proposed CRT safe string functions * * @author Jan.Wassenberg@stud.uni-karlsruhe.de * ========================================================================= */ /* * Copyright (c) 2005 Jan Wassenberg * * Redistribution and/or modification are also permitted under the * terms of the GNU General Public License as published by the * Free Software Foundation (version 2 or later, at your option). * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. */ #include "precompiled.h" #include #include "lib.h" #include "posix.h" // SIZE_MAX #include "self_test.h" // written against http://std.dkuug.dk/jtc1/sc22/wg14/www/docs/n1031.pdf . // optimized for size - e.g. strcpy calls strncpy with n = SIZE_MAX. // since char and wide versions of these functions are basically the same, // this source file implements generic versions and bridges the differences // with these macros. wstring_s.cpp #defines WSTRING_S and includes this file. 
#ifdef WSTRING_S # define tchar wchar_t # define T(string_literal) L ## string_literal # define tnlen wcsnlen # define tncpy_s wcsncpy_s # define tcpy_s wcscpy_s # define tncat_s wcsncat_s # define tcat_s wcscat_s # define tcmp wcscmp # define tcpy wcscpy #else # define tchar char # define T(string_literal) string_literal # define tnlen strnlen # define tncpy_s strncpy_s # define tcpy_s strcpy_s # define tncat_s strncat_s # define tcat_s strcat_s # define tcmp strcmp # define tcpy strcpy #endif // return and raise an assertion if doesn't hold. // usable as a statement. #define ENFORCE(condition, retval) STMT(\ if(!(condition)) \ { \ debug_assert(condition); \ return retval; \ } \ ) // raise a debug warning if is the size of a pointer. // catches bugs such as: tchar* s = ..; tcpy_s(s, sizeof(s), T("..")); // if warnings get annoying, replace with debug_printf. usable as a statement. #define WARN_IF_PTR_LEN(len) STMT( \ if(len == sizeof(char*)) \ debug_warn("make sure string buffer size is correct");\ ) // skip our implementation if already available, but not the // self-test and the t* defines (needed for test). #if !HAVE_STRING_S // return length [in characters] of a string, not including the trailing // null character. to protect against access violations, only the // first characters are examined; if the null character is // not encountered by then, is returned. size_t tnlen(const tchar* str, size_t max_len) { // note: we can't bail - what would the return value be? debug_assert(str != 0); WARN_IF_PTR_LEN(max_len); size_t len; for(len = 0; len < max_len; len++) if(*str++ == '\0') break; return len; } // copy at most (not including trailing null) from // into , which must not overlap. // if thereby (including null) would be exceeded, // is set to the empty string and ERANGE returned; otherwise, // 0 is returned to indicate success and that is null-terminated. // // note: padding with zeroes is not called for by NG1031. 
int tncpy_s(tchar* dst, size_t max_dst_chars, const tchar* src, size_t max_src_chars) { // the MS implementation returns EINVAL and allows dst = 0 if // max_dst_chars = max_src_chars = 0. no mention of this in // 3.6.2.1.1, so don't emulate that behavior. ENFORCE(dst != 0, EINVAL); ENFORCE(max_dst_chars != 0, ERANGE); *dst = '\0'; // in case src ENFORCE is triggered ENFORCE(src != 0, EINVAL); WARN_IF_PTR_LEN(max_dst_chars); WARN_IF_PTR_LEN(max_src_chars); // copy string until null character encountered or limit reached. // optimized for size (less comparisons than MS impl) and // speed (due to well-predicted jumps; we don't bother unrolling). tchar* p = dst; size_t chars_left = MIN(max_dst_chars, max_src_chars); while(chars_left != 0) { // success: reached end of string normally. if((*p++ = *src++) == '\0') return 0; chars_left--; } // which limit did we hit? // .. dest, and last character wasn't null: overflow. if(max_dst_chars <= max_src_chars) { *dst = '\0'; ENFORCE(0 && "Buffer too small", ERANGE); } // .. source: success, but still need to null-terminate the destination. *p = '\0'; return 0; } // copy (including trailing null) into , which must not overlap. // if thereby (including null) would be exceeded, // is set to the empty string and ERANGE returned; otherwise, // 0 is returned to indicate success and that is null-terminated. int tcpy_s(tchar* dst, size_t max_dst_chars, const tchar* src) { return tncpy_s(dst, max_dst_chars, src, SIZE_MAX); } // append to , which must not overlap. // if thereby (including null) would be exceeded, // is set to the empty string and ERANGE returned; otherwise, // 0 is returned to indicate success and that is null-terminated. int tncat_s(tchar* dst, size_t max_dst_chars, const tchar* src, size_t max_src_chars) { ENFORCE(dst != 0, EINVAL); ENFORCE(max_dst_chars != 0, ERANGE); // src is checked in tncpy_s // WARN_IF_PTR_LEN not necessary: both max_dst_chars and max_src_chars // are checked by tnlen / tncpy_s (respectively). 
const size_t dst_len = tnlen(dst, max_dst_chars); if(dst_len == max_dst_chars) { *dst = '\0'; ENFORCE(0 && "Destination string not null-terminated", ERANGE); } tchar* const end = dst+dst_len; const size_t chars_left = max_dst_chars-dst_len; int ret = tncpy_s(end, chars_left, src, max_src_chars); // if tncpy_s overflowed, we need to clear the start of our string // (not just the appended part). can't do that by default, because // the beginning of dst is not changed in normal operation. if(ret != 0) *dst = '\0'; return ret; } // append to , which must not overlap. // if thereby (including null) would be exceeded, // is set to the empty string and ERANGE returned; otherwise, // 0 is returned to indicate success and that is null-terminated. // // note: implemented as tncat_s(dst, max_dst_chars, src, SIZE_MAX) int tcat_s(tchar* dst, size_t max_dst_chars, const tchar* src) { return tncat_s(dst, max_dst_chars, src, SIZE_MAX); } #endif // #if !HAVE_STRING_S - - -////////////////////////////////////////////////////////////////////////////// -// -// built-in self test -// -////////////////////////////////////////////////////////////////////////////// - -namespace test { - -#if SELF_TEST_ENABLED - -// note: avoid 4-byte strings - they would trigger WARN_IF_PTR_LEN. 
- -static const tchar* s0 = T(""); -static const tchar* s1 = T("a"); -static const tchar* s5 = T("abcde"); -static const tchar* s10 = T("abcdefghij"); - -static tchar d1[1]; -static tchar d2[2]; -static tchar d3[3]; -static tchar d5[5]; -static tchar d6[6]; -static tchar d10[10]; -static tchar d11[11]; - -static tchar no_null[] = { 'n','o','_','n','u','l','l'}; - - - -#define TEST_LEN(string, limit, expected) \ - TEST(tnlen((string), (limit)) == (expected)); - -#define TEST_CPY(dst, dst_max, src, expected_ret, expected_dst) \ -STMT( \ - int ret = tcpy_s((dst), dst_max, (src)); \ - TEST(ret == expected_ret); \ - if(dst != 0) \ - TEST(!tcmp(dst, T(expected_dst))); \ -) -#define TEST_CPY2(dst, src, expected_ret, expected_dst) \ -STMT( \ - int ret = tcpy_s((dst), ARRAY_SIZE(dst), (src)); \ - TEST(ret == expected_ret); \ - if(dst != 0) \ - TEST(!tcmp(dst, T(expected_dst))); \ -) -#define TEST_NCPY(dst, src, max_src_chars, expected_ret, expected_dst) \ -STMT( \ - int ret = tncpy_s((dst), ARRAY_SIZE(dst), (src), (max_src_chars)); \ - TEST(ret == expected_ret); \ - if(dst != 0) \ - TEST(!tcmp(dst, T(expected_dst))); \ -) - -#define TEST_CAT(dst, dst_max, src, expected_ret, expected_dst) \ -STMT( \ - int ret = tcat_s((dst), dst_max, (src)); \ - TEST(ret == expected_ret); \ - if(dst != 0) \ - TEST(!tcmp(dst, T(expected_dst))); \ -) -#define TEST_CAT2(dst, dst_val, src, expected_ret, expected_dst) \ -STMT( \ - tcpy(dst, T(dst_val)); \ - int ret = tcat_s((dst), ARRAY_SIZE(dst), (src)); \ - TEST(ret == expected_ret); \ - if(dst != 0) \ - TEST(!tcmp(dst, T(expected_dst))); \ -) -#define TEST_NCAT(dst, dst_val, src, max_src_chars, expected_ret, expected_dst)\ -STMT( \ - tcpy(dst, T(dst_val)); \ - int ret = tncat_s((dst), ARRAY_SIZE(dst), (src), (max_src_chars)); \ - TEST(ret == expected_ret); \ - if(dst != 0) \ - TEST(!tcmp(dst, T(expected_dst))); \ -) - - -// contains all tests that verify correct behavior for bogus input. 
-// our implementation suppresses error dialogs while the self-test is active, -// but others (e.g. the functions shipped with VC8) do not. -// since we have no control over their error reporting (which ends up taking -// down the program), we must skip this part of the test if using them. -// this is still preferable to completely disabling the self-test. -static void test_param_validation() -{ -#if !HAVE_STRING_S - TEST_CPY(0 ,0,0 , EINVAL,""); // all invalid - TEST_CPY(0 ,0,s1, EINVAL,""); // dst = 0, max = 0 - TEST_CPY(0 ,1,s1, EINVAL,""); // dst = 0, max > 0 - TEST_CPY(d1,1,0 , EINVAL,""); // src = 0 - TEST_CPY(d1,0,s1, ERANGE,""); // max_dst_chars = 0 - - TEST_CPY2(d1 ,s1, ERANGE,""); - TEST_CPY2(d1 ,s5, ERANGE,""); - TEST_CPY2(d5 ,s5, ERANGE,""); - - TEST_NCPY(d1 ,s1,1, ERANGE,""); - TEST_NCPY(d1 ,s5,1, ERANGE,""); - TEST_NCPY(d5 ,s5,5, ERANGE,""); - - TEST_CAT(0 ,0,0 , EINVAL,""); // all invalid - TEST_CAT(0 ,0,s1, EINVAL,""); // dst = 0, max = 0 - TEST_CAT(0 ,1,s1, EINVAL,""); // dst = 0, max > 0 - TEST_CAT(d1,1,0 , EINVAL,""); // src = 0 - TEST_CAT(d1,0,s1, ERANGE,""); // max_dst_chars = 0 - TEST_CAT(no_null,5,s1, ERANGE,""); // dst not terminated - - TEST_CAT2(d1 ,"" ,s1, ERANGE,""); - TEST_CAT2(d1 ,"" ,s5, ERANGE,""); - TEST_CAT2(d10,"" ,s10, ERANGE,""); // empty, total overflow - TEST_CAT2(d10,"12345",s5 , ERANGE,""); // not empty, overflow - TEST_CAT2(d10,"12345",s10, ERANGE,""); // not empty, total overflow - - TEST_NCAT(d1 ,"" ,s1,1, ERANGE,""); - TEST_NCAT(d1 ,"" ,s5,5, ERANGE,""); - TEST_NCAT(d10,"" ,s10,10, ERANGE,""); // empty, total overflow - TEST_NCAT(d10,"12345",s5 ,5 , ERANGE,""); // not empty, overflow - TEST_NCAT(d10,"12345",s10,10, ERANGE,""); // not empty, total overflow -#endif -} - - -static void test_length() -{ - TEST_LEN(s0, 0 , 0 ); - TEST_LEN(s0, 1 , 0 ); - TEST_LEN(s0, 50, 0 ); - TEST_LEN(s1, 0 , 0 ); - TEST_LEN(s1, 1 , 1 ); - TEST_LEN(s1, 50, 1 ); - TEST_LEN(s5, 0 , 0 ); - TEST_LEN(s5, 1 , 1 ); - TEST_LEN(s5, 50, 5 ); - 
TEST_LEN(s10,9 , 9 ); - TEST_LEN(s10,10, 10); - TEST_LEN(s10,11, 10); -} - - -static void test_copy() -{ - TEST_CPY2(d2 ,s1, 0,"a"); - TEST_CPY2(d6 ,s5, 0,"abcde"); - TEST_CPY2(d11,s5, 0,"abcde"); - - TEST_NCPY(d2 ,s1,1, 0,"a"); - TEST_NCPY(d6 ,s5,5, 0,"abcde"); - TEST_NCPY(d11,s5,5, 0,"abcde"); - - tcpy(d5, T("----")); - TEST_NCPY(d5,s5,0 , 0,""); // specified behavior! see 3.6.2.1.1 #4 - TEST_NCPY(d5,s5,1 , 0,"a"); - TEST_NCPY(d5,s5,4 , 0,"abcd"); - TEST_NCPY(d6,s5,5 , 0,"abcde"); - TEST_NCPY(d6,s5,10, 0,"abcde"); -} - - -static void test_concatenate() -{ - TEST_CAT2(d3 ,"1",s1, 0,"1a"); - TEST_CAT2(d5 ,"1",s1, 0,"1a"); - TEST_CAT2(d6 ,"" ,s5, 0,"abcde"); - TEST_CAT2(d10,"" ,s5, 0,"abcde"); - TEST_CAT2(d10,"1234" ,s5 , 0,"1234abcde"); - - TEST_NCAT(d3 ,"1",s1,1, 0,"1a"); - TEST_NCAT(d5 ,"1",s1,1, 0,"1a"); - TEST_NCAT(d6 ,"" ,s5,5, 0,"abcde"); - TEST_NCAT(d10,"" ,s5,5, 0,"abcde"); - TEST_NCAT(d10,"1234" ,s5 ,5 , 0,"1234abcde"); - - TEST_NCAT(d5,"----",s5,0 , 0,"----"); - TEST_NCAT(d5,"",s5,1 , 0,"a"); - TEST_NCAT(d5,"",s5,4 , 0,"abcd"); - TEST_NCAT(d5,"12",s5,2 , 0,"12ab"); - TEST_NCAT(d6,"",s5,10, 0,"abcde"); -} - - -static void self_test() -{ - test_param_validation(); - test_length(); - test_copy(); - test_concatenate(); -} - -SELF_TEST_RUN; - -#endif // #if SELF_TEST_ENABLED - -} // namespace test Index: ps/trunk/source/lib/path_util.cpp =================================================================== --- ps/trunk/source/lib/path_util.cpp (revision 3910) +++ ps/trunk/source/lib/path_util.cpp (revision 3911) @@ -1,399 +1,402 @@ /** * ========================================================================= * File : path_util.cpp * Project : 0 A.D. * Description : helper functions for path strings. 
* * @author Jan.Wassenberg@stud.uni-karlsruhe.de * ========================================================================= */ /* * Copyright (c) 2004-2006 Jan Wassenberg * * Redistribution and/or modification are also permitted under the * terms of the GNU General Public License as published by the * Free Software Foundation (version 2 or later, at your option). * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. */ #include "precompiled.h" #include #include "lib.h" #include "posix.h" #include "path_util.h" static inline bool is_dir_sep(char c) { if(c == '/' || c == DIR_SEP) return true; return false; } // is s2 a subpath of s1, or vice versa? +// (equal counts as subpath) bool path_is_subpath(const char* s1, const char* s2) { // make sure s1 is the shorter string if(strlen(s1) > strlen(s2)) std::swap(s1, s2); int c1 = 0, last_c1, c2; for(;;) { last_c1 = c1; c1 = *s1++, c2 = *s2++; // end of s1 reached: if(c1 == '\0') { // s1 matched s2 up until: if((c2 == '\0') || // its end (i.e. they're equal length) OR is_dir_sep(c2) || // start of next component OR is_dir_sep(last_c1)) // ", but both have a trailing slash // => is subpath return true; } // mismatch => is not subpath if(c1 != c2) return false; } } // if path is invalid, return a descriptive error code, otherwise ERR_OK. LibError path_validate(const char* path) { // disallow "/", because it would create a second 'root' (with name = ""). // root dir is "". if(path[0] == '/') WARN_RETURN(ERR_PATH_NOT_RELATIVE); // scan each char in path string; count length. int c = 0; // current char; used for .. detection size_t path_len = 0; for(;;) { const int last_c = c; c = path[path_len++]; // whole path is too long if(path_len >= PATH_MAX) WARN_RETURN(ERR_PATH_LENGTH); // disallow: // - ".." 
(prevent going above the VFS root dir) // - "./" (security hole when mounting and not supported on Windows). // allow "/.", because CVS backup files include it. if(last_c == '.' && (c == '.' || c == '/')) WARN_RETURN(ERR_PATH_NON_CANONICAL); // disallow OS-specific dir separators if(c == '\\' || c == ':') WARN_RETURN(ERR_PATH_NON_PORTABLE); // end of string, no errors encountered if(c == '\0') break; } return ERR_OK; } // if name is invalid, return a descriptive error code, otherwise ERR_OK. // (name is a path component, i.e. that between directory separators) LibError path_component_validate(const char* name) { // disallow empty strings if(*name == '\0') WARN_RETURN(ERR_PATH_EMPTY); for(;;) { const int c = *name++; // disallow *any* dir separators (regardless of which // platform we're on). if(c == '\\' || c == ':' || c == '/') WARN_RETURN(ERR_PATH_COMPONENT_SEPARATOR); // end of string, no errors encountered if(c == '\0') break; } return ERR_OK; } // copy path strings (provided for convenience). void path_copy(char* dst, const char* src) { strcpy_s(dst, PATH_MAX, src); } // combine and into one path, and write to . // if necessary, a directory separator is added between the paths. // each may be empty, filenames, or full paths. // total path length (including '\0') must not exceed PATH_MAX. LibError path_append(char* dst, const char* path1, const char* path2, uint flags) { const size_t len1 = strlen(path1); const size_t len2 = strlen(path2); size_t total_len = len1 + len2 + 1; // includes '\0' const bool no_end_slash1 = (len1 == 0 || !is_dir_sep(path1[len1-1])); const bool no_end_slash2 = (len2 == 0 || !is_dir_sep(path2[len2-1])); // check if we need to add '/' between path1 and path2 // notes: // - the second can't start with '/' (not allowed by path_validate) // - must check len2 as well - if it's empty, we'd end up // inadvertently terminating the string with '/'. 
bool need_separator = false; if(len2 != 0 && len1 != 0 && no_end_slash1) { total_len++; // for '/' need_separator = true; } // check if trailing slash requested and not already present bool need_terminator = false; if(flags & PATH_APPEND_SLASH && no_end_slash2) { total_len++; // for '/' need_terminator = true; } if(total_len > PATH_MAX) WARN_RETURN(ERR_PATH_LENGTH); strcpy(dst, path1); // safe dst += len1; if(need_separator) *dst++ = '/'; strcpy(dst, path2); // safe if(need_terminator) strcpy(dst+len2, "/"); // safe return ERR_OK; } // strip from the start of , prepend , // and write to . // returns ERR_FAIL (without warning!) if the beginning of doesn't // match . LibError path_replace(char* dst, const char* src, const char* remove, const char* replace) { // remove doesn't match start of const size_t remove_len = strlen(remove); if(strncmp(src, remove, remove_len) != 0) return ERR_FAIL; // NOWARN // if removing will leave a separator at beginning of src, remove it // (example: "a/b"; removing "a" would yield "/b") const char* start = src+remove_len; if(is_dir_sep(*start)) start++; // prepend replace. RETURN_ERR(path_append(dst, replace, start)); return ERR_OK; } //----------------------------------------------------------------------------- // split paths into specific parts // return pointer to the name component within path (i.e. skips over all // characters up to the last dir separator, if any). const char* path_name_only(const char* path) { // first try: look for portable '/' const char* slash = strrchr(path, '/'); // not present if(!slash) { // now look for platform-specific DIR_SEP slash = strrchr(path, DIR_SEP); // neither present, it's a filename only if(!slash) return path; } +// TODO: take max of portableslash, nonportableslash + const char* name = slash+1; return name; } // return last component within path. this is similar to path_name_only, // but correctly handles VFS paths, which must end with '/'. 
// (path_name_only would return "") const char* path_last_component(const char* path) { // ('\0' is end of set string) static const char separators[3] = { DIR_SEP, '/', '\0' }; const char* pos = path; const char* last_component = path; for(;;) { if(*pos == '\0') break; last_component = pos; const size_t component_len = strcspn(pos, separators); pos += component_len+1; // +1 for separator } return last_component; } // if contains a name component, it is stripped away. void path_strip_fn(char* path) { char* name = (char*)path_name_only(path); *name = '\0'; // cut off string here } // fill

with the directory path portion of // ("" if root dir, otherwise ending with '/'). // note: copies to and proceeds to path_strip_fn it. void path_dir_only(const char* path, char* dir) { path_copy(dir, path); path_strip_fn(dir); } // return extension of , or "" if there is none. // NOTE: does not include the period; e.g. "a.bmp" yields "bmp". const char* path_extension(const char* fn) { const char* dot = strrchr(fn, '.'); if(!dot) return ""; const char* ext = dot+1; return ext; } // call with for each component in . LibError path_foreach_component(const char* path_org, PathComponentCb cb, void* ctx) { CHECK_PATH(path_org); // copy into (writeable) buffer so we can 'tokenize' path components by // replacing '/' with '\0'. char path[PATH_MAX]; strcpy_s(path, ARRAY_SIZE(path), path_org); char* cur_component = path; bool is_dir = true; // until we find a component without slash // successively navigate to the next component in . for(;;) { // at end of string - done. // (this happens if is empty or ends with slash) if(*cur_component == '\0') break; // find end of cur_component char* slash = (char*)strchr(cur_component, '/'); // .. try platform-specific separator if(!slash) slash = (char*)strchr(cur_component, DIR_SEP); // decide its type and 0-terminate // .. filename (by definition) if(!slash) is_dir = false; // .. directory else *slash = '\0'; // 0-terminate cur_component LibError ret = cb(cur_component, is_dir, ctx); // callback wants to abort - return its value. if(ret != INFO_CB_CONTINUE) return ret; // filename is by definition the last component. abort now // in case the callback didn't. if(!is_dir) break; // advance to next component // .. undo having replaced '/' with '\0' - this means will // store the complete path up to and including cur_component. 
*slash = '/'; cur_component = slash+1; } return ERR_OK; } //----------------------------------------------------------------------------- // convenience "class" that simplifies successively appending a filename to // its parent directory. this avoids needing to allocate memory and calling // strlen/strcat. used by wdll_ver and dir_next_ent. // we want to maintain C compatibility, so this isn't a C++ class. // write the given directory path into our buffer and set end/chars_left // accordingly. need not but can end with a directory separator. // // note: and the filename set via path_package_append_file are separated by // '/'. this is to allow use on portable paths; the function otherwise // does not care if paths are relative/portable/absolute. LibError path_package_set_dir(PathPackage* pp, const char* dir) { // -1 allows for trailing DIR_SEP that will be added if not // already present. if(strcpy_s(pp->path, ARRAY_SIZE(pp->path)-1, dir) != 0) WARN_RETURN(ERR_PATH_LENGTH); size_t len = strlen(pp->path); // add directory separator if not already present // .. but only check this if dir != "" (=> len-1 is safe) if(len != 0) { char* last_char = pp->path+len-1; if(!is_dir_sep(*last_char)) { *(last_char+1) = '/'; // note: need to 0-terminate because pp.path is uninitialized // and we overwrite strcpy_s's terminator above. *(last_char+2) = '\0'; // only bump by 1 - filename must overwrite '\0'. len++; } } pp->end = pp->path+len; pp->chars_left = ARRAY_SIZE(pp->path)-len; return ERR_OK; } // append the given filename to the directory established by the last // path_package_set_dir on this package. the whole path is accessible at pp->path. 
LibError path_package_append_file(PathPackage* pp, const char* path) { CHECK_ERR(strcpy_s(pp->end, pp->chars_left, path)); return ERR_OK; } Index: ps/trunk/source/lib/debug.h =================================================================== --- ps/trunk/source/lib/debug.h (revision 3910) +++ ps/trunk/source/lib/debug.h (revision 3911) @@ -1,396 +1,515 @@ /** * ========================================================================= * File : debug.h * Project : 0 A.D. * Description : platform-independent debug support code. * * @author Jan.Wassenberg@stud.uni-karlsruhe.de * ========================================================================= */ /* * Copyright (c) 2005 Jan Wassenberg * * Redistribution and/or modification are also permitted under the * terms of the GNU General Public License as published by the * Free Software Foundation (version 2 or later, at your option). * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. */ #ifndef DEBUG_H_INCLUDED #define DEBUG_H_INCLUDED #include "lib.h" // STMT #if OS_WIN # include "sysdep/win/wdbg.h" #else # include "sysdep/unix/udbg.h" #endif -/* - -[KEEP IN SYNC WITH WIKI] +/** overview -------- this module provides platform-independent debug facilities, useful for diagnosing and reporting program errors. - a symbol engine provides access to compiler-generated debug information and can also give a stack trace including local variables; - the breakpoint API enables stopping when a given address is executed, read or written to (as specified); - a hook into the system's memory allocator can optionally check for and report heap corruption; - our more powerful assert() replacement gives a stack trace so that the underlying problem becomes apparent; - the output routines make for platform-independent logging and crashlogs with "last-known activity" reporting. 
usage ----- please see the detailed comments below on how to use the individual features. much of this is only helpful if you explicity ask for it! rationale --------- much of this functionality already exists within the VC7 IDE/debugger. motivation for this code is as follows: - we want a consistent interface for all platforms; - limitations(*) in the VC variants should be fixed; - make debugging as easy as possible. * mostly pertaining to Release mode - e.g. symbols cannot be resolved even if debug information is present and assert dialogs are useless. -*/ +**/ //----------------------------------------------------------------------------- // debug memory allocator //----------------------------------------------------------------------------- -// check heap integrity (independently of mmgr). -// errors are reported by the CRT or via debug_display_error. +/** + * check heap integrity (independently of mmgr). + * errors are reported by the CRT or via debug_display_error. + **/ extern void debug_heap_check(void); enum DebugHeapChecks { - // no automatic checks. (default) + /** + * no automatic checks. (default) + **/ DEBUG_HEAP_NONE = 0, - // basic automatic checks when deallocating. + /** + * basic automatic checks when deallocating. + **/ DEBUG_HEAP_NORMAL = 1, - // all automatic checks on every memory API call. this is really - // slow (x100), but reports errors closer to where they occurred. + /** + * all automatic checks on every memory API call. this is really + * slow (x100), but reports errors closer to where they occurred. + **/ DEBUG_HEAP_ALL = 2 }; -// call at any time; from then on, the specified checks will be performed. -// if not called, the default is DEBUG_HEAP_NONE, i.e. do nothing. +/** + * call at any time; from then on, the specified checks will be performed. + * if not called, the default is DEBUG_HEAP_NONE, i.e. do nothing. 
+ **/ extern void debug_heap_enable(DebugHeapChecks what); //----------------------------------------------------------------------------- // debug_assert //----------------------------------------------------------------------------- -// rationale: we call this "debug_assert" instead of "assert" for the -// following reasons: -// - consistency (everything here is prefixed with debug_) and -// - to avoid inadvertent use of the much less helpful built-in CRT assert. -// if we were to override assert, it would be difficult to tell whether -// user source has included (possibly indirectly via other -// headers) and thereby stomped on our definition. - -// make sure the expression evaluates to non-zero. used to validate -// invariants in the program during development and thus gives a -// very helpful warning if something isn't going as expected. -// sprinkle these liberally throughout your code! -// -// recommended use is debug_assert(expression && "descriptive string") - -// the string can pass more information about the problem on to whomever -// is seeing the error. -// -// rationale: 0x55 and 0xAA are distinctive values and thus -// help debug the symbol engine. +/** + * make sure the expression evaluates to non-zero. used to validate + * invariants in the program during development and thus gives a + * very helpful warning if something isn't going as expected. + * sprinkle these liberally throughout your code! + * + * recommended use is debug_assert(expression && "descriptive string") - + * the string can pass more information about the problem on to whomever + * is seeing the error. + * + * rationale: we call this "debug_assert" instead of "assert" for the + * following reasons: + * - consistency (everything here is prefixed with debug_) and + * - to avoid inadvertent use of the much less helpful built-in CRT assert. 
+ * if we were to override assert, it would be difficult to tell whether + * user source has included (possibly indirectly via other + * headers) and thereby stomped on our definition. + * + * implementation rationale: 0x55 and 0xAA are distinctive values and + * thus help debug the symbol engine. + **/ #define debug_assert(expr) \ STMT(\ static unsigned char suppress__ = 0x55;\ if(suppress__ == 0x55 && !(expr))\ {\ switch(debug_assert_failed(#expr, __FILE__, __LINE__, __func__))\ {\ case ER_SUPPRESS:\ suppress__ = 0xAA;\ break;\ case ER_CONTINUE:\ break;\ default:\ case ER_BREAK:\ debug_break();\ break;\ }\ }\ ) - -// show a dialog to make sure unexpected states in the program are noticed. -// this is less error-prone than "debug_assert(0 && "text");" and avoids -// "conditional expression is constant" warnings. we'd really like to -// completely eliminate the problem; replacing 0 literals with extern -// volatile variables fools VC7 but isn't guaranteed to be free of overhead. -// we therefore just squelch the warning (unfortunately non-portable). +/** + * show a dialog to make sure unexpected states in the program are noticed. + * this is less error-prone than "debug_assert(0 && "text");" and avoids + * "conditional expression is constant" warnings. we'd really like to + * completely eliminate the problem; replacing 0 literals with extern + * volatile variables fools VC7 but isn't guaranteed to be free of overhead. + * we therefore just squelch the warning (unfortunately non-portable). + **/ #define debug_warn(str) debug_assert((str) && 0) -// if (LibError)err indicates an function failed, display the error dialog. -// used by CHECK_ERR et al., which wrap function calls and automatically -// warn user and return to caller. +/** + * if (LibError)err indicates an function failed, display the error dialog. + * used by CHECK_ERR et al., which wrap function calls and automatically + * warn user and return to caller. 
+ **/ #define DEBUG_WARN_ERR(err)\ STMT(\ static unsigned char suppress__ = 0x55;\ if(suppress__ == 0x55)\ {\ switch(debug_warn_err(err, __FILE__, __LINE__, __func__))\ {\ case ER_SUPPRESS:\ suppress__ = 0xAA;\ break;\ case ER_CONTINUE:\ break;\ default:\ case ER_BREAK:\ debug_break();\ break;\ }\ }\ ) -// called when an assertion has failed; notifies the user via debug_display_error. +/** + * called when a debug_assert fails; + * notifies the user via debug_display_error. + * + * @param assert_expr the expression that failed; typically passed as + * #expr in the assert macro. + * @param file, line source file name and line number of the spot that failed + * @param func name of the function containing it + * @return ErrorReaction (user's choice: continue running or stop?) + **/ extern ErrorReaction debug_assert_failed(const char* assert_expr, const char* file, int line, const char* func); -// called when a lib function wrapped in DEBUG_WARN_ERR failed; -// notifies the user via debug_display_error. +/** + * called when a DEBUG_WARN_ERR indicates an error occurred; + * notifies the user via debug_display_error. + * + * @param err LibError value indicating the error that occurred + * @param file, line source file name and line number of the spot that failed + * @param func name of the function containing it + * @return ErrorReaction (user's choice: continue running or stop?) + **/ extern ErrorReaction debug_warn_err(LibError err, const char* file, int line, const char* func); //----------------------------------------------------------------------------- // output //----------------------------------------------------------------------------- -// write a formatted string to the debug channel, subject to filtering -// (see below). implemented via debug_puts - see performance note there. +/** + * write a formatted string to the debug channel, subject to filtering + * (see below). implemented via debug_puts - see performance note there. 
+ * + * @param format string and varargs; see printf. + **/ extern void debug_printf(const char* fmt, ...); -// note: this merely converts to a MBS and calls debug_printf. + +/// note: this merely converts to a MBS and calls debug_printf. extern void debug_wprintf(const wchar_t* fmt, ...); -// translates and displays the given strings in a dialog. -// this is typically only used when debug_display_error has failed or -// is unavailable because that function is much more capable. -// implemented via sys_display_msgw; see documentation there. +/** + * translates and displays the given strings in a dialog. + * this is typically only used when debug_display_error has failed or + * is unavailable because that function is much more capable. + * implemented via sys_display_msgw; see documentation there. + **/ extern void debug_display_msgw(const wchar_t* caption, const wchar_t* msg); - +/// flags to customize debug_display_error behavior enum DisplayErrorFlags { - // allow the suppress button (requires calling via macro that - // maintains a 'suppress' bool; see debug_assert) + /** + * allow the suppress button (requires calling via macro that + * maintains a 'suppress' bool; see debug_assert) + **/ DE_ALLOW_SUPPRESS = 1, - // disallow the continue button. used e.g. if an exception is fatal. + /** + * disallow the continue button. used e.g. if an exception is fatal. + **/ DE_NO_CONTINUE = 2, - // do not trigger a breakpoint inside debug_display_error; caller - // will take care of this if ER_BREAK is returned. this is so that the - // debugger can jump directly into the offending function. + /** + * do not trigger a breakpoint inside debug_display_error; caller + * will take care of this if ER_BREAK is returned. this is so that the + * debugger can jump directly into the offending function. + **/ DE_MANUAL_BREAK = 4 }; -// display the error dialog. shows along with a stack trace. -// context and skip are as with debug_dump_stack. -// flags: see DisplayErrorFlags. 
file and line indicate where the error -// occurred and are typically passed as __FILE__, __LINE__. +/** + * display an error dialog with a message and stack trace. + * + * @param description text to show. + * @param flags: see DisplayErrorFlags. + * @param context, skip: see debug_dump_stack. + * @param file, line: location of the error (typically passed as + * __FILE__, __LINE__ from a macro) + * @return ErrorReaction (user's choice: continue running or stop?) + **/ extern ErrorReaction debug_display_error(const wchar_t* description, int flags, uint skip, void* context, const char* file, int line); -// convenience version, in case the advanced parameters aren't needed. -// macro instead of providing overload/default values for C compatibility. +/** + * convenience version, in case the advanced parameters aren't needed. + * macro instead of providing overload/default values for C compatibility. + **/ #define DISPLAY_ERROR(text) debug_display_error(text, 0, 0,0, __FILE__,__LINE__) // // filtering // -// debug output is very useful, but "too much of a good thing can kill you". -// we don't want to require different LOGn() macros that are enabled -// depending on "debug level", because changing that entails lengthy -// compiles and it's too coarse-grained. instead, we require all -// strings to start with "tag_string:" (exact case and no quotes; -// the alphanumeric-only identifies output type). -// they are then subject to filtering: only if the tag has been -// "added" via debug_filter_add is the appendant string displayed. -// -// this approach is easiest to implement and is fine because we control -// all logging code. LIMODS falls from consideration since it's not -// portable and too complex. -// -// notes: -// - filter changes only affect subsequent debug_*printf calls; -// output that didn't pass the filter is permanently discarded. -// - strings not starting with a tag are always displayed. 
-// - debug_filter_* can be called at any time and from the debugger. - -// in future, allow output with the given tag to proceed. -// no effect if already added. +/** + * debug output is very useful, but "too much of a good thing can kill you". + * we don't want to require different LOGn() macros that are enabled + * depending on "debug level", because changing that entails lengthy + * compiles and it's too coarse-grained. instead, we require all + * strings to start with "tag_string:" (exact case and no quotes; + * the alphanumeric-only identifies output type). + * they are then subject to filtering: only if the tag has been + * "added" via debug_filter_add is the appendant string displayed. + * + * this approach is easiest to implement and is fine because we control + * all logging code. LIMODS falls from consideration since it's not + * portable and too complex. + * + * notes: + * - filter changes only affect subsequent debug_*printf calls; + * output that didn't pass the filter is permanently discarded. + * - strings not starting with a tag are always displayed. + * - debug_filter_* can be called at any time and from the debugger. + + * in future, allow output with the given tag to proceed. + * no effect if already added. + **/ extern void debug_filter_add(const char* tag); -// in future, discard output with the given tag. -// no effect if not currently added. + +/** + * in future, discard output with the given tag. + * no effect if not currently added. + **/ extern void debug_filter_remove(const char* tag); -// clear all filter state; equivalent to debug_filter_remove for -// each tag that was debug_filter_add-ed. + +/** + * clear all filter state; equivalent to debug_filter_remove for + * each tag that was debug_filter_add-ed. + **/ extern void debug_filter_clear(); -// write to memory buffer (fast) -// used for "last activity" reporting in the crashlog. +/** + * write to memory buffer (fast) + * used for "last activity" reporting in the crashlog. 
+ * + * @param format string and varags; see printf. + **/ extern void debug_wprintf_mem(const wchar_t* fmt, ...); -// write all logs and out to crashlog.txt (unicode format). +/** + * write all logs and out to crashlog.txt (unicode format). + **/ extern LibError debug_write_crashlog(const wchar_t* text); //----------------------------------------------------------------------------- // breakpoints //----------------------------------------------------------------------------- -// trigger a breakpoint when reached/"called". -// defined as a macro by the platform-specific header above; this allows -// breaking directly into the target function, instead of one frame -// below it as with a conventional call-based implementation. -//#define debug_break() - - -// sometimes mmgr's 'fences' (making sure padding before and after the -// allocation remains intact) aren't enough to catch hard-to-find -// memory corruption bugs. another tool is to trigger a debug exception -// when the later to be corrupted variable is accessed; the problem should -// then become apparent. -// the VC++ IDE provides such 'breakpoints', but can only detect write access. -// additionally, it can't resolve symbols in Release mode (where this would -// be most useful), so we provide a breakpoint API. +/** + * trigger a breakpoint when reached/"called". + * defined as a macro by the platform-specific header above; this allows + * breaking directly into the target function, instead of one frame + * below it as with a conventional call-based implementation. + **/ +//#define debug_break() // not defined here; see above + -// (values chosen to match IA-32 bit defs, so compiler can optimize. -// this isn't required, it'll work regardless.) +/** + * sometimes mmgr's 'fences' (making sure padding before and after the + * allocation remains intact) aren't enough to catch hard-to-find + * memory corruption bugs. 
another tool is to trigger a debug exception + * when the later to be corrupted variable is accessed; the problem should + * then become apparent. + * the VC++ IDE provides such 'breakpoints', but can only detect write access. + * additionally, it can't resolve symbols in Release mode (where this would + * be most useful), so we provide a breakpoint API. + + * (values chosen to match IA-32 bit defs, so compiler can optimize. + * this isn't required; it'll work regardless.) + **/ enum DbgBreakType { - DBG_BREAK_CODE = 0, // execute - DBG_BREAK_DATA_WRITE = 1, // write - DBG_BREAK_DATA = 3 // read or write + DBG_BREAK_CODE = 0, /// execute + DBG_BREAK_DATA_WRITE = 1, /// write + DBG_BREAK_DATA = 3 /// read or write }; -// arrange for a debug exception to be raised when is accessed -// according to . -// for simplicity, the length (range of bytes to be checked) is derived -// from addr's alignment, and is typically 1 machine word. -// breakpoints are a limited resource (4 on IA-32); if none are -// available, we return ERR_LIMIT. +/** + * arrange for a debug exception to be raised when the + * indicated memory is accessed. + * + * @param addr memory address + * for simplicity, the length (range of bytes to be checked) is derived + * from addr's alignment, and is typically 1 machine word. + * @param type the type of access to watch for (see DbgBreakType) + * @return LibError; ERR_LIMIT if no more breakpoints are available + * (they are a limited resource - only 4 on IA-32). + **/ extern LibError debug_set_break(void* addr, DbgBreakType type); -// remove all breakpoints that were set by debug_set_break. -// important, since these are a limited resource. +/** + * remove all breakpoints that were set by debug_set_break. + * important, since these are a limited resource. 
+ **/ extern LibError debug_remove_all_breaks(); //----------------------------------------------------------------------------- // symbol access //----------------------------------------------------------------------------- -// maximum number of characters (including trailing \0) written to -// user's buffers by debug_resolve_symbol. +/** + * maximum number of characters (including trailing \0) written to + * user's buffers by debug_resolve_symbol. + **/ const size_t DBG_SYMBOL_LEN = 1000; const size_t DBG_FILE_LEN = 100; -// read and return symbol information for the given address. all of the -// output parameters are optional; we pass back as much information as is -// available and desired. return 0 iff any information was successfully -// retrieved and stored. -// sym_name and file must hold at least the number of chars above; -// file is the base name only, not path (see rationale in wdbg_sym). -// the PDB implementation is rather slow (~500us). +/** + * read and return symbol information for the given address. + * + * NOTE: the PDB implementation is rather slow (~500us). + * + * @param ptr_of_interest address of symbol (e.g. function, variable) + * @param sym_name optional out; size >= DBG_SYMBOL_LEN chars; + * receives symbol name returned via debug info. + * @param file optional out; size >= DBG_FILE_LEN chars; receives + * base name only (no path; see rationale in wdbg_sym) of + * source file containing the symbol. + * @param line optional out; receives source file line number of symbol. + * + * note: all of the output parameters are optional; we pass back as much + * information as is available and desired. + * @return LibError; ERR_OK iff any information was successfully + * retrieved and stored. + **/ extern LibError debug_resolve_symbol(void* ptr_of_interest, char* sym_name, char* file, int* line); -// write a complete stack trace (including values of local variables) into -// the specified buffer. 
if is nonzero, it is assumed to be a -// platform-specific representation of execution state (e.g. Win32 CONTEXT) -// and tracing starts there; this is useful for exceptions. -// otherwise, tracing starts at the current stack position, and the given -// number of stack frames (i.e. functions) above the caller are skipped. -// this prevents functions like debug_assert_failed from -// cluttering up the trace. returns the buffer for convenience. +/** + * write a complete stack trace (including values of local variables) into + * the specified buffer. + * + * @param buf target buffer + * @param max_chars of buffer (should be several thousand) + * @param skip number of stack frames (i.e. functions on call stack) to skip. + * this prevents error-reporting functions like debug_assert_failed from + * cluttering up the trace. + * @param context platform-specific representation of execution state + * (e.g. Win32 CONTEXT). if not NULL, tracing starts there; this is useful + * for exceptions. otherwise, tracing starts from the current call stack. + * @return buf for convenience; writes an error string into it if + * something goes wrong. + **/ extern const wchar_t* debug_dump_stack(wchar_t* buf, size_t max_chars, uint skip, void* context); //----------------------------------------------------------------------------- // helper functions (used by implementation) //----------------------------------------------------------------------------- -// [system-dependent] write a string to the debug channel. -// this can be quite slow (~1 ms)! On Windows, it uses OutputDebugString -// (entails context switch), otherwise stdout+fflush (waits for IO). +/** + * [system-dependent] write a string to the debug channel. + * this can be quite slow (~1 ms)! On Windows, it uses OutputDebugString + * (entails context switch), otherwise stdout+fflush (waits for IO). + **/ extern void debug_puts(const char* text); -// abstraction of all STL iterators used by debug_stl. 
+/// abstraction of all STL iterators used by debug_stl. typedef const u8* (*DebugIterator)(void* internal, size_t el_size); -// return address of the Nth function on the call stack. -// if is nonzero, it is assumed to be a platform-specific -// representation of execution state (e.g. Win32 CONTEXT) and tracing -// starts there; this is useful for exceptions. -// otherwise, tracing starts at the current stack position, and the given -// number of stack frames (i.e. functions) above the caller are skipped. -// used by mmgr to determine what function requested each allocation; -// this is fast enough to allow that. +/** + * return address of the Nth function on the call stack. + * + * used by mmgr to determine what function requested each allocation; + * this is fast enough to allow that. + * + * @param skip number of stack frames (i.e. functions on call stack) to skip. + * @param context platform-specific representation of execution state + * (e.g. Win32 CONTEXT). if not NULL, tracing starts there; this is useful + * for exceptions. otherwise, tracing starts from the current call stack. + * @return address of Nth function + **/ extern void* debug_get_nth_caller(uint skip, void* context); -// return 1 if the pointer appears to be totally bogus, otherwise 0. -// this check is not authoritative (the pointer may be "valid" but incorrect) -// but can be used to filter out obviously wrong values in a portable manner. +/** + * check if a pointer appears to be totally invalid. + * + * this check is not authoritative (the pointer may be "valid" but incorrect) + * but can be used to filter out obviously wrong values in a portable manner. + * + * @param p pointer + * @return 1 if totally bogus, otherwise 0. + **/ extern int debug_is_pointer_bogus(const void* p); +/// does the given pointer appear to point to code? extern bool debug_is_code_ptr(void* p); + +/// does the given pointer appear to point to the stack? 
extern bool debug_is_stack_ptr(void* p); -// set the current thread's name; it will be returned by subsequent calls to -// debug_get_thread_name. -// -// the string pointed to by MUST remain valid throughout the -// entire program; best to pass a string literal. allocating a copy -// would be quite a bit more work due to cleanup issues. -// -// if supported on this platform, the debugger is notified of the new name; -// it will be displayed there instead of just the handle. +/** + * set the current thread's name; it will be returned by subsequent calls to + * debug_get_thread_name. + * + * if supported on this platform, the debugger is notified of the new name; + * it will be displayed there instead of just the handle. + * + * @param name identifier string for thread. MUST remain valid throughout + * the entire program; best to pass a string literal. allocating a copy + * would be quite a bit more work due to cleanup issues. + **/ extern void debug_set_thread_name(const char* name); -// return the pointer assigned by debug_set_thread_name or 0 if -// that hasn't been done yet for this thread. +/** + * return current thread's name. + * + * @return thread name, or NULL if one hasn't been assigned yet + * via debug_set_thread_name. + **/ extern const char* debug_get_thread_name(); -// call at exit to avoid leaks (not strictly necessary). +/** + * call at exit to avoid some leaks. + * not strictly necessary. + **/ extern void debug_shutdown(); #endif // #ifndef DEBUG_H_INCLUDED Index: ps/trunk/source/lib/self_test.h =================================================================== --- ps/trunk/source/lib/self_test.h (revision 3910) +++ ps/trunk/source/lib/self_test.h (revision 3911) @@ -1,175 +1,178 @@ /** * ========================================================================= * File : self_test.h * Project : 0 A.D. 
* Description : helpers for built-in self tests * * @author Jan.Wassenberg@stud.uni-karlsruhe.de * ========================================================================= */ /* * Copyright (c) 2005 Jan Wassenberg * * Redistribution and/or modification are also permitted under the * terms of the GNU General Public License as published by the * Free Software Foundation (version 2 or later, at your option). * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. */ /* [KEEP IN SYNC WITH WIKI!] Introduction ------------ Self-tests as advocated by eXtreme Programming have proven to be useful. By embedding test code into modules, we can be confident that boundary cases are handled correctly and everything still works after edits. We give guidelines for their use and explain several helper mechanisms below. Guidelines ---------- What makes a good self-test? - They belong in the module being tested to ensure they are kept in sync with it. - It is easiest to attach them to low-level functions, e.g. ilog2, rather than verifying the module's final result (e.g. checking renderer output by comparing pictures). - You should cover all cases: expected failures ("does it fail as expected?"), bad inputs ("does it reject those?"), and successes ("did it have the expected result?"). - Tests should be non-intrusive (only bother user if something fails) and very quick. This is because they are executed every program run - which is a good thing because it solves the common problem of forgetting to run them after a change. If the test is unavoidably slow or annoying (example: wdbg_sym's stack trace), then best to disable it by default; see below for how. It can then be enabled manually after changes, and that is better than no test at all. Example Usage ------------- The following is a working example of a built-in self test using our facilities. 
Further notes below are referenced with (1) etc. >>> #if SELF_TEST_ENABLED // (1) namespace test { // (2) static void test_log2() { TEST(ilog2(0) == -1); // (3) // further test cases.. } static void self_test() { test_log2(); // further test groups.. } SELF_TEST_RUN; // (4) } // namespace test #endif // #if SELF_TEST_ENABLED <<< (1) when not enabled, self-tests are completely removed so as not to bloat the executable. for details on how to enable/disable them globally or override in single files, see below. (2) wrapping in a namespace is optional and must be removed for C programs. it avoids possible name collisions with the module being tested. (3) TEST *must* be used instead of debug_assert et al.! this is explained below. (4) automatically calls your self_test function at non-local static object init time (i.e. before main is entered). For further details, see below. */ #ifndef SELF_TEST_H__ #define SELF_TEST_H__ // a self test is enabled if at the point of its definition // SELF_TEST_ENABLED evaluates to 1 (non-zero). // the value #defined below is the global default. you can override it // in individual files by defining to 0 or 1 before including this header. #ifndef SELF_TEST_ENABLED #define SELF_TEST_ENABLED 1 #endif // each test case should use this (instead of assert et al.) to verify // conditions. // rationale: some code checks boundary conditions via assert. these are // often triggered deliberately in self-tests to verify error behavior. // we therefore squelch asserts while tests are active (see mechanism below), // and this is the only error reporter guaranteed to work. // // note: could also stringize condition and display that, but it'd require // macro magic (stringize+prepend L) and we already display file+line. #define TEST(condition) STMT(\ if(!(condition))\ DISPLAY_ERROR(L"Self-test failed");\ ) // your source file should contain a function: void self_test(void) that // performs all tests or calls out to individual test functions. 
// this macro calls it at static init time and takes care of setting // self_test_active (see above). // // rationale: since compiler optimizations may mess with the dummy variable, // best to put this in a macro so we won't have to change each occurrence. #define SELF_TEST_RUN\ static int dummy = self_test_run(self_test) // calling at static init time may not always be desirable - some // self-tests may require initialization beforehand. this mechanism allows // registering self tests automatically, which are then all run when you // call self_test_run_all. #define SELF_TEST_REGISTER\ static SelfTestRecord self_test_record = { self_test, 0 };\ static int dummy = self_test_register(&self_test_record) struct SelfTestRecord { void(*func)(); const SelfTestRecord* next; }; // call all self-tests registered thus far. rationale: see above. // also displays a banner+elapsed time via debug_printf. extern void self_test_run_all(); // // internal use only: // // trampoline that sets self_test_active and returns a dummy value; // used by SELF_TEST_RUN. extern int self_test_run(void(*func)()); extern int self_test_register(SelfTestRecord* r); // checked by debug_assert_failed; disables asserts if true (see above). // set/cleared by run_self_test. extern bool self_test_active; +#define TS_ASSERT_OK(expr) TS_ASSERT_EQUAL((expr), ERR_OK) +#define TS_ASSERT_STR_EQUAL(str1, str2) TS_ASSERT(!strcmp((str1), (str2))) + #endif // #ifndef SELF_TEST_H__ Index: ps/trunk/source/lib/lockfree.h =================================================================== --- ps/trunk/source/lib/lockfree.h (revision 3910) +++ ps/trunk/source/lib/lockfree.h (revision 3911) @@ -1,166 +1,166 @@ /** * ========================================================================= * File : lockfree.h * Project : 0 A.D. * Description : lock-free synchronized data structures. 
* * @author Jan.Wassenberg@stud.uni-karlsruhe.de * ========================================================================= */ /* * Copyright (c) 2005 Jan Wassenberg * * Redistribution and/or modification are also permitted under the * terms of the GNU General Public License as published by the * Free Software Foundation (version 2 or later, at your option). * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. */ #ifndef LOCKFREE_H__ #define LOCKFREE_H__ #include "posix_types.h" // uintptr_t /* [KEEP IN SYNC WITH WIKI] overview -------- this module provides several implicitly thread-safe data structures. rather than allowing only one thread to access them at a time, their operations are carefully implemented such that they take effect in one atomic step. data consistency problems are thus avoided. this novel approach to synchronization has several advantages: - deadlocks are impossible; - overhead due to OS kernel entry is avoided; - graceful scaling to multiple processors is ensured. mechanism --------- the basic primitive that makes this possible is "compare and swap", a CPU instruction that performs both steps atomically. it compares a machine word against the expected value; if equal, the new value is written and an indication returned. otherwise, another thread must have been writing to the same location; the operation is typically retried. this instruction is available on all modern architectures; in some cases, emulation in terms of an alternate primitive (LL/SC) is necessary. memory management ----------------- one major remaining problem is how to free no longer needed nodes in the data structure. in general, we want to reclaim their memory for arbitrary use; this isn't safe as long as other threads are still accessing them. 
the RCU algorithm recognizes that all CPUs having entered a quiescent state means that no threads are still referencing data. lacking such kernel support, we use a similar mechanism - "hazard pointers" are set before accessing data; only if none are pointing to a node can it be freed. until then, they are stored in a per-thread 'waiting list'. this approach has several advantages over previous algorithms (typically involving reference count): the CAS primitive need only operate on single machine words, and space/time overhead is much reduced. usage notes ----------- useful "payload" in the data structures is allocated when inserting each item: additional_bytes are appended. rationale: see struct Node definition. since lock-free algorithms are subtle and easy to get wrong, an extensive -self-test is included; #define SELF_TEST_ENABLED 1 to activate. +self-test is included. terminology ----------- "atomic" means indivisible; in this case, other CPUs cannot interfere with such an operation. "race conditions" are potential data consistency problems resulting from lack of thread synchronization. "deadlock" is a state where several threads are waiting on one another and no progress is possible. "thread-safety" is understood to mean the preceding two problems do not occur. "scalability" is a measure of how efficient synchronization is; overhead should not increase significantly with more processors. "linearization point" denotes the time at which an external observer believes a lock-free operation to have taken effect. */ // // lock-free singly linked list // struct LFList { void* head; }; // make ready a previously unused(!) list object. if a negative error // code (currently only ERR_NO_MEM) is returned, the list can't be used. extern LibError lfl_init(LFList* list); // call when list is no longer needed; should no longer hold any references. extern void lfl_free(LFList* list); // return pointer to "user data" attached to , // or 0 if not found in the list. 
extern void* lfl_find(LFList* list, void* key); // insert into list in order of increasing key. ensures items are unique // by first checking if already in the list. returns 0 if out of memory, // otherwise a pointer to "user data" attached to . the optional // return variable indicates whether was added. extern void* lfl_insert(LFList* list, void* key, size_t additional_bytes, int* was_inserted); // remove from list; return -1 if not found, or 0 on success. extern LibError lfl_erase(LFList* list, void* key); // // lock-free hash table (chained, fixed size) // struct LFHash { LFList* tbl; uint mask; }; // make ready a previously unused(!) hash object. table size will be // ; this cannot currently be expanded. if a negative error // code (currently only ERR_NO_MEM) is returned, the hash can't be used. extern LibError lfh_init(LFHash* hash, size_t num_entries); // call when hash is no longer needed; should no longer hold any references. extern void lfh_free(LFHash* hash); // return pointer to "user data" attached to , // or 0 if not found in the hash. extern void* lfh_find(LFHash* hash, uintptr_t key); // insert into hash if not already present. returns 0 if out of memory, // otherwise a pointer to "user data" attached to . the optional // return variable indicates whether was added. extern void* lfh_insert(LFHash* hash, uintptr_t key, size_t additional_bytes, int* was_inserted); // remove from hash; return -1 if not found, or 0 on success. extern LibError lfh_erase(LFHash* hash, uintptr_t key); #endif // #ifndef LOCKFREE_H__ Index: ps/trunk/source/lib/adts.h =================================================================== --- ps/trunk/source/lib/adts.h (revision 3910) +++ ps/trunk/source/lib/adts.h (revision 3911) @@ -1,1464 +1,1310 @@ /** * ========================================================================= * File : adts.h * Project : 0 A.D. * Description : useful Abstract Data Types not provided by STL. 
* * @author Jan.Wassenberg@stud.uni-karlsruhe.de * ========================================================================= */ /* * Copyright (c) 2005 Jan Wassenberg * * Redistribution and/or modification are also permitted under the * terms of the GNU General Public License as published by the * Free Software Foundation (version 2 or later, at your option). * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. */ #ifndef ADTS_H__ #define ADTS_H__ #include "lib.h" #include #include #include #include #include template class DHT_Traits { public: static const size_t initial_entries = 16; size_t hash(Key key) const; bool equal(Key k1, Key k2) const; Key get_key(T t) const; }; template<> class DHT_Traits { public: static const size_t initial_entries = 512; size_t hash(const char* key) const { return (size_t)fnv_lc_hash(key); } bool equal(const char* k1, const char* k2) const { return !strcmp(k1, k2); } const char* get_key(const char* t) const { return t; } }; // intended for pointer types template > class DynHashTbl { T* tbl; u16 num_entries; u16 max_entries; // when initialized, = 2**n for faster modulo Traits tr; T& get_slot(Key key) const { size_t hash = tr.hash(key); debug_assert(max_entries != 0); // otherwise, mask will be incorrect const uint mask = max_entries-1; for(;;) { T& t = tbl[hash & mask]; // empty slot encountered => not found if(!t) return t; // keys are actually equal => found it if(tr.equal(key, tr.get_key(t))) return t; // keep going (linear probing) hash++; } } void expand_tbl() { // alloc a new table (but don't assign it to unless successful) T* old_tbl = tbl; tbl = (T*)calloc(max_entries*2, sizeof(T)); if(!tbl) { tbl = old_tbl; throw std::bad_alloc(); } max_entries += max_entries; // must be set before get_slot // newly initialized, nothing to copy - done if(!old_tbl) return; // re-hash from old table into the new 
one for(size_t i = 0; i < max_entries/2u; i++) { T t = old_tbl[i]; if(t) get_slot(tr.get_key(t)) = t; } free(old_tbl); } public: DynHashTbl() { tbl = 0; num_entries = 0; max_entries = tr.initial_entries/2; // will be doubled in expand_tbl debug_assert(is_pow2(max_entries)); expand_tbl(); } ~DynHashTbl() { clear(); } void clear() { // note: users might call clear() right before the dtor runs, // so safely handling calling this twice. SAFE_FREE(tbl); num_entries = 0; // rationale: must not set to 0 because expand_tbl only doubles the size. // don't keep the previous size because it may have become huge and // there is no provision for shrinking. max_entries = tr.initial_entries/2; // will be doubled in expand_tbl } void insert(const Key key, const T t) { // more than 75% full - increase table size. // do so before determining slot; this will invalidate previous pnodes. if(num_entries*4 >= max_entries*3) expand_tbl(); T& slot = get_slot(key); debug_assert(slot == 0); // not already present slot = t; num_entries++; } T find(Key key) const { return get_slot(key); } size_t size() const { return num_entries; } class iterator { public: typedef std::forward_iterator_tag iterator_category; typedef T value_type; typedef ptrdiff_t difference_type; typedef const T* pointer; typedef const T& reference; iterator() { } iterator(T* pos_, T* end_) : pos(pos_), end(end_) { } T& operator*() const { return *pos; } iterator& operator++() // pre { do pos++; while(pos != end && *pos == 0); return (*this); } bool operator==(const iterator& rhs) const { return pos == rhs.pos; } bool operator<(const iterator& rhs) const { return (pos < rhs.pos); } // derived const T* operator->() const { return &**this; } bool operator!=(const iterator& rhs) const { return !(*this == rhs); } iterator operator++(int) // post { iterator tmp = *this; ++*this; return tmp; } protected: T* pos; T* end; // only used when incrementing (avoid going beyond end of table) }; iterator begin() const { T* pos = tbl; 
while(pos != tbl+max_entries && *pos == 0) pos++; return iterator(pos, tbl+max_entries); } iterator end() const { return iterator(tbl+max_entries, 0); } }; //----------------------------------------------------------------------------- /* Cache for items of variable size and value/"cost". underlying displacement algorithm is pluggable; default is "Landlord". template reference: Entry provides size, cost, credit and credit_density(). rationale: - made a template instead of exposing Cache::Entry because that would drag a lot of stuff out of Cache. - calculates its own density since that entails a Divider functor, which requires storage inside Entry. Entries is a collection with iterator and begin()/end() and "static Entry& entry_from_it(iterator)". rationale: - STL map has pair as its value_type, so this function would return it->second. however, we want to support other container types (where we'd just return *it). Manager is a template parameterized on typename Key and class Entry. its interface is as follows: // is the cache empty? bool empty() const; // add (key, entry) to cache. void add(Key key, const Entry& entry); // if the entry identified by is not in cache, return false; // otherwise return true and pass back a pointer to it. bool find(Key key, const Entry** pentry) const; // remove an entry from cache, which is assumed to exist! // this makes sense because callers will typically first use find() to // return info about the entry; this also checks if present. void remove(Key key); // mark as just accessed for purpose of cache management. // it will tend to be kept in cache longer. void on_access(Entry& entry); // caller's intent is to remove the least valuable entry. // in implementing this, you have the latitude to "shake loose" // several entries (e.g. because their 'value' is equal). // they must all be push_back-ed into the list; Cache will dole // them out one at a time in FIFO order to callers. 
// // rationale: // - it is necessary for callers to receive a copy of the // Entry being evicted - e.g. file_cache owns its items and // they must be passed back to allocator when evicted. // - e.g. Landlord can potentially see several entries become // evictable in one call to remove_least_valuable. there are // several ways to deal with this: // 1) generator interface: we return one of { empty, nevermind, // removed, remove-and-call-again }. this greatly complicates // the call site. // 2) return immediately after finding an item to evict. // this changes cache behavior - entries stored at the // beginning would be charged more often (unfair). // resuming charging at the next entry doesn't work - this // would have to be flushed when adding, at which time there // is no provision for returning any items that may be evicted. // 3) return list of all entries "shaken loose". this incurs // frequent mem allocs, which can be alleviated via suballocator. // note: an intrusive linked-list doesn't make sense because // entries to be returned need to be copied anyway (they are // removed from the manager's storage). void remove_least_valuable(std::list& entry_list) */ // // functors to calculate minimum credit density (MCD) // // MCD is required for the Landlord algorithm's evict logic. // [Young02] calls it '\delta'. // scan over all entries and return MCD. template float ll_calc_min_credit_density(const Entries& entries) { float min_credit_density = FLT_MAX; for(typename Entries::const_iterator it = entries.begin(); it != entries.end(); ++it) { const float credit_density = Entries::entry_from_it(it).credit_density(); min_credit_density = fminf(min_credit_density, credit_density); } return min_credit_density; } // note: no warning is given that the MCD entry is being removed! // (reduces overhead in remove_least_valuable) // these functors must account for that themselves (e.g. by resetting // their state directly after returning MCD). 
// determine MCD by scanning over all entries. // tradeoff: O(N) time complexity, but all notify* calls are no-ops. template class McdCalc_Naive { public: void notify_added(const Entry&) const {} void notify_decreased(const Entry&) const {} void notify_impending_increase_or_remove(const Entry&) const {} void notify_increased_or_removed(const Entry&) const {} float operator()(const Entries& entries) const { const float mcd = ll_calc_min_credit_density(entries); return mcd; } }; // cache previous MCD and update it incrementally (when possible). // tradeoff: amortized O(1) time complexity, but notify* calls must // perform work whenever something in the cache changes. template class McdCalc_Cached { public: McdCalc_Cached() : min_credit_density(FLT_MAX), min_valid(false) {} void notify_added(const Entry& entry) { // when adding a new item, the minimum credit density can only // decrease or remain the same; acting as if entry's credit had // been decreased covers both cases. notify_decreased(entry); } void notify_decreased(const Entry& entry) { min_credit_density = MIN(min_credit_density, entry.credit_density()); } void notify_impending_increase_or_remove(const Entry& entry) { // remember if this entry had the smallest density is_min_entry = feq(min_credit_density, entry.credit_density()); } void notify_increased_or_removed(const Entry& UNUSED(entry)) { // .. it did and was increased or removed. we must invalidate // MCD and recalculate it next time. if(is_min_entry) { min_valid = false; min_credit_density = -1.0f; } } float operator()(const Entries& entries) { if(min_valid) { // the entry that has MCD will be removed anyway by caller; // we need to invalidate here because they don't call // notify_increased_or_removed. min_valid = false; return min_credit_density; } // this is somewhat counterintuitive. since we're calculating // MCD directly, why not mark our cached version of it valid // afterwards? 
reason is that our caller will remove the entry with // MCD, so it'll be invalidated anyway. // instead, our intent is to calculate MCD for the *next time*. const float ret = ll_calc_min_credit_density(entries); min_valid = true; min_credit_density = FLT_MAX; return ret; } private: float min_credit_density; bool min_valid; // temporary flag set by notify_impending_increase_or_remove bool is_min_entry; }; // // Landlord cache management policy: see [Young02]. // // in short, each entry has credit initially set to cost. when wanting to // remove an item, all are charged according to MCD and their size; // entries are evicted if their credit is exhausted. accessing an entry // restores "some" of its credit. template class McdCalc = McdCalc_Cached> class Landlord { public: bool empty() const { return map.empty(); } void add(Key key, const Entry& entry) { // adapter for add_ (which returns an iterator) (void)add_(key, entry); } bool find(Key key, const Entry** pentry) const { MapCIt it = map.find(key); if(it == map.end()) return false; *pentry = &it->second; return true; } void remove(Key key) { MapIt it = map.find(key); debug_assert(it != map.end()); remove_(it); } void on_access(Entry& entry) { mcd_calc.notify_impending_increase_or_remove(entry); // Landlord algorithm calls for credit to be reset to anything // between its current value and the cost. const float gain = 0.75f; // restore most credit entry.credit = gain*entry.cost + (1.0f-gain)*entry.credit; mcd_calc.notify_increased_or_removed(entry); } void remove_least_valuable(std::list& entry_list) { // we are required to evict at least one entry. one iteration // ought to suffice, due to definition of min_credit_density and // tolerance; however, we provide for repeating if necessary. again: // messing with this (e.g. raising if tiny) would result in // different evictions than Landlord_Lazy, which is unacceptable. // nor is doing so necessary: if mcd is tiny, so is credit. 
const float min_credit_density = mcd_calc(map); debug_assert(min_credit_density > 0.0f); for(MapIt it = map.begin(); it != map.end();) // no ++it { Entry& entry = it->second; charge(entry, min_credit_density); if(should_evict(entry)) { entry_list.push_back(entry); // annoying: we have to increment before erasing MapIt it_to_remove = it++; map.erase(it_to_remove); } else { mcd_calc.notify_decreased(entry); ++it; } } if(entry_list.empty()) goto again; } protected: // note: use hash_map instead of map for better locality // (relevant when iterating over all items in remove_least_valuable) class Map : public STL_HASH_MAP { public: static Entry& entry_from_it(typename Map::iterator it) { return it->second; } static const Entry& entry_from_it(typename Map::const_iterator it) { return it->second; } }; typedef typename Map::iterator MapIt; typedef typename Map::const_iterator MapCIt; Map map; // add entry and return iterator pointing to it. MapIt add_(Key key, const Entry& entry) { typedef std::pair PairIB; typename Map::value_type val = std::make_pair(key, entry); PairIB ret = map.insert(val); debug_assert(ret.second); // must not already be in map mcd_calc.notify_added(entry); return ret.first; } // remove entry (given by iterator) directly. void remove_(MapIt it) { const Entry& entry = it->second; mcd_calc.notify_impending_increase_or_remove(entry); mcd_calc.notify_increased_or_removed(entry); map.erase(it); } void charge(Entry& entry, float delta) { entry.credit -= delta * entry.size; // don't worry about entry.size being 0 - if so, cost // should also be 0, so credit will already be 0 anyway. } // for each entry, 'charge' it (i.e. reduce credit by) delta * its size. // delta is typically MCD (see above); however, several such updates // may be lumped together to save time. Landlord_Lazy does this. 
void charge_all(float delta) { for(MapIt it = map.begin(); it != map.end(); ++it) { Entry& entry = it->second; entry.credit -= delta * entry.size; if(!should_evict(entry)) mcd_calc.notify_decreased(entry); } } // is entry's credit exhausted? if so, it should be evicted. bool should_evict(const Entry& entry) { // we need a bit of leeway because density calculations may not // be exact. choose value carefully: must not be high enough to // trigger false positives. return entry.credit < 0.0001f; } private: McdCalc mcd_calc; }; // Cache manger policies. (these are partial specializations of Landlord, // adapting it to the template params required by Cache) template class Landlord_Naive : public Landlord {}; template class Landlord_Cached: public Landlord {}; // variant of Landlord that adds a priority queue to directly determine // which entry to evict. this allows lumping several charge operations // together and thus reduces iteration over all entries. // tradeoff: O(logN) removal (instead of N), but additional O(N) storage. template class Landlord_Lazy : public Landlord_Naive { typedef typename Landlord_Naive::Map Map; typedef typename Landlord_Naive::MapIt MapIt; typedef typename Landlord_Naive::MapCIt MapCIt; public: Landlord_Lazy() { pending_delta = 0.0f; } void add(Key key, const Entry& entry) { // we must apply pending_delta now - otherwise, the existing delta // would later be applied to this newly added item (incorrect). commit_pending_delta(); MapIt it = Parent::add_(key, entry); pri_q.push(it); } void remove(Key key) { Parent::remove(key); // reconstruct pri_q from current map. this is slow (N*logN) and // could definitely be done better, but we don't bother since // remove is a very rare operation (e.g. invalidating entries). while(!pri_q.empty()) pri_q.pop(); for(MapCIt it = this->map.begin(); it != this->map.end(); ++it) pri_q.push(it); } void on_access(Entry& entry) { Parent::on_access(entry); // entry's credit was changed. 
we now need to reshuffle the // pri queue to reflect this. pri_q.ensure_heap_order(); } void remove_least_valuable(std::list& entry_list) { MapIt least_valuable_it = pri_q.top(); pri_q.pop(); Entry& entry = Map::entry_from_it(least_valuable_it); entry_list.push_back(entry); // add to pending_delta the MCD that would have resulted // if removing least_valuable_it normally. // first, calculate actual credit (i.e. apply pending_delta to // this entry); then add the resulting density to pending_delta. entry.credit -= pending_delta*entry.size; const float credit_density = entry.credit_density(); debug_assert(credit_density > 0.0f); pending_delta += credit_density; Parent::remove_(least_valuable_it); } private: typedef Landlord_Naive Parent; // sort iterators by credit_density of the Entry they reference. struct CD_greater { bool operator()(MapIt it1, MapIt it2) const { return Map::entry_from_it(it1).credit_density() > Map::entry_from_it(it2).credit_density(); } }; // wrapper on top of priority_queue that allows 'heap re-sift' // (see on_access). // notes: // - greater comparator makes pri_q.top() the one with // LEAST credit_density, which is what we want. // - deriving from an STL container is a bit dirty, but we need this // to get at the underlying data (priority_queue interface is not // very capable). class PriQ: public std::priority_queue, CD_greater> { public: void ensure_heap_order() { // TODO: this is actually N*logN - ouch! that explains high // CPU cost in profile. this is called after only 1 item has // changed, so a logN "sift" operation ought to suffice. // that's not supported by the STL heap functions, so we'd // need a better implementation. pending.. std::make_heap(this->c.begin(), this->c.end(), this->comp); } }; PriQ pri_q; // delta values that have accumulated over several // remove_least_valuable() calls. applied during add(). 
float pending_delta; void commit_pending_delta() { if(pending_delta > 0.0f) { this->charge_all(pending_delta); pending_delta = 0.0f; // we've changed entry credit, so the heap order *may* have been // violated; reorder the pri queue. (I don't think so, // due to definition of delta, but we'll play it safe) pri_q.ensure_heap_order(); } } }; // // functor that implements division of first arg by second // // this is used to calculate credit_density(); performance matters // because this is called for each entry during each remove operation. // floating-point division (fairly slow) class Divider_Naive { public: Divider_Naive() {} // needed for default CacheEntry ctor Divider_Naive(float UNUSED(x)) {} float operator()(float val, float divisor) const { return val / divisor; } }; // caches reciprocal of divisor and multiplies by that. // tradeoff: only 4 clocks (instead of 20), but 4 bytes extra per entry. class Divider_Recip { float recip; public: Divider_Recip() {} // needed for default CacheEntry ctor Divider_Recip(float x) { recip = 1.0f / x; } float operator()(float val, float UNUSED(divisor)) const { return val * recip; } }; // TODO: use SSE/3DNow RCP instruction? not yet, because not all systems // support it and overhead of detecting this support eats into any gains. // initial implementation for testing purposes; quite inefficient. 
template class LRU { public: bool empty() const { return lru.empty(); } void add(Key key, const Entry& entry) { lru.push_back(KeyAndEntry(key, entry)); } bool find(Key key, const Entry** pentry) const { CIt it = std::find_if(lru.begin(), lru.end(), KeyEq(key)); if(it == lru.end()) return false; *pentry = &it->entry; return true; } void remove(Key key) { std::remove_if(lru.begin(), lru.end(), KeyEq(key)); } void on_access(Entry& entry) { for(It it = lru.begin(); it != lru.end(); ++it) { if(&entry == &it->entry) { add(it->key, it->entry); lru.erase(it); return; } } debug_warn("entry not found in list"); } void remove_least_valuable(std::list& entry_list) { entry_list.push_back(lru.front().entry); lru.pop_front(); } private: struct KeyAndEntry { Key key; Entry entry; KeyAndEntry(Key key_, const Entry& entry_) : key(key_), entry(entry_) {} }; class KeyEq { Key key; public: KeyEq(Key key_) : key(key_) {} bool operator()(const KeyAndEntry& ke) const { return ke.key == key; } }; typedef std::list List; typedef typename List::iterator It; typedef typename List::const_iterator CIt; List lru; }; // // Cache // template < typename Key, typename Item, // see documentation above for Manager's interface. template class Manager = Landlord_Cached, class Divider = Divider_Naive > class Cache { public: Cache() : mgr() {} void add(Key key, Item item, size_t size, uint cost) { return mgr.add(key, Entry(item, size, cost)); } // remove the entry identified by . expected usage is to check // if present and determine size via retrieve(), so no need for // error checking. // useful for invalidating single cache entries. void remove(Key key) { mgr.remove(key); } // if there is no entry for in the cache, return false. // otherwise, return true and pass back item and (optionally) size. // // if refill_credit (default), the cache manager 'rewards' this entry, // tending to keep it in cache longer. 
this parameter is not used in // normal operation - it's only for special cases where we need to // make an end run around the cache accounting (e.g. for cache simulator). bool retrieve(Key key, Item& item, size_t* psize = 0, bool refill_credit = true) { const Entry* entry; if(!mgr.find(key, &entry)) return false; item = entry->item; if(psize) *psize = entry->size; if(refill_credit) mgr.on_access((Entry&)*entry); return true; } // toss out the least valuable entry. return false if cache is empty, // otherwise true and (optionally) pass back its item and size. bool remove_least_valuable(Item* pItem = 0, size_t* pSize = 0) { // as an artefact of the cache eviction policy, several entries // may be "shaken loose" by one call to remove_least_valuable. // we cache them in a list to disburden callers (they always get // exactly one). if(entries_awaiting_eviction.empty()) { if(empty()) return false; mgr.remove_least_valuable(entries_awaiting_eviction); debug_assert(!entries_awaiting_eviction.empty()); } const Entry& entry = entries_awaiting_eviction.front(); if(pItem) *pItem = entry.item; if(pSize) *pSize = entry.size; entries_awaiting_eviction.pop_front(); return true; } bool empty() const { return mgr.empty(); } private: // this is applicable to all cache management policies and stores all // required information. a Divider functor is used to implement // division for credit_density. template struct CacheEntry { Item item; size_t size; uint cost; float credit; InnerDivider divider; // needed for mgr.remove_least_valuable's entry_copy CacheEntry() {} CacheEntry(Item item_, size_t size_, uint cost_) : item(item_), divider((float)size_) { size = size_; cost = cost_; credit = cost; // else divider will fail debug_assert(size != 0); } float credit_density() const { return divider(credit, (float)size); } }; typedef CacheEntry Entry; // see note in remove_least_valuable(). 
std::list entries_awaiting_eviction; Manager mgr; }; // // FIFO bit queue // struct BitBuf { ulong buf; ulong cur; // bit to be appended (toggled by add()) ulong len; // |buf| [bits] void reset() { buf = 0; cur = 0; len = 0; } // toggle current bit if desired, and add to buffer (new bit is LSB) void add(ulong toggle) { cur ^= toggle; buf <<= 1; buf |= cur; len++; } // extract LS n bits uint extract(ulong n) { ulong i = buf & ((1ul << n) - 1); buf >>= n; return i; } }; // // ring buffer - static array, accessible modulo n // template class RingBuf { size_t size_; // # of entries in buffer size_t head; // index of oldest item size_t tail; // index of newest item T data[n]; public: RingBuf() : data() { clear(); } void clear() { size_ = 0; head = 0; tail = n-1; } size_t size() { return size_; } bool empty() { return size_ == 0; } const T& operator[](int ofs) const { debug_assert(!empty()); size_t idx = (size_t)(head + ofs); return data[idx % n]; } T& operator[](int ofs) { debug_assert(!empty()); size_t idx = (size_t)(head + ofs); return data[idx % n]; } T& front() { debug_assert(!empty()); return data[head]; } const T& front() const { debug_assert(!empty()); return data[head]; } T& back() { debug_assert(!empty()); return data[tail]; } const T& back() const { debug_assert(!empty()); return data[tail]; } void push_back(const T& item) { if(size_ < n) size_++; // do not complain - overwriting old values is legit // (e.g. sliding window). 
else head = (head + 1) % n; tail = (tail + 1) % n; data[tail] = item; } void pop_front() { if(size_ != 0) { size_--; head = (head + 1) % n; } else debug_warn("underflow"); } class iterator { public: typedef std::random_access_iterator_tag iterator_category; typedef T value_type; typedef ptrdiff_t difference_type; typedef T* pointer; typedef T& reference; iterator() : data(0), pos(0) {} iterator(T* data_, size_t pos_) : data(data_), pos(pos_) {} T& operator[](int idx) const { return data[(pos+idx) % n]; } T& operator*() const { return data[pos % n]; } T* operator->() const { return &**this; } iterator& operator++() // pre { ++pos; return (*this); } iterator operator++(int) // post { iterator tmp = *this; ++*this; return tmp; } bool operator==(const iterator& rhs) const { return data == rhs.data && pos == rhs.pos; } bool operator!=(const iterator& rhs) const { return !(*this == rhs); } bool operator<(const iterator& rhs) const { return (pos < rhs.pos); } iterator& operator+=(difference_type ofs) { pos += ofs; return *this; } iterator& operator-=(difference_type ofs) { return (*this += -ofs); } iterator operator+(difference_type ofs) const { iterator tmp = *this; return (tmp += ofs); } iterator operator-(difference_type ofs) const { iterator tmp = *this; return (tmp -= ofs); } difference_type operator-(const iterator right) const { return (difference_type)(pos - right.pos); } protected: T* data; size_t pos; // not mod-N so that begin != end when buffer is full. 
}; class const_iterator { public: typedef std::random_access_iterator_tag iterator_category; typedef T value_type; typedef ptrdiff_t difference_type; typedef const T* pointer; typedef const T& reference; const_iterator() : data(0), pos(0) {} const_iterator(const T* data_, size_t pos_) : data(data_), pos(pos_) {} const T& operator[](int idx) const { return data[(pos+idx) % n]; } const T& operator*() const { return data[pos % n]; } const T* operator->() const { return &**this; } const_iterator& operator++() // pre { ++pos; return (*this); } const_iterator operator++(int) // post { const_iterator tmp = *this; ++*this; return tmp; } bool operator==(const const_iterator& rhs) const { return data == rhs.data && pos == rhs.pos; } bool operator!=(const const_iterator& rhs) const { return !(*this == rhs); } bool operator<(const const_iterator& rhs) const { return (pos < rhs.pos); } iterator& operator+=(difference_type ofs) { pos += ofs; return *this; } iterator& operator-=(difference_type ofs) { return (*this += -ofs); } iterator operator+(difference_type ofs) const { iterator tmp = *this; return (tmp += ofs); } iterator operator-(difference_type ofs) const { iterator tmp = *this; return (tmp -= ofs); } difference_type operator-(const iterator right) const { return (difference_type)(pos - right.pos); } protected: const T* data; size_t pos; // not mod-N so that begin != end when buffer is full. }; iterator begin() { return iterator(data, (size_ < n)? 0 : head); } const_iterator begin() const { return const_iterator(data, (size_ < n)? 0 : head); } iterator end() { return iterator(data, (size_ < n)? size_ : head+n); } const_iterator end() const { return const_iterator(data, (size_ < n)? size_ : head+n); } }; // // cache // // owns a pool of resources (Entry-s), associated with a 64 bit id. // typical use: add all available resources to the cache via grow(); // assign() ids to the resources, and update the resource data if necessary; // retrieve() the resource, given id. 
template class LRUCache { public: // 'give' Entry to the cache. int grow(Entry& e) { // add to front of LRU list, but not index // (since we don't have an id yet) lru_list.push_front(Line(e)); return 0; } // find the least-recently used line; associate id with it, // and return its Entry. fails (returns 0) if id is already // associated, or all lines are locked. Entry* assign(u64 id) { if(find_line(id)) { debug_warn("assign: id already in cache!"); return 0; } // scan in least->most used order for first non-locked entry List_iterator l = lru_list.end(); while(l != lru_list.begin()) { --l; if(l->refs == 0) goto have_line; } // all are locked and cannot be displaced. // caller should grow() enough lines so that this never happens. debug_warn("assign: all lines locked - grow() more lines"); return 0; have_line: // update mapping (index) idx.erase(id); idx[id] = l; l->id = id; return &l->ent; } // find line identified by id; return its entry or 0 if not in cache. Entry* retrieve(u64 id) { // invalid: id 0 denotes not-yet-associated lines if(id == 0) { debug_warn("retrieve: id 0 not allowed"); return 0; } Line* l = find_line(id); return l? &l->ent : 0; } // add/release a reference to a line, to protect it against // displacement via associate(). we verify refs >= 0. int lock(u64 id, bool locked) { Line* l = find_line(id); if(!l) return -1; if(locked) l->refs++; else { debug_assert(l->refs > 0); l->refs--; } return 0; } private: // implementation: // cache lines are stored in a list, most recently used in front. // a map finds the list entry containing a given id in log-time. struct Line { u64 id; Entry ent; int refs; // protect from displacement if > 0 Line(Entry& _ent) { id = 0; ent = _ent; refs = 0; } }; typedef std::list List; typedef typename List::iterator List_iterator; List lru_list; typedef std::map Map; Map idx; // return the line identified by id, or 0 if not in cache. // mark it as the most recently used line. 
Line* find_line(u64 id) { typename Map::const_iterator i = idx.find(id); // not found if(i == idx.end()) return 0; // index points us to list entry List_iterator l = i->second; // mark l as the most recently used line. lru_list.splice(lru_list.begin(), lru_list, l); idx[l->id] = l; return &*l; } }; - -// -// expansible hash table (linear probing) -// - - - - - - -// from VFS, not currently needed - -#if 0 -template class StringMap -{ -public: - - T* add(const char* fn, T& t) - { - const FnHash fn_hash = fnv_hash(fn); - - t.name = fn; - - std::pair item = std::make_pair(fn_hash, t); - std::pair res; - res = map.insert(item); - - if(!res.second) - { - debug_warn("add: already in container"); - return 0; - } - - // return address of user data (T) inserted into container. - return &((res.first)->second); - } - - T* find(const char* fn) - { - const FnHash fn_hash = fnv_hash(fn); - MapIt it = map.find(fn_hash); - // O(log(size)) - if(it == map.end()) - return 0; - return &it->second; - } - - size_t size() const - { - return map.size(); - } - - void clear() - { - map.clear(); - } - - -private: - typedef std::map Map; - typedef typename Map::iterator MapIt; - Map map; - - -public: - - class iterator - { - public: - iterator() - {} - iterator(typename StringMap::MapIt _it) - { it = _it; } - T& operator*() const - { return it->second; } - T* operator->() const - { return &**this; } - iterator& operator++() // pre - { ++it; return (*this); } - bool operator==(const iterator& rhs) const - { return it == rhs.it; } - bool operator!=(const iterator& rhs) const - { return !(*this == rhs); } - protected: - typename StringMap::MapIt it; - }; - - iterator begin() - { return iterator(map.begin()); } - - iterator end() - { return iterator(map.end()); } - -}; - - - -template class PriMap -{ -public: - - int add(Key key, uint pri, Data& data) - { - Item item = std::make_pair(pri, data); - MapEntry ent = std::make_pair(key, item); - std::pair ret; - ret = map.insert(ent); - // already in 
map - if(!ret.second) - { - MapIt it = ret.first; - Item item = it->second; - const uint old_pri = item.first; - Data& old_data = item.second; - - // new data is of higher priority; replace older data - if(old_pri <= pri) - { - old_data = data; - return 0; - } - // new data is of lower priority; don't add - else - return 1; - } - - return 0; - } - - Data* find(Key key) - { - MapIt it = map.find(key); - if(it == map.end()) - return 0; - - return &it->second.second; - } - - void clear() - { - map.clear(); - } - -private: - typedef std::pair Item; - typedef std::pair MapEntry; - typedef std::map Map; - typedef typename Map::iterator MapIt; - Map map; -}; -#endif // #if 0 - - - #endif // #ifndef ADTS_H__ Index: ps/trunk/source/lib/allocators.cpp =================================================================== --- ps/trunk/source/lib/allocators.cpp (revision 3910) +++ ps/trunk/source/lib/allocators.cpp (revision 3911) @@ -1,923 +1,864 @@ /** * ========================================================================= * File : allocators.cpp * Project : 0 A.D. * Description : memory suballocators. * * @author Jan.Wassenberg@stud.uni-karlsruhe.de * ========================================================================= */ /* * Copyright (c) 2005 Jan Wassenberg * * Redistribution and/or modification are also permitted under the * terms of the GNU General Public License as published by the * Free Software Foundation (version 2 or later, at your option). * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
*/

#include "precompiled.h"

#include "posix.h"
#include "sysdep/cpu.h"	// CAS
#include "byte_order.h"
#include "allocators.h"


//-----------------------------------------------------------------------------
// helper routines
//-----------------------------------------------------------------------------

// makes sure page_size has been initialized by the time it is needed
// (otherwise, we are open to NLSO ctor order issues).
// pool_create is therefore now safe to call before main().
static size_t get_page_size()
{
	// function-local static: initialized on first call, not at load time.
	static const size_t page_size = sysconf(_SC_PAGE_SIZE);
	return page_size;
}

// does x lie on a page boundary / is it an exact multiple of the page size?
static inline bool is_page_multiple(uintptr_t x)
{
	return (x % get_page_size()) == 0;
}

// round size up to the next multiple of the system page size.
static inline size_t round_up_to_page(size_t size)
{
	return round_up(size, get_page_size());
}

// very thin wrapper on top of sys/mman.h that makes the intent more obvious:
// (its commit/decommit semantics are difficult to tell apart)

// translate an mmap return value into a LibError (mmap reports failure
// via MAP_FAILED + errno rather than a null pointer).
static inline LibError LibError_from_mmap(void* ret, bool warn_if_failed = true)
{
	if(ret != MAP_FAILED)
		return ERR_OK;
	return LibError_from_errno(warn_if_failed);
}

// flags common to all our mappings: per-process and not file-backed.
static const int mmap_flags = MAP_PRIVATE|MAP_ANONYMOUS;

// reserve address space only: PROT_NONE + MAP_NORESERVE means no
// backing store is committed yet. passes the base address back via pp.
static LibError mem_reserve(size_t size, u8** pp)
{
	errno = 0;	// ensure LibError_from_mmap sees this call's error code
	void* ret = mmap(0, size, PROT_NONE, mmap_flags|MAP_NORESERVE, -1, 0);
	*pp = (u8*)ret;
	return LibError_from_mmap(ret);
}

// return the entire reserved region to the OS.
static LibError mem_release(u8* p, size_t size)
{
	errno = 0;
	int ret = munmap(p, size);
	return LibError_from_posix(ret);
}

// commit backing store for [p, p+size) with the given protection by
// re-mapping over the reservation (MAP_FIXED).
static LibError mem_commit(u8* p, size_t size, int prot)
{
	if(prot == PROT_NONE)
		// not allowed - it would be misinterpreted by mmap.
		WARN_RETURN(ERR_INVALID_PARAM);
	errno = 0;
	void* ret = mmap(p, size, prot, mmap_flags|MAP_FIXED, -1, 0);
	return LibError_from_mmap(ret);
}

// release backing store but keep the address range reserved
// (PROT_NONE + MAP_NORESERVE over the same range).
static LibError mem_decommit(u8* p, size_t size)
{
	errno = 0;
	void* ret = mmap(p, size, PROT_NONE, mmap_flags|MAP_NORESERVE|MAP_FIXED, -1, 0);
	return LibError_from_mmap(ret);
}

// change protection of an already-committed range.
static LibError mem_protect(u8* p, size_t size, int prot)
{
	errno = 0;
	int ret = mprotect(p, size, prot);
	return LibError_from_posix(ret);
}


//-----------------------------------------------------------------------------
// page aligned allocator
//-----------------------------------------------------------------------------

/**
 * allocate memory starting at a page-aligned address.
 * it defaults to read/writable; you can mprotect it if desired.
 *
 * @param unaligned_size minimum size [bytes] to allocate
 * (will be rounded up to page size)
 * @return void* allocated memory, or NULL if error / out of memory.
 */
void* page_aligned_alloc(size_t unaligned_size)
{
	const size_t size_pa = round_up_to_page(unaligned_size);
	u8* p = 0;
	RETURN0_IF_ERR(mem_reserve(size_pa, &p));
	RETURN0_IF_ERR(mem_commit(p, size_pa, PROT_READ|PROT_WRITE));
	return p;
}

/**
 * Free a memory block that had been allocated by page_aligned_alloc.
 *
 * @param void* Exact pointer returned by page_aligned_alloc
 * @param unaligned_size Exact size passed to page_aligned_alloc
 */
void page_aligned_free(void* p, size_t unaligned_size)
{
	// freeing NULL is a no-op (mirrors free() semantics)
	if(!p)
		return;
	debug_assert(is_page_multiple((uintptr_t)p));
	const size_t size_pa = round_up_to_page(unaligned_size);
	(void)mem_release((u8*)p, size_pa);
}


//-----------------------------------------------------------------------------
// dynamic (expandable) array
//-----------------------------------------------------------------------------

// indicates that this DynArray must not be resized or freed
// (e.g. because it merely wraps an existing memory range).
// stored in da->prot to reduce size; doesn't conflict with any PROT_* flags.
const int DA_NOT_OUR_MEM = 0x40000000;

// sanity-check all DynArray fields; returns ERR_OK or warns and
// returns a specific error code identifying the failed check.
static LibError validate_da(DynArray* da)
{
	if(!da)
		WARN_RETURN(ERR_INVALID_PARAM);
	u8* const base = da->base;
	const size_t max_size_pa = da->max_size_pa;
	const size_t cur_size = da->cur_size;
	const size_t pos = da->pos;
	const int prot = da->prot;

	if(debug_is_pointer_bogus(base))
		WARN_RETURN(ERR_1);
	// note: don't check if base is page-aligned -
	// might not be true for 'wrapped' mem regions.
	// if(!is_page_multiple((uintptr_t)base))
	//	WARN_RETURN(ERR_2);
	if(!is_page_multiple(max_size_pa))
		WARN_RETURN(ERR_3);
	if(cur_size > max_size_pa)
		WARN_RETURN(ERR_4);
	if(pos > cur_size || pos > max_size_pa)
		WARN_RETURN(ERR_5);
	if(prot & ~(PROT_READ|PROT_WRITE|PROT_EXEC|DA_NOT_OUR_MEM))
		WARN_RETURN(ERR_6);

	return ERR_OK;
}

#define CHECK_DA(da) RETURN_ERR(validate_da(da))


/**
 * ready the DynArray object for use.
 *
 * @param DynArray*
 * @param max_size Max size [bytes] of the DynArray; this much
 * (rounded up to next page multiple) virtual address space is reserved.
 * no virtual memory is actually committed until calls to da_set_size.
 * @return LibError
 */
LibError da_alloc(DynArray* da, size_t max_size)
{
	const size_t max_size_pa = round_up_to_page(max_size);

	u8* p;
	RETURN_ERR(mem_reserve(max_size_pa, &p));

	da->base = p;
	da->max_size_pa = max_size_pa;
	da->cur_size = 0;
	da->prot = PROT_READ|PROT_WRITE;
	da->pos = 0;
	CHECK_DA(da);
	return ERR_OK;
}

/**
 * "wrap" (i.e. store information about) the given buffer in a
 * DynArray object, preparing it for use with da_read or da_append.
 *
 * da_free should be called when the DynArray is no longer needed,
 * even though it doesn't free this memory (but does zero the DynArray).
 *
 * @param DynArray*. Note: any future operations on it that would
 * change the underlying memory (e.g. da_set_size) will fail.
* @param p Memory * @param size Size [bytes] * @return LibError */ LibError da_wrap_fixed(DynArray* da, u8* p, size_t size) { da->base = p; da->max_size_pa = round_up_to_page(size); da->cur_size = size; da->prot = PROT_READ|PROT_WRITE|DA_NOT_OUR_MEM; da->pos = 0; CHECK_DA(da); return ERR_OK; } /** * free all memory (address space + physical) that constitutes the * given array. use-after-free is impossible because the memory is * marked not-present via MMU. * * @param DynArray* da; zeroed afterwards. * @return LibError */ LibError da_free(DynArray* da) { CHECK_DA(da); u8* p = da->base; size_t size = da->max_size_pa; bool was_wrapped = (da->prot & DA_NOT_OUR_MEM) != 0; // wipe out the DynArray for safety // (must be done here because mem_release may fail) memset(da, 0, sizeof(*da)); // skip mem_release if was allocated via da_wrap_fixed // (i.e. it doesn't actually own any memory). don't complain; // da_free is supposed to be called even in the above case. if(!was_wrapped) RETURN_ERR(mem_release(p, size)); return ERR_OK; } /** * expand or shrink the array: changes the amount of currently committed * (i.e. usable) memory pages. * * @param DynArray* * @param new_size [bytes]. Pages are added/removed until this size * (rounded up to the next page size multiple) is reached. * @return LibError */ LibError da_set_size(DynArray* da, size_t new_size) { CHECK_DA(da); if(da->prot & DA_NOT_OUR_MEM) WARN_RETURN(ERR_LOGIC); // determine how much to add/remove const size_t cur_size_pa = round_up_to_page(da->cur_size); const size_t new_size_pa = round_up_to_page(new_size); const ssize_t size_delta_pa = (ssize_t)new_size_pa - (ssize_t)cur_size_pa; // not enough memory to satisfy this expand request: abort. // note: do not complain - some allocators (e.g. file_cache) // legitimately use up all available space.
if(new_size_pa > da->max_size_pa) return ERR_LIMIT; // NOWARN u8* end = da->base + cur_size_pa; // expanding if(size_delta_pa > 0) RETURN_ERR(mem_commit(end, size_delta_pa, da->prot)); // shrinking else if(size_delta_pa < 0) RETURN_ERR(mem_decommit(end+size_delta_pa, -size_delta_pa)); // else: no change in page count, e.g. if going from size=1 to 2 // (we don't want mem_* to have to handle size=0) da->cur_size = new_size; CHECK_DA(da); return ERR_OK; } /** * Make sure at least bytes starting at da->pos are committed and * ready for use. * * @param DynArray* * @param size Minimum amount to guarantee [bytes] * @return LibError */ LibError da_reserve(DynArray* da, size_t size) { // default to page size (the OS won't commit less anyway); // grab more if request requires it. const size_t expand_amount = MAX(4*KiB, size); if(da->pos + size > da->cur_size) return da_set_size(da, da->cur_size + expand_amount); return ERR_OK; } /** * Change access rights of the array memory; used to implement * write-protection. affects the currently committed pages as well as * all subsequently added pages. * * @param DynArray* * @param prot PROT_* protection flags as defined by POSIX mprotect() * @return LibError */ LibError da_set_prot(DynArray* da, int prot) { CHECK_DA(da); // somewhat more subtle: POSIX mprotect requires the memory have been // mmap-ed, which it probably wasn't here. if(da->prot & DA_NOT_OUR_MEM) WARN_RETURN(ERR_LOGIC); da->prot = prot; RETURN_ERR(mem_protect(da->base, da->cur_size, prot)); CHECK_DA(da); return ERR_OK; } /** * "read" from array, i.e. copy into the given buffer. * starts at offset DynArray.pos and advances this. 
* * @param DynArray* * @param data Destination buffer * @param size Amount to copy [bytes] * @return LibError */ LibError da_read(DynArray* da, void* data, size_t size) { // make sure we have enough data to read if(da->pos+size > da->cur_size) WARN_RETURN(ERR_EOF); memcpy2(data, da->base+da->pos, size); da->pos += size; return ERR_OK; } /** * "write" to array, i.e. copy from the given buffer. * starts at offset DynArray.pos and advances this. * * @param DynArray* * @param data Source buffer * @param size Amount to copy [bytes] * @return LibError */ LibError da_append(DynArray* da, const void* data, size_t size) { RETURN_ERR(da_reserve(da, size)); memcpy2(da->base+da->pos, data, size); da->pos += size; return ERR_OK; } //----------------------------------------------------------------------------- // pool allocator //----------------------------------------------------------------------------- // design parameters: // - O(1) alloc and free; // - fixed- XOR variable-sized blocks; // - doesn't preallocate the entire pool; // - returns sequential addresses. // "freelist" is a pointer to the first unused element (0 if there are none); // its memory holds a pointer to the next free one in list. static void freelist_push(void** pfreelist, void* el) { debug_assert(el != 0); void* prev_el = *pfreelist; *pfreelist = el; *(void**)el = prev_el; } static void* freelist_pop(void** pfreelist) { void* el = *pfreelist; // nothing in list if(!el) return 0; *pfreelist = *(void**)el; return el; } // elements returned are aligned to this many bytes: static const size_t ALIGN = 8; /** * Ready Pool for use. * * @param Pool* * @param max_size Max size [bytes] of the Pool; this much * (rounded up to next page multiple) virtual address space is reserved. * no virtual memory is actually committed until calls to pool_alloc. * @param el_size Number of bytes that will be returned by each * pool_alloc (whose size parameter is then ignored).
Can be 0 to * allow variable-sized allocations, but pool_free is then unusable. * @return LibError */ LibError pool_create(Pool* p, size_t max_size, size_t el_size) { if(el_size == POOL_VARIABLE_ALLOCS) p->el_size = 0; else p->el_size = round_up(el_size, ALIGN); p->freelist = 0; RETURN_ERR(da_alloc(&p->da, max_size)); return ERR_OK; } /** * Free all memory that ensued from the Pool. all elements are made unusable * (it doesn't matter if they were "allocated" or in freelist or unused); * future alloc and free calls on this pool will fail. * * @param Pool* * @return LibError */ LibError pool_destroy(Pool* p) { // don't be picky and complain if the freelist isn't empty; // we don't care since it's all part of the da anyway. // however, zero it to prevent further allocs from succeeding. p->freelist = 0; return da_free(&p->da); } /** * Indicate whether was allocated from the given pool. * this is useful for callers that use several types of allocators. * * @param Pool* * @param el Address in question * @return bool */ bool pool_contains(Pool* p, void* el) { // outside of our range if(!(p->da.base <= el && el < p->da.base+p->da.pos)) return false; // sanity check: it should be aligned (if pool has fixed-size elements) if(p->el_size) debug_assert((uintptr_t)((u8*)el - p->da.base) % p->el_size == 0); return true; } /** * Dole out memory from the pool. * exhausts the freelist before returning new entries to improve locality. * * @param Pool* * @param size bytes to allocate; ignored if pool_create's el_size was not 0. * @return allocated memory, or 0 if the Pool would have to be expanded and * there isn't enough memory to do so. */ void* pool_alloc(Pool* p, size_t size) { // if pool allows variable sizes, go with the size parameter, // otherwise the pool el_size setting. const size_t el_size = p->el_size? p->el_size : round_up(size, ALIGN); // note: this can never happen in pools with variable-sized elements // because they disallow pool_free. 
void* el = freelist_pop(&p->freelist); if(el) goto have_el; // alloc a new entry { // expand, if necessary if(da_reserve(&p->da, el_size) < 0) return 0; el = p->da.base + p->da.pos; p->da.pos += el_size; } have_el: debug_assert(pool_contains(p, el)); // paranoia return el; } /** * Make a fixed-size element available for reuse in the given Pool. * * this is not allowed if the Pool was created for variable-size elements. * rationale: avoids having to pass el_size here and compare with size when * allocating; also prevents fragmentation and leaking memory. * * @param Pool* * @param el Element returned by pool_alloc. */ void pool_free(Pool* p, void* el) { // only allowed to free items if we were initialized with // fixed el_size. (this avoids having to pass el_size here and // check if requested_size matches that when allocating) if(p->el_size == 0) { debug_warn("cannot free variable-size items"); return; } if(pool_contains(p, el)) freelist_push(&p->freelist, el); else debug_warn("invalid pointer (not in pool)"); } /** * "free" all allocations that ensued from the given Pool. * this resets it as if freshly pool_create-d, but doesn't release the * underlying reserved virtual memory. * * @param Pool* */ void pool_free_all(Pool* p) { p->freelist = 0; // must be reset before da_set_size or CHECK_DA will complain. p->da.pos = 0; da_set_size(&p->da, 0); } //----------------------------------------------------------------------------- // bucket allocator //----------------------------------------------------------------------------- // design goals: // - fixed- XOR variable-sized blocks; // - allow freeing individual blocks if they are all fixed-size; // - never relocates; // - no fixed limit. // note: this type of allocator is called "region-based" in the literature. // see "Reconsidering Custom Memory Allocation" (Berger, Zorn, McKinley). 
// if individual elements must be freeable, consider "reaps": // basically a combination of region and heap, where frees go to the heap and // allocs exhaust that memory first and otherwise use the region. // power-of-2 isn't required; value is arbitrary. const size_t BUCKET_SIZE = 4000; /** * Ready Bucket for use. * * @param Bucket* * @param el_size Number of bytes that will be returned by each * bucket_alloc (whose size parameter is then ignored). Can be 0 to * allow variable-sized allocations, but bucket_free is then unusable. * @return LibError */ LibError bucket_create(Bucket* b, size_t el_size) { b->freelist = 0; b->el_size = round_up(el_size, ALIGN); // note: allocating here avoids the is-this-the-first-time check // in bucket_alloc, which speeds things up. b->bucket = (u8*)malloc(BUCKET_SIZE); if(!b->bucket) { // cause next bucket_alloc to retry the allocation b->pos = BUCKET_SIZE; b->num_buckets = 0; WARN_RETURN(ERR_NO_MEM); } *(u8**)b->bucket = 0; // terminate list b->pos = round_up(sizeof(u8*), ALIGN); b->num_buckets = 1; return ERR_OK; } /** * Free all memory that ensued from the Bucket. * future alloc and free calls on this Bucket will fail. * * @param Bucket* */ void bucket_destroy(Bucket* b) { while(b->bucket) { u8* prev_bucket = *(u8**)b->bucket; free(b->bucket); b->bucket = prev_bucket; b->num_buckets--; } debug_assert(b->num_buckets == 0); // poison pill: cause subsequent alloc and free to fail b->freelist = 0; b->el_size = BUCKET_SIZE; } /** * Dole out memory from the Bucket. * exhausts the freelist before returning new entries to improve locality. * * @param Bucket* * @param size bytes to allocate; ignored if bucket_create's el_size was not 0. * @return allocated memory, or 0 if the Bucket would have to be expanded and * there isn't enough memory to do so. */ void* bucket_alloc(Bucket* b, size_t size) { size_t el_size = b->el_size? 
b->el_size : round_up(size, ALIGN); // must fit in a bucket debug_assert(el_size <= BUCKET_SIZE-sizeof(u8*)); // try to satisfy alloc from freelist void* el = freelist_pop(&b->freelist); if(el) return el; // if there's not enough space left, close current bucket and // allocate another. if(b->pos+el_size > BUCKET_SIZE) { u8* bucket = (u8*)malloc(BUCKET_SIZE); if(!bucket) return 0; *(u8**)bucket = b->bucket; b->bucket = bucket; // skip bucket list field and align (note: malloc already // aligns to at least 8 bytes, so don't take b->bucket into account) b->pos = round_up(sizeof(u8*), ALIGN); b->num_buckets++; } void* ret = b->bucket+b->pos; b->pos += el_size; return ret; } /** * Make a fixed-size element available for reuse in the Bucket. * * this is not allowed if the Bucket was created for variable-size elements. * rationale: avoids having to pass el_size here and compare with size when * allocating; also prevents fragmentation and leaking memory. * * @param Bucket* * @param el Element returned by bucket_alloc. */ void bucket_free(Bucket* b, void* el) { if(b->el_size == 0) { debug_warn("cannot free variable-size items"); return; } freelist_push(&b->freelist, el); // note: checking if was actually allocated from is difficult: // it may not be in the currently open bucket, so we'd have to // iterate over the list - too much work. } //----------------------------------------------------------------------------- // matrix allocator //----------------------------------------------------------------------------- // takes care of the dirty work of allocating 2D matrices: // - aligns data // - only allocates one memory block, which is more efficient than // malloc/new for each row. /** * allocate a 2D cols x rows matrix of byte cells. * this must be freed via matrix_free. * * @param cols, rows Matrix dimensions. * @param el_size Size [bytes] of each matrix entry. * @return void**: 0 if out of memory, or a pointer that should be cast to the * target type (e.g. int**). 
it can then be accessed via matrix[col][row]. */ void** matrix_alloc(uint cols, uint rows, size_t el_size) { const size_t initial_align = 64; // note: no provision for padding rows. this is a bit more work and // if el_size isn't a power-of-2, performance is going to suck anyway. // otherwise, the initial alignment will take care of it. const size_t ptr_array_size = cols*sizeof(void*); const size_t row_size = cols*el_size; const size_t data_size = rows*row_size; const size_t total_size = ptr_array_size + initial_align + data_size; void* p = malloc(total_size); if(!p) return 0; uintptr_t data_addr = (uintptr_t)p + ptr_array_size + initial_align; data_addr -= data_addr % initial_align; // alignment check didn't set address to before allocation debug_assert(data_addr >= (uintptr_t)p+ptr_array_size); void** ptr_array = (void**)p; for(uint i = 0; i < cols; i++) { ptr_array[i] = (void*)data_addr; data_addr += row_size; } // didn't overrun total allocation debug_assert(data_addr <= (uintptr_t)p+total_size); return ptr_array; } /** * Free a matrix allocated by matrix_alloc. * * @param void** matrix. Callers will likely want to pass it as another * type, but C++ requires it be explicitly casted to void**. */ void matrix_free(void** matrix) { free(matrix); } //----------------------------------------------------------------------------- // allocator optimized for single instances //----------------------------------------------------------------------------- /** * Allocate bytes of zeroed memory. * * intended for applications that frequently alloc/free a single * fixed-size object. caller provides static storage and an in-use flag; * we use that memory if available and otherwise fall back to the heap. * if the application only has one object in use at a time, malloc is * avoided; this is faster and avoids heap fragmentation. * * note: thread-safe despite use of shared static data. 
* * @param storage Caller-allocated memory of at least <size> bytes * (typically a static array of bytes) * @param in_use_flag Pointer to a flag we set when <storage> is in-use. * @param size [bytes] to allocate * @return allocated memory; typically = <storage>, but falls back to * malloc if that's in-use. can return 0 (with warning) if out of memory. */ void* single_calloc(void* storage, volatile uintptr_t* in_use_flag, size_t size) { // sanity check debug_assert(*in_use_flag == 0 || *in_use_flag == 1); void* p; // successfully reserved the single instance if(CAS(in_use_flag, 0, 1)) p = storage; // already in use (rare) - allocate from heap else { p = malloc(size); if(!p) { WARN_ERR(ERR_NO_MEM); return 0; } } memset(p, 0, size); return p; } /** * Free a memory block that had been allocated by single_calloc. * * @param storage Exact value passed to single_calloc. * @param in_use_flag Exact value passed to single_calloc. * @param p Exact value returned by single_calloc. */ void single_free(void* storage, volatile uintptr_t* in_use_flag, void* p) { // sanity check debug_assert(*in_use_flag == 0 || *in_use_flag == 1); if(p == storage) { if(CAS(in_use_flag, 1, 0)) { // ok, flag has been reset to 0 } else debug_warn("in_use_flag out of sync (double free?)"); } // was allocated from heap else { // single instance may have been freed by now - cannot assume // anything about in_use_flag.
free(p); } } - - -//----------------------------------------------------------------------------- -// built-in self test -//----------------------------------------------------------------------------- - -#if SELF_TEST_ENABLED -namespace test { - -static void test_da() -{ - DynArray da; - - // basic test of functionality (not really meaningful) - TEST(da_alloc(&da, 1000) == 0); - TEST(da_set_size(&da, 1000) == 0); - TEST(da_set_prot(&da, PROT_NONE) == 0); - TEST(da_free(&da) == 0); - - // test wrapping existing mem blocks for use with da_read - u8 data[4] = { 0x12, 0x34, 0x56, 0x78 }; - TEST(da_wrap_fixed(&da, data, sizeof(data)) == 0); - u8 buf[4]; - TEST(da_read(&da, buf, 4) == 0); // success - TEST(read_le32(buf) == 0x78563412); // read correct value - TEST(da_read(&da, buf, 1) < 0); // no more data left - TEST(da_free(&da) == 0); -} - -static void test_expand() -{ -} - -static void test_matrix() -{ - // not much we can do here; allocate a matrix, write to it and - // make sure it can be freed. - // (note: can't check memory layout because "matrix" is int** - - // array of pointers. the matrix interface doesn't guarantee - // that data comes in row-major order after the row pointers) - int** m = (int**)matrix_alloc(3, 3, sizeof(int)); - m[0][0] = 1; - m[0][1] = 2; - m[1][0] = 3; - m[2][2] = 4; - matrix_free((void**)m); -} - -static void self_test() -{ - test_da(); - test_expand(); - test_matrix(); -} - -SELF_TEST_RUN; - -} // namespace test -#endif // #if SELF_TEST_ENABLED Index: ps/trunk/source/lib/res/file/compression.h =================================================================== --- ps/trunk/source/lib/res/file/compression.h (revision 3910) +++ ps/trunk/source/lib/res/file/compression.h (revision 3911) @@ -1,90 +1,90 @@ /** * ========================================================================= * File : compression.h * Project : 0 A.D. * Description : interface for compressing/decompressing data streams. 
* : currently implements "deflate" (RFC1951). * * @author Jan.Wassenberg@stud.uni-karlsruhe.de * ========================================================================= */ /* * Copyright (c) 2003-2005 Jan Wassenberg * * Redistribution and/or modification are also permitted under the * terms of the GNU General Public License as published by the * Free Software Foundation (version 2 or later, at your option). * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. */ #ifndef COMPRESSION_H__ #define COMPRESSION_H__ enum ContextType { CT_COMPRESSION, CT_DECOMPRESSION }; enum CompressionMethod { CM_NONE, // zlib "deflate" - see RFC 1950, 1951. CM_DEFLATE, CM_UNSUPPORTED }; extern uintptr_t comp_alloc(ContextType type, CompressionMethod method); // set output buffer. all subsequent comp_feed() calls will write into it. // should only be called once (*) due to the comp_finish() interface - since // that allows querying the output buffer, it must not be fragmented. // * the previous output buffer is wiped out by comp_reset, so // setting it again (once!) after that is allowed and required. extern void comp_set_output(uintptr_t ctx, void* out, size_t out_size); // [compression contexts only:] allocate an output buffer big enough to // hold worst_case_compression_ratio*in_size bytes. // rationale: this interface is useful because callers cannot // reliably estimate how much output space is needed. // raises a warning for decompression contexts because this operation // does not make sense there: -// - decompression ratio is quite large - ballpark 1000x; +// - worst-case decompression ratio is quite large - ballpark 1000x; // - exact uncompressed size is known to caller (via archive file header). // note: buffer is held until comp_free; it can be reused after a // comp_reset. this reduces malloc/free calls.
extern LibError comp_alloc_output(uintptr_t ctx, size_t in_size); // get current position in output buffer. // precondition: valid calls to EITHER comp_alloc_output OR comp_set_output. extern void* comp_get_output(uintptr_t ctx); // 'feed' the given buffer to the compressor/decompressor. // returns number of output bytes produced (*), or a negative LibError code. // * 0 is a legitimate return value - this happens if the input buffer is // small and the codec hasn't produced any output. // note: the buffer may be overwritten or freed immediately after - we take // care of copying and queuing any data that remains (e.g. due to // lack of output buffer space). extern ssize_t comp_feed(uintptr_t ctx, const void* in, size_t in_size); // feed any remaining queued input data, finish the compress/decompress and // pass back the output buffer. extern LibError comp_finish(uintptr_t ctx, void** out, size_t* out_size); // prepare this context for reuse. the effect is similar to freeing this // context and creating another. // rationale: this API avoids reallocating a considerable amount of // memory (ballpark 200KB LZ window plus output buffer). extern LibError comp_reset(uintptr_t ctx); // free this context and all associated memory. extern void comp_free(uintptr_t ctx); #endif // #ifndef COMPRESSION_H__ Index: ps/trunk/source/lib/res/file/path.cpp =================================================================== --- ps/trunk/source/lib/res/file/path.cpp (revision 3910) +++ ps/trunk/source/lib/res/file/path.cpp (revision 3911) @@ -1,280 +1,279 @@ /** * ========================================================================= * File : path.cpp * Project : 0 A.D. * Description : helper functions for VFS paths.
* * @author Jan.Wassenberg@stud.uni-karlsruhe.de * ========================================================================= */ /* * Copyright (c) 2004-2006 Jan Wassenberg * * Redistribution and/or modification are also permitted under the * terms of the GNU General Public License as published by the * Free Software Foundation (version 2 or later, at your option). * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. */ #include "precompiled.h" #include #include "lib.h" #include "adts.h" #include "file_internal.h" #include "allocators.h" // path types: // p_*: posix (e.g. mount object name or for open()) // v_*: vfs (e.g. mount point) // fn : filename only (e.g. from readdir) // dir_name: directory only, no path (e.g. subdir name) // // all paths must be relative (no leading '/'); components are separated // by '/'; no ':', '\\', "." or ".." allowed; root dir is "". // // grammar: // path ::= dir*file? // dir ::= name/ // file ::= name // name ::= [^/] enum Conversion { TO_NATIVE, TO_PORTABLE }; static LibError convert_path(char* dst, const char* src, Conversion conv = TO_NATIVE) { // DIR_SEP is assumed to be a single character! const char* s = src; char* d = dst; char from = DIR_SEP, to = '/'; if(conv == TO_NATIVE) from = '/', to = DIR_SEP; size_t len = 0; for(;;) { len++; if(len >= PATH_MAX) WARN_RETURN(ERR_PATH_LENGTH); char c = *s++; if(c == from) c = to; *d++ = c; // end of string - done if(c == '\0') return ERR_OK; } } // set by file_set_root_dir static char n_root_dir[PATH_MAX]; static size_t n_root_dir_len; // return the native equivalent of the given relative portable path // (i.e. convert all '/' to the platform's directory separator) // makes sure length < PATH_MAX. 
LibError file_make_native_path(const char* path, char* n_path) { return convert_path(n_path, path, TO_NATIVE); } // return the portable equivalent of the given relative native path // (i.e. convert the platform's directory separators to '/') // makes sure length < PATH_MAX. LibError file_make_portable_path(const char* n_path, char* path) { return convert_path(path, n_path, TO_PORTABLE); } // return the native equivalent of the given portable path // (i.e. convert all '/' to the platform's directory separator). // also prepends current directory => n_full_path is absolute. // makes sure length < PATH_MAX. LibError file_make_full_native_path(const char* path, char* n_full_path) { debug_assert(path != n_full_path); // doesn't work in-place strcpy_s(n_full_path, PATH_MAX, n_root_dir); return convert_path(n_full_path+n_root_dir_len, path, TO_NATIVE); } // return the portable equivalent of the given relative native path // (i.e. convert the platform's directory separators to '/') // n_full_path is absolute; if it doesn't match the current dir, fail. // (note: portable paths are always relative to the file root dir). // makes sure length < PATH_MAX. LibError file_make_full_portable_path(const char* n_full_path, char* path) { debug_assert(path != n_full_path); // doesn't work in-place if(strncmp(n_full_path, n_root_dir, n_root_dir_len) != 0) WARN_RETURN(ERR_TNODE_NOT_FOUND); return convert_path(path, n_full_path+n_root_dir_len, TO_PORTABLE); } // establish the root directory from , which is treated as // relative to the executable's directory (determined via argv[0]). // all relative file paths passed to this module will be based from // this root dir. // // example: executable in "$install_dir/system"; desired root dir is // "$install_dir/data" => rel_path = "../data". // // argv[0] is necessary because the current directory is unknown at startup // (e.g. it isn't set when invoked via batch file), and this is the // easiest portable way to find our install directory. 
// // can only be called once, by design (see below). rel_path is trusted. LibError file_set_root_dir(const char* argv0, const char* rel_path) { // security check: only allow attempting to chdir once, so that malicious // code cannot circumvent the VFS checks that disallow access to anything // above the current directory (set here). // this routine is called early at startup, so any subsequent attempts // are likely bogus. static bool already_attempted; if(already_attempted) WARN_RETURN(ERR_ROOT_DIR_ALREADY_SET); already_attempted = true; // get full path to executable char n_path[PATH_MAX]; // .. first try safe, but system-dependent version if(sys_get_executable_name(n_path, PATH_MAX) < 0) { // .. failed; use argv[0] if(!realpath(argv0, n_path)) return LibError_from_errno(); } // make sure it's valid if(access(n_path, X_OK) < 0) return LibError_from_errno(); // strip executable name, append rel_path, convert to native char* start_of_fn = (char*)path_name_only(n_path); RETURN_ERR(file_make_native_path(rel_path, start_of_fn)); // get actual root dir - previous n_path may include ".." // (slight optimization, speeds up path lookup) if(!realpath(n_path, n_root_dir)) return LibError_from_errno(); // .. append DIR_SEP to simplify code that uses n_root_dir // (note: already 0-terminated, since it's static) n_root_dir_len = strlen(n_root_dir)+1; // +1 for trailing DIR_SEP n_root_dir[n_root_dir_len-1] = DIR_SEP; return ERR_OK; } //----------------------------------------------------------------------------- // storage for path strings //----------------------------------------------------------------------------- // rationale: we want a constant-time IsAtomFn(string pointer) lookup: // this avoids any overhead of calling file_make_unique_fn_copy on // already-atomized strings. that requires allocating from one contiguous // arena, which is also more memory-efficient than the heap (no headers). 
static Pool atom_pool; +bool path_is_atom_fn(const char* fn) +{ + return pool_contains(&atom_pool, (void*)fn); +} + // allocate a copy of P_fn in our string pool. strings are equal iff // their addresses are equal, thus allowing fast comparison. // // if the (generous) filename storage is full, 0 is returned. // this is not ever expected to happen; callers need not check the // return value because a warning is raised anyway. const char* file_make_unique_fn_copy(const char* P_fn) { // early out: if already an atom, return immediately. - if(pool_contains(&atom_pool, (void*)P_fn)) + if(path_is_atom_fn(P_fn)) return P_fn; const size_t fn_len = strlen(P_fn); const char* unique_fn; // check if already allocated; return existing copy if so. // // rationale: the entire storage could be done via container, // rather than simply using it as a lookup mapping. // however, DynHashTbl together with Pool (see above) is more efficient. typedef DynHashTbl AtomMap; static AtomMap atom_map; unique_fn = atom_map.find(P_fn); if(unique_fn) return unique_fn; unique_fn = (const char*)pool_alloc(&atom_pool, fn_len+1); if(!unique_fn) { DEBUG_WARN_ERR(ERR_NO_MEM); return 0; } memcpy2((void*)unique_fn, P_fn, fn_len); ((char*)unique_fn)[fn_len] = '\0'; atom_map.insert(unique_fn, unique_fn); stats_unique_name(fn_len); return unique_fn; } -bool path_is_atom_fn(const char* fn) -{ - return pool_contains(&atom_pool, (void*)fn); -} - - void path_init() { pool_create(&atom_pool, 8*MiB, POOL_VARIABLE_ALLOCS); } void path_shutdown() { (void)pool_destroy(&atom_pool); } const char* file_get_random_name() { // there had better be names in atom_pool, else this will fail. debug_assert(atom_pool.da.pos != 0); again: const size_t start_ofs = (size_t)rand(0, (uint)atom_pool.da.pos-1); // scan ahead to next string boundary const char* start = (const char*)atom_pool.da.base+start_ofs; const char* next_0 = strchr(start, '\0')+1; // .. 
at end of storage: restart if((u8*)next_0 >= atom_pool.da.base+atom_pool.da.pos) goto again; // .. skip all '\0' (may be several due to pool alignment) const char* next_name = next_0; while(*next_name == '\0') next_name++; return next_name; } Index: ps/trunk/source/lib/res/file/zip.cpp =================================================================== --- ps/trunk/source/lib/res/file/zip.cpp (revision 3910) +++ ps/trunk/source/lib/res/file/zip.cpp (revision 3911) @@ -1,710 +1,673 @@ /** * ========================================================================= * File : zip.cpp * Project : 0 A.D. * Description : archive backend for Zip files. * * @author Jan.Wassenberg@stud.uni-karlsruhe.de * ========================================================================= */ /* * Copyright (c) 2003-2006 Jan Wassenberg * * Redistribution and/or modification are also permitted under the * terms of the GNU General Public License as published by the * Free Software Foundation (version 2 or later, at your option). * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. */ #include "precompiled.h" #include #include #include "lib.h" #include "byte_order.h" #include "allocators.h" #include "timer.h" #include "self_test.h" #include "file_internal.h" // safe downcasters: cast from any integral type to u32 or u16; // issues warning if larger than would fit in the target type. // // these are generally useful but included here (instead of e.g. lib.h) for // several reasons: // - including implementation in lib.h doesn't work because the definition // of debug_assert in turn requires lib.h's STMT. // - separate compilation of templates via export isn't supported by // most compilers. 
template u32 u32_from_larger(T x) { const u32 max = std::numeric_limits::max(); debug_assert((u64)x <= (u64)max); return (u32)(x & max); } template u16 u16_from_larger(T x) { const u16 max = std::numeric_limits::max(); debug_assert((u64)x <= (u64)max); return (u16)(x & max); } //----------------------------------------------------------------------------- // timestamp conversion: DOS FAT <-> Unix time_t //----------------------------------------------------------------------------- static time_t time_t_from_FAT(u32 fat_timedate) { const uint fat_time = bits(fat_timedate, 0, 15); const uint fat_date = bits(fat_timedate, 16, 31); struct tm t; // struct tm format: t.tm_sec = bits(fat_time, 0,4) * 2; // [0,59] t.tm_min = bits(fat_time, 5,10); // [0,59] t.tm_hour = bits(fat_time, 11,15); // [0,23] t.tm_mday = bits(fat_date, 0,4); // [1,31] t.tm_mon = bits(fat_date, 5,8) - 1; // [0,11] t.tm_year = bits(fat_date, 9,15) + 80; // since 1900 t.tm_isdst = -1; // unknown - let libc determine // otherwise: totally bogus, and at the limit of 32-bit time_t debug_assert(t.tm_year < 138); time_t ret = mktime(&t); if(ret == (time_t)-1) debug_warn("mktime failed"); return ret; } static u32 FAT_from_time_t(time_t time) { // (values are adjusted for DST) struct tm* t = localtime(&time); u16 fat_time = 0; fat_time |= (t->tm_sec/2); // 5 fat_time |= (t->tm_min) << 5; // 6 fat_time |= (t->tm_hour) << 11; // 5 u16 fat_date = 0; fat_date |= (t->tm_mday); // 5 fat_date |= (t->tm_mon+1) << 5; // 4 fat_date |= (t->tm_year-80) << 9; // 7 u32 fat_timedate = u32_from_u16(fat_date, fat_time); return fat_timedate; } //----------------------------------------------------------------------------- // Zip file data structures and signatures //----------------------------------------------------------------------------- enum ZipCompressionMethod { ZIP_CM_NONE = 0, ZIP_CM_DEFLATE = 8 }; // translate ArchiveEntry.method to zip_method. 
static ZipCompressionMethod zip_method_for(CompressionMethod method) { switch(method) { case CM_NONE: return ZIP_CM_NONE; case CM_DEFLATE: return ZIP_CM_DEFLATE; default: WARN_ERR(ERR_UNKNOWN_CMETHOD); return ZIP_CM_NONE; } } // translate to (not Zip-specific) CompressionMethod for use in ArchiveEntry. static CompressionMethod method_for_zip_method(ZipCompressionMethod zip_method) { switch(zip_method) { case ZIP_CM_NONE: return CM_NONE; case ZIP_CM_DEFLATE: return CM_DEFLATE; default: WARN_ERR(ERR_UNKNOWN_CMETHOD); return CM_UNSUPPORTED; } } static const u32 cdfh_magic = FOURCC_LE('P','K','\1','\2'); static const u32 lfh_magic = FOURCC_LE('P','K','\3','\4'); static const u32 ecdr_magic = FOURCC_LE('P','K','\5','\6'); #pragma pack(push, 1) struct LFH { u32 magic; u16 x1; // version needed u16 flags; u16 method; u32 fat_mtime; // last modified time (DOS FAT format) u32 crc; u32 csize; u32 ucsize; u16 fn_len; u16 e_len; }; const size_t LFH_SIZE = sizeof(LFH); cassert(LFH_SIZE == 30); // convenience (allows writing out LFH and fn in 1 IO). // must be declared here to avoid any struct padding. struct LFH_Package { LFH lfh; char fn[PATH_MAX]; }; struct CDFH { u32 magic; u32 x1; // versions u16 flags; u16 method; u32 fat_mtime; // last modified time (DOS FAT format) u32 crc; u32 csize; u32 ucsize; u16 fn_len; u16 e_len; u16 c_len; u32 x2; // spanning u32 x3; // attributes u32 lfh_ofs; }; const size_t CDFH_SIZE = sizeof(CDFH); cassert(CDFH_SIZE == 46); // convenience (avoids need for pointer arithmetic) // must be declared here to avoid any struct padding. 
struct CDFH_Package { CDFH cdfh; char fn[PATH_MAX]; }; struct ECDR { u32 magic; u8 x1[6]; // multiple-disk support u16 cd_entries; u32 cd_size; u32 cd_ofs; u16 comment_len; }; const size_t ECDR_SIZE = sizeof(ECDR); cassert(ECDR_SIZE == 22); #pragma pack(pop) static off_t lfh_total_size(const LFH* lfh_le) { debug_assert(lfh_le->magic == lfh_magic); const size_t fn_len = read_le16(&lfh_le->fn_len); const size_t e_len = read_le16(&lfh_le->e_len); // note: LFH doesn't have a comment field! return (off_t)(LFH_SIZE + fn_len + e_len); } static void lfh_assemble(LFH* lfh_le, CompressionMethod method, time_t mtime, u32 crc, off_t csize, off_t ucsize, size_t fn_len) { const ZipCompressionMethod zip_method = zip_method_for(method); const u32 fat_mtime = FAT_from_time_t(mtime); lfh_le->magic = lfh_magic; lfh_le->x1 = to_le16(0); lfh_le->flags = to_le16(0); lfh_le->method = to_le16(zip_method); lfh_le->fat_mtime = to_le32(fat_mtime); lfh_le->crc = to_le32(crc); lfh_le->csize = to_le32(u32_from_larger(csize)); lfh_le->ucsize = to_le32(u32_from_larger(ucsize)); lfh_le->fn_len = to_le16(u16_from_larger(fn_len)); lfh_le->e_len = to_le16(0); } static void cdfh_decompose(const CDFH* cdfh_le, CompressionMethod& method, time_t& mtime, off_t& csize, off_t& ucsize, const char*& fn, off_t& lfh_ofs, size_t& total_size) { const u16 zip_method = read_le16(&cdfh_le->method); const u32 fat_mtime = read_le32(&cdfh_le->fat_mtime); csize = (off_t)read_le32(&cdfh_le->csize); ucsize = (off_t)read_le32(&cdfh_le->ucsize); const u16 fn_len = read_le16(&cdfh_le->fn_len); const u16 e_len = read_le16(&cdfh_le->e_len); const u16 c_len = read_le16(&cdfh_le->c_len); lfh_ofs = (off_t)read_le32(&cdfh_le->lfh_ofs); method = method_for_zip_method((ZipCompressionMethod)zip_method); mtime = time_t_from_FAT(fat_mtime); // return 0-terminated copy of filename const char* fn_src = (const char*)cdfh_le+CDFH_SIZE; // not 0-terminated! 
char fn_buf[PATH_MAX]; memcpy2(fn_buf, fn_src, fn_len*sizeof(char)); fn_buf[fn_len] = '\0'; fn = file_make_unique_fn_copy(fn_buf); total_size = CDFH_SIZE + fn_len + e_len + c_len; } static void cdfh_assemble(CDFH* dst_cdfh_le, CompressionMethod method, time_t mtime, u32 crc, size_t csize, size_t ucsize, size_t fn_len, size_t slack, u32 lfh_ofs) { const ZipCompressionMethod zip_method = zip_method_for(method); const u32 fat_mtime = FAT_from_time_t(mtime); dst_cdfh_le->magic = cdfh_magic; dst_cdfh_le->x1 = to_le32(0); dst_cdfh_le->flags = to_le16(0); dst_cdfh_le->method = to_le16(zip_method); dst_cdfh_le->fat_mtime = to_le32(fat_mtime); dst_cdfh_le->crc = to_le32(crc); dst_cdfh_le->csize = to_le32(u32_from_larger(csize)); dst_cdfh_le->ucsize = to_le32(u32_from_larger(ucsize)); dst_cdfh_le->fn_len = to_le16(u16_from_larger(fn_len)); dst_cdfh_le->e_len = to_le16(0); dst_cdfh_le->c_len = to_le16(u16_from_larger(slack)); dst_cdfh_le->x2 = to_le32(0); dst_cdfh_le->x3 = to_le32(0); dst_cdfh_le->lfh_ofs = to_le32(lfh_ofs); } static void ecdr_decompose(ECDR* ecdr_le, uint& cd_entries, off_t& cd_ofs, size_t& cd_size) { cd_entries = (uint)read_le16(&ecdr_le->cd_entries); cd_ofs = (off_t)read_le32(&ecdr_le->cd_ofs); cd_size = (size_t)read_le32(&ecdr_le->cd_size); } static void ecdr_assemble(ECDR* dst_ecdr_le, uint cd_entries, off_t cd_ofs, size_t cd_size) { dst_ecdr_le->magic = ecdr_magic; memset(dst_ecdr_le->x1, 0, sizeof(dst_ecdr_le->x1)); dst_ecdr_le->cd_entries = to_le16(u16_from_larger(cd_entries)); dst_ecdr_le->cd_size = to_le32(u32_from_larger(cd_size)); dst_ecdr_le->cd_ofs = to_le32(u32_from_larger(cd_ofs)); dst_ecdr_le->comment_len = to_le16(0); } //----------------------------------------------------------------------------- // scan for and return a pointer to a Zip record, or 0 if not found. // is the expected position; we scan from there until EOF for // the given ID (fourcc). 
includes ID field) bytes must // remain before EOF - this makes sure the record is completely in the file. // used by z_find_ecdr and z_extract_cdfh. static const u8* za_find_id(const u8* buf, size_t size, const void* start, u32 magic, size_t record_size) { ssize_t bytes_left = (ssize_t)((buf+size) - (u8*)start - record_size); const u8* p = (const u8*)start; // don't increment function argument directly, // so we can warn the user if we had to scan. while(bytes_left-- >= 0) { // found it if(*(u32*)p == magic) { #ifndef NDEBUG if(p != start) debug_warn("archive damaged, but still found next record."); #endif return p; } p++; // be careful not to increment before comparison; // magic may already be found at . } // passed EOF, didn't find it. // note: do not warn - this happens in the initial ECDR search at // EOF if the archive contains a comment field. return 0; } // search for ECDR in the last bytes of the file. // if found, fill with a copy of the (little-endian) ECDR and // return ERR_OK, otherwise IO error or ERR_CORRUPTED. static LibError za_find_ecdr(File* f, size_t max_scan_amount, ECDR* dst_ecdr_le) { // don't scan more than the entire file const size_t file_size = f->size; const size_t scan_amount = MIN(max_scan_amount, file_size); // read desired chunk of file into memory const off_t ofs = (off_t)(file_size - scan_amount); FileIOBuf buf = FILE_BUF_ALLOC; ssize_t bytes_read = file_io(f, ofs, scan_amount, &buf); RETURN_ERR(bytes_read); debug_assert(bytes_read == (ssize_t)scan_amount); // look for ECDR in buffer LibError ret = ERR_CORRUPTED; const u8* start = (const u8*)buf; const ECDR* ecdr_le = (const ECDR*)za_find_id(start, bytes_read, start, ecdr_magic, ECDR_SIZE); if(ecdr_le) { *dst_ecdr_le = *ecdr_le; ret = ERR_OK; } file_buf_free(buf); return ret; } static LibError za_find_cd(File* f, uint& cd_entries, off_t& cd_ofs, size_t& cd_size) { // sanity check: file size must be > header size. 
// (this speeds up determining if the file is a Zip file at all) const size_t file_size = f->size; if(file_size < LFH_SIZE+CDFH_SIZE+ECDR_SIZE) { completely_bogus: // this file is definitely not a valid Zip file. // note: the VFS blindly opens files when mounting; it needs to open // all archives, but doesn't know their extension (e.g. ".pk3"). // therefore, do not warn user. return ERR_UNKNOWN_FORMAT; // NOWARN } ECDR ecdr_le; // expected case: ECDR at EOF; no file comment (=> we only need to // read 512 bytes) LibError ret = za_find_ecdr(f, ECDR_SIZE, &ecdr_le); if(ret == ERR_OK) { have_ecdr: ecdr_decompose(&ecdr_le, cd_entries, cd_ofs, cd_size); return ERR_OK; } // last resort: scan last 66000 bytes of file // (the Zip archive comment field - up to 64k - may follow ECDR). // if the zip file is < 66000 bytes, scan the whole file. ret = za_find_ecdr(f, 66000u, &ecdr_le); if(ret == ERR_OK) goto have_ecdr; // both ECDR scans failed - this is not a valid Zip file. // now see if the beginning of the file holds a valid LFH: const off_t ofs = 0; const size_t scan_amount = LFH_SIZE; FileIOBuf buf = FILE_BUF_ALLOC; ssize_t bytes_read = file_io(f, ofs, scan_amount, &buf); RETURN_ERR(bytes_read); debug_assert(bytes_read == (ssize_t)scan_amount); const bool has_LFH = (za_find_id(buf, scan_amount, buf, lfh_magic, LFH_SIZE) != 0); file_buf_free(buf); if(!has_LFH) goto completely_bogus; // the Zip file is mostly valid but lacking an ECDR. (can happen if // user hard-exits while building an archive) // notes: // - return ERR_CORRUPTED so VFS will not include this file. // - we could work around this by scanning all LFHs, but won't bother // because it'd be slow. // - do not warn - the corrupt archive will be deleted on next // successful archive builder run anyway. return ERR_CORRUPTED; // NOWARN } // analyse an opened Zip file; call back into archive.cpp to // populate the Archive object with a list of the files it contains. 
// returns ERR_OK on success, ERR_CORRUPTED if file is recognizable as // a Zip file but invalid, otherwise ERR_UNKNOWN_FORMAT or IO error. // // fairly slow - must read Central Directory from disk // (size ~= 60 bytes*num_files); observed time ~= 80ms. LibError zip_populate_archive(File* f, Archive* a) { uint cd_entries; off_t cd_ofs; size_t cd_size; RETURN_ERR(za_find_cd(f, cd_entries, cd_ofs, cd_size)); // call back with number of entries in archives (an upper bound // for valid files; we're not interested in the directory entries). // we'd have to scan through the central dir to count them out; we'll // just skip them and waste a bit of preallocated memory. RETURN_ERR(archive_allocate_entries(a, cd_entries)); FileIOBuf buf = FILE_BUF_ALLOC; RETURN_ERR(file_io(f, cd_ofs, cd_size, &buf)); // iterate through Central Directory LibError ret = ERR_OK; const CDFH* cdfh = (const CDFH*)buf; size_t ofs_to_next_cdfh = 0; for(uint i = 0; i < cd_entries; i++) { // scan for next CDFH (at or beyond current cdfh position) cdfh = (const CDFH*)((u8*)cdfh + ofs_to_next_cdfh); cdfh = (CDFH*)za_find_id((const u8*)buf, cd_size, (const u8*)cdfh, cdfh_magic, CDFH_SIZE); if(!cdfh) // no (further) CDFH found: { ret = ERR_CORRUPTED; break; } // copy translated fields from CDFH into ArchiveEntry. ArchiveEntry ae; cdfh_decompose(cdfh, ae.method, ae.mtime, ae.csize, ae.ucsize, ae.atom_fn, ae.ofs, ofs_to_next_cdfh); ae.flags = ZIP_LFH_FIXUP_NEEDED; // if file (we don't care about directories): if(ae.csize && ae.ucsize) { ret = archive_add_file(a, &ae); if(ret != ERR_OK) break; } } file_buf_free(buf); return ret; } //----------------------------------------------------------------------------- // this code grabs an LFH struct from file block(s) that are // passed to the callback. usually, one call copies the whole thing, // but the LFH may straddle a block boundary. 
// // rationale: this allows using temp buffers for zip_fixup_lfh, // which avoids involving the file buffer manager and thus // unclutters the trace and cache contents. struct LFH_Copier { u8* lfh_dst; size_t lfh_bytes_remaining; }; static LibError lfh_copier_cb(uintptr_t ctx, const void* block, size_t size, size_t* bytes_processed) { LFH_Copier* p = (LFH_Copier*)ctx; debug_assert(size <= p->lfh_bytes_remaining); memcpy2(p->lfh_dst, block, size); p->lfh_dst += size; p->lfh_bytes_remaining -= size; *bytes_processed = size; return INFO_CB_CONTINUE; } // ensures points to the actual file contents; it is initially // the offset of the LFH. we cannot use CDFH filename and extra field // lengths to skip past LFH since that may not mirror CDFH (has happened). // // this is called at file-open time instead of while mounting to // reduce seeks: since reading the file will typically follow, the // block cache entirely absorbs the IO cost. void zip_fixup_lfh(File* f, ArchiveEntry* ent) { // already fixed up - done. if(!(ent->flags & ZIP_LFH_FIXUP_NEEDED)) return; // performance note: this ends up reading one file block, which is // only in the block cache if the file starts in the same block as a // previously read file (i.e. both are small). LFH lfh; LFH_Copier params = { (u8*)&lfh, sizeof(LFH) }; ssize_t ret = file_io(f, ent->ofs, LFH_SIZE, FILE_BUF_TEMP, lfh_copier_cb, (uintptr_t)¶ms); debug_assert(ret == sizeof(LFH)); ent->ofs += lfh_total_size(&lfh); ent->flags &= ~ZIP_LFH_FIXUP_NEEDED; } //----------------------------------------------------------------------------- // archive builder backend //----------------------------------------------------------------------------- // rationale: don't support partial adding, i.e. updating archive with // only one file. this would require overwriting parts of the Zip archive, // which is annoying and slow. also, archives are usually built in // seek-optimal order, which would break if we start inserting files. 
// while testing, loose files can be used, so there's no loss. // we don't want to expose ZipArchive to callers, // (would require defining File, Pool and CDFH) // so allocate the storage here and return opaque pointer. struct ZipArchive { File f; off_t cur_file_size; Pool cdfhs; uint cd_entries; CDFH* prev_cdfh; }; static SingleAllocator za_mgr; // create a new Zip archive and return a pointer for use in subsequent // zip_archive_add_file calls. previous archive file is overwritten. LibError zip_archive_create(const char* zip_filename, ZipArchive** pza) { // local za_copy simplifies things - if something fails, no cleanup is // needed. upon success, we copy into the newly allocated real za. ZipArchive za_copy; za_copy.cur_file_size = 0; za_copy.cd_entries = 0; za_copy.prev_cdfh = 0; RETURN_ERR(file_open(zip_filename, FILE_WRITE|FILE_NO_AIO, &za_copy.f)); RETURN_ERR(pool_create(&za_copy.cdfhs, 10*MiB, 0)); ZipArchive* za = za_mgr.alloc(); if(!za) WARN_RETURN(ERR_NO_MEM); *za = za_copy; *pza = za; return ERR_OK; } // add a file (described by ArchiveEntry) to the archive. file_contents // is the actual file data; its compression method is given in ae->method and // can be CM_NONE. // IO cost: writes out to disk (we don't currently attempt // any sort of write-buffering). LibError zip_archive_add_file(ZipArchive* za, const ArchiveEntry* ae, void* file_contents) { const size_t fn_len = strlen(ae->atom_fn); // write (LFH, filename, file contents) to archive // .. put LFH and filename into one 'package' LFH_Package header; lfh_assemble(&header.lfh, ae->method, ae->mtime, ae->crc, ae->csize, ae->ucsize, fn_len); strcpy_s(header.fn, ARRAY_SIZE(header.fn), ae->atom_fn); // .. write that out in 1 IO const off_t lfh_ofs = za->cur_file_size; FileIOBuf buf; buf = (FileIOBuf)&header; file_io(&za->f, lfh_ofs, LFH_SIZE+fn_len, &buf); // .. 
write out file contents buf = (FileIOBuf)file_contents; file_io(&za->f, lfh_ofs+(off_t)(LFH_SIZE+fn_len), ae->csize, &buf); za->cur_file_size += (off_t)(LFH_SIZE+fn_len+ae->csize); // append a CDFH to the central dir (in memory) // .. note: pool_alloc may round size up for padding purposes. const size_t prev_pos = za->cdfhs.da.pos; CDFH_Package* p = (CDFH_Package*)pool_alloc(&za->cdfhs, CDFH_SIZE+fn_len); if(!p) WARN_RETURN(ERR_NO_MEM); const size_t slack = za->cdfhs.da.pos-prev_pos - (CDFH_SIZE+fn_len); cdfh_assemble(&p->cdfh, ae->method, ae->mtime, ae->crc, ae->csize, ae->ucsize, fn_len, slack, lfh_ofs); memcpy2(p->fn, ae->atom_fn, fn_len); za->cd_entries++; return ERR_OK; } // write out the archive to disk; only hereafter is it valid. // frees the ZipArchive instance. // IO cost: writes out Central Directory to disk (about 70 bytes per file). LibError zip_archive_finish(ZipArchive* za) { const size_t cd_size = za->cdfhs.da.pos; // append an ECDR to the CDFH list (this allows us to // write out both to the archive file in one burst) ECDR* ecdr = (ECDR*)pool_alloc(&za->cdfhs, ECDR_SIZE); if(!ecdr) WARN_RETURN(ERR_NO_MEM); ecdr_assemble(ecdr, za->cd_entries, za->cur_file_size, cd_size); FileIOBuf buf = za->cdfhs.da.base; file_io(&za->f, za->cur_file_size, cd_size+ECDR_SIZE, &buf); (void)file_close(&za->f); (void)pool_destroy(&za->cdfhs); za_mgr.release(za); return ERR_OK; } - - -//----------------------------------------------------------------------------- -// built-in self test -//----------------------------------------------------------------------------- - -#if SELF_TEST_ENABLED -namespace test { - -static void test_fat_timedate_conversion() -{ - // note: FAT time stores second/2, which means converting may - // end up off by 1 second. 
- - time_t t, converted_t; - long dt; - - t = time(0); - converted_t = time_t_from_FAT(FAT_from_time_t(t)); - dt = converted_t-t; // disambiguate abs() parameter - TEST(abs(dt) < 2); - - t++; - converted_t = time_t_from_FAT(FAT_from_time_t(t)); - dt = converted_t-t; // disambiguate abs() parameter - TEST(abs(dt) < 2); -} - -static void self_test() -{ - test_fat_timedate_conversion(); -} - -SELF_TEST_RUN; - -} // namespace test -#endif // #if SELF_TEST_ENABLED Index: ps/trunk/source/lib/res/file/file_cache.cpp =================================================================== --- ps/trunk/source/lib/res/file/file_cache.cpp (revision 3910) +++ ps/trunk/source/lib/res/file/file_cache.cpp (revision 3911) @@ -1,1363 +1,1288 @@ /** * ========================================================================= * File : file_cache.cpp * Project : 0 A.D. * Description : cache for entire files and I/O blocks. also allocates * : file buffers, allowing zero-copy I/O. * * @author Jan.Wassenberg@stud.uni-karlsruhe.de * ========================================================================= */ /* * Copyright (c) 2004-2006 Jan Wassenberg * * Redistribution and/or modification are also permitted under the * terms of the GNU General Public License as published by the * Free Software Foundation (version 2 or later, at your option). * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. */ #include "precompiled.h" #include #include "lib/allocators.h" #include "lib/byte_order.h" #include "lib/adts.h" #include "file_internal.h" //----------------------------------------------------------------------------- // block cache: intended to cache raw compressed data, since files aren't aligned // in the archive; alignment code would force a read of the whole block, // which would be a slowdown unless we keep them in memory. 
// // keep out of async code (although extra work for sync: must not issue/wait // if was cached) to simplify things. disadvantage: problems if same block // is issued twice, before the first call completes (via wait_io). // that won't happen though unless we have threaded file_ios => // rare enough not to worry about performance. // // since sync code allocates the (temp) buffer, it's guaranteed // to remain valid. // class BlockMgr { static const size_t MAX_BLOCKS = 32; enum BlockStatus { BS_PENDING, BS_COMPLETE, BS_INVALID }; struct Block { BlockId id; // initialized in BlockMgr ctor and remains valid void* mem; BlockStatus status; int refs; Block() : id(block_cache_make_id(0, 0)), status(BS_INVALID), refs(0) {} }; // access pattern is usually ring buffer, but in rare cases we // need to skip over locked items, even though they are the oldest. Block blocks[MAX_BLOCKS]; uint oldest_block; // use Pool to allocate mem for all blocks because it guarantees // page alignment (required for IO) and obviates manually aligning. Pool pool; public: BlockMgr() : blocks(), oldest_block(0) { (void)pool_create(&pool, MAX_BLOCKS*FILE_BLOCK_SIZE, FILE_BLOCK_SIZE); for(Block* b = blocks; b < blocks+MAX_BLOCKS; b++) { b->mem = pool_alloc(&pool, 0); debug_assert(b->mem); // shouldn't ever fail } } void shutdown() { (void)pool_destroy(&pool); } void* alloc(BlockId id) { Block* b; for(b = blocks; b < blocks+MAX_BLOCKS; b++) { if(block_eq(b->id, id)) debug_warn("allocating block that is already in list"); } for(size_t i = 0; i < MAX_BLOCKS; i++) { b = &blocks[oldest_block]; oldest_block = (oldest_block+1)%MAX_BLOCKS; // normal case: oldest item can be reused if(b->status != BS_PENDING && b->refs == 0) goto have_block; // wacky special case: oldest item is currently locked. // skip it and reuse the next. // // to see when this can happen, consider IO depth = 4. // let the Block at blocks[oldest_block] contain data that // an IO wants. 
the 2nd and 3rd blocks are not in cache and // happen to be taken from near the end of blocks[]. // attempting to issue block #4 fails because its buffer would // want the first slot (which is locked since the its IO // is still pending). if(b->status == BS_COMPLETE && b->refs > 0) continue; debug_warn("status and/or refs have unexpected values"); } debug_warn("all blocks are locked"); return 0; have_block: b->id = id; b->status = BS_PENDING; return b->mem; } void mark_completed(BlockId id) { for(Block* b = blocks; b < blocks+MAX_BLOCKS; b++) { if(block_eq(b->id, id)) { debug_assert(b->status == BS_PENDING); b->status = BS_COMPLETE; return; } } debug_warn("mark_completed: block not found, but ought still to be in cache"); } void* find(BlockId id) { // linear search is ok, since we only keep a few blocks. for(Block* b = blocks; b < blocks+MAX_BLOCKS; b++) { if(block_eq(b->id, id)) { if(b->status == BS_COMPLETE) { debug_assert(b->refs >= 0); b->refs++; return b->mem; } debug_warn("block referenced while still in progress"); return 0; } } return 0; // not found } void release(BlockId id) { for(Block* b = blocks; b < blocks+MAX_BLOCKS; b++) { if(block_eq(b->id, id)) { b->refs--; debug_assert(b->refs >= 0); return; } } debug_warn("release: block not found, but ought still to be in cache"); } void invalidate(const char* atom_fn) { for(Block* b = blocks; b < blocks+MAX_BLOCKS; b++) { if(b->id.atom_fn == atom_fn) { if(b->refs) debug_warn("invalidating block that is currently in-use"); b->status = BS_INVALID; } } } }; static BlockMgr block_mgr; bool block_eq(BlockId b1, BlockId b2) { return b1.atom_fn == b2.atom_fn && b1.block_num == b2.block_num; } // create an id for use with the cache that uniquely identifies // the block from the file starting at . BlockId block_cache_make_id(const char* atom_fn, const off_t ofs) { // is guaranteed to be unique (see file_make_unique_fn_copy). 
// block_num should always fit in 32 bits (assuming maximum file size // = 2^32 * FILE_BLOCK_SIZE ~= 2^48 -- plenty). we don't bother // checking this. const u32 block_num = (u32)(ofs / FILE_BLOCK_SIZE); BlockId id = { atom_fn, block_num }; return id; } void* block_cache_alloc(BlockId id) { return block_mgr.alloc(id); } void block_cache_mark_completed(BlockId id) { block_mgr.mark_completed(id); } void* block_cache_find(BlockId id) { void* ret = block_mgr.find(id); stats_block_cache(ret? CR_HIT : CR_MISS); return ret; } void block_cache_release(BlockId id) { return block_mgr.release(id); } //----------------------------------------------------------------------------- // >= file_sector_size or else waio will have to realign. // chosen as exactly 1 page: this allows write-protecting file buffers // without worrying about their (non-page-aligned) borders. // internal fragmentation is considerable but acceptable. static const size_t BUF_ALIGN = 4*KiB; /* CacheAllocator the biggest worry of a file cache is fragmentation. there are 2 basic approaches to combat this: 1) 'defragment' periodically - move blocks around to increase size of available 'holes'. 2) prevent fragmentation from occurring at all via deliberate alloc/free policy. file_io returns cache blocks directly to the user (zero-copy IO), so only currently unreferenced blocks can be moved (while holding a lock, to boot). it is believed that this would severely hamper defragmentation; we therefore go with the latter approach. basic insight is: fragmentation occurs when a block is freed whose neighbors are not free (thus preventing coalescing). this can be prevented by allocating objects of similar lifetimes together. typical workloads (uniform access frequency) already show such behavior: the Landlord cache manager evicts files in an LRU manner, which matches the allocation policy. references: "The Memory Fragmentation Problem - Solved?" 
(Johnstone and Wilson) "Dynamic Storage Allocation - A Survey and Critical Review" (Johnstone and Wilson) policy: - allocation: use all available mem first, then look at freelist - freelist: good fit, address-ordered, always split blocks - free: immediately coalesce mechanism: - coalesce: boundary tags in freed memory with magic value - freelist: 2**n segregated doubly-linked, address-ordered */ static const size_t MAX_CACHE_SIZE = 96*MiB; class CacheAllocator { public: CacheAllocator() : bitmap(0), freelists() { // (safe to call this from ctor as of 2006-02-02) (void)pool_create(&pool, MAX_CACHE_SIZE, 0); } void shutdown() { (void)pool_destroy(&pool); } void* alloc(size_t size) { // determine actual size to allocate // .. better not be more than MAX_CACHE_SIZE - file_buf_alloc will // fail because no amount of freeing up existing allocations // would make enough room. therefore, check for this here // (should never happen). debug_assert(size < MAX_CACHE_SIZE); // .. safely handle 0 byte allocations. according to C/C++ tradition, // we allocate a unique address, which ends up wasting 1 page. if(!size) size = 1; // .. each allocation must be aligned to BUF_ALIGN, so // we round up all sizes to that. const size_t size_pa = round_up(size, BUF_ALIGN); const uint size_class = size_class_of(size_pa); void* p; // try to reuse a freed entry p = alloc_from_class(size_class, size_pa); if(p) goto success; // grab more space from pool p = pool_alloc(&pool, size_pa); if(p) goto success; // last resort: split a larger element p = alloc_from_larger_class(size_class, size_pa); if(p) goto success; // failed - can no longer expand and nothing big enough was // found in freelists. // file cache will decide which elements are least valuable, // free() those and call us again. 
return 0; success: #ifndef NDEBUG alloc_checker.notify_alloc(p, size); #endif stats_notify_alloc(size_pa); return p; } // rationale: don't call this "free" because that would run afoul of the // memory tracker's redirection macro and require #include "nommgr.h". void dealloc(u8* p, size_t size) { #ifndef NDEBUG alloc_checker.notify_free(p, size); #endif const size_t size_pa = round_up(size, BUF_ALIGN); // make sure entire (aligned!) range is within pool. if(!pool_contains(&pool, p) || !pool_contains(&pool, p+size_pa-1)) { debug_warn("invalid pointer"); return; } // (re)allow writes // // note: unfortunately we cannot unmap this buffer's memory // (to make sure it is not used) because we write a header/footer // into it to support coalescing. (void)mprotect(p, size_pa, PROT_READ|PROT_WRITE); coalesce_and_free(p, size_pa); stats_notify_free(size_pa); } // make given range read-only via MMU. // write access is restored when buffer is freed. // // p and size are the exact (non-padded) values as in dealloc. void make_read_only(u8* p, size_t size) { // bail to avoid mprotect failing if(!size) return; const size_t size_pa = round_up(size, BUF_ALIGN); (void)mprotect(p, size_pa, PROT_READ); } // free all allocations and reset state to how it was just after // (the first and only) init() call. 
void reset() { #ifndef NDEBUG alloc_checker.notify_clear(); #endif pool_free_all(&pool); bitmap = 0; memset(freelists, 0, sizeof(freelists)); stats_reset(); } private: #ifndef NDEBUG AllocatorChecker alloc_checker; #endif Pool pool; //------------------------------------------------------------------------- // boundary tags for coalescing static const u32 HEADER_ID = FOURCC('C','M','A','H'); static const u32 FOOTER_ID = FOURCC('C','M','A','F'); static const u32 MAGIC = FOURCC('\xFF','\x55','\xAA','\x01'); struct Header { Header* prev; Header* next; size_t size_pa; u32 id; u32 magic; }; // we could use struct Header for Footer as well, but keeping them // separate and different can avoid coding errors (e.g. mustn't pass a // Footer to freelist_remove!) struct Footer { // note: deliberately reordered fields for safety u32 magic; u32 id; size_t size_pa; }; // must be enough room to stash Header+Footer within the freed allocation. cassert(BUF_ALIGN >= sizeof(Header)+sizeof(Footer)); // expected_id identifies the tag type (either HEADER_ID or // FOOTER_ID). returns whether the given id, magic and size_pa // values are consistent with such a tag. // // note: these magic values are all that differentiates tags from // user data. this isn't 100% reliable, but we can't insert extra // boundary tags because the memory must remain aligned. bool is_valid_tag(u32 expected_id, u32 id, u32 magic, size_t size_pa) const { if(id != expected_id || magic != MAGIC) return false; TEST(size_pa % BUF_ALIGN == 0); TEST(size_pa <= MAX_CACHE_SIZE); return true; } // add p to freelist; if its neighbor(s) are free, merges them all into // one big region and frees that. // notes: // - correctly deals with p lying at start/end of pool. // - p and size_pa are trusted: [p, p+size_pa) lies within the pool. void coalesce_and_free(u8* p, size_t size_pa) { // CAVEAT: Header and Footer are wiped out by freelist_remove - // must use them before that. 
// expand (p, size_pa) to include previous allocation if it's free. // (unless p is at start of pool region) if(p != pool.da.base) { const Footer* footer = (const Footer*)(p-sizeof(Footer)); if(is_valid_tag(FOOTER_ID, footer->id, footer->magic, footer->size_pa)) { p -= footer->size_pa; size_pa += footer->size_pa; Header* header = (Header*)p; freelist_remove(header); } } // expand size_pa to include following memory if it was allocated // and is currently free. // (unless it starts beyond end of currently committed region) Header* header = (Header*)(p+size_pa); if((u8*)header < pool.da.base+pool.da.cur_size) { if(is_valid_tag(HEADER_ID, header->id, header->magic, header->size_pa)) { size_pa += header->size_pa; freelist_remove(header); } } freelist_add(p, size_pa); } //------------------------------------------------------------------------- // freelist // segregated, i.e. one list per size class. // note: we store Header nodes instead of just a pointer to head of // list - this wastes a bit of mem but greatly simplifies list insertion. Header freelists[sizeof(uintptr_t)*CHAR_BIT]; // bit i set iff size class i's freelist is not empty. // in conjunction with ls1, this allows finding a non-empty list in O(1). uintptr_t bitmap; // "size class" i (>= 0) contains allocations of size (2**(i-1), 2**i] // except for i=0, which corresponds to size=1. static uint size_class_of(size_t size_pa) { return log2((uint)size_pa); } // value of LSB 1-bit. 
// isolate the value of the least significant 1-bit of x (0 if x == 0).
static uint ls1(uint x)
{
	return (x & -(int)x);
}

// insert the free region [p, p+size_pa) into its size class's freelist
// and write boundary tags so neighbors can later coalesce with it.
void freelist_add(u8* p, size_t size_pa)
{
	TEST((uintptr_t)p % BUF_ALIGN == 0);
	TEST(size_pa % BUF_ALIGN == 0);
	const uint size_class = size_class_of(size_pa);

	// write header and footer into the freed mem
	// (its prev and next link fields will be set below)
	Header* header = (Header*)p;
	header->id = HEADER_ID;
	header->magic = MAGIC;
	header->size_pa = size_pa;
	Footer* footer = (Footer*)(p+size_pa-sizeof(Footer));
	footer->id = FOOTER_ID;
	footer->magic = MAGIC;
	footer->size_pa = size_pa;

	Header* prev = &freelists[size_class];
	// find node after which to insert (address ordered freelist)
	// (the loop condition keeps each list sorted by descending address)
	while(prev->next && header <= prev->next)
		prev = prev->next;

	header->next = prev->next;
	header->prev = prev;
	if(prev->next)
		prev->next->prev = header;
	prev->next = header;

	bitmap |= BIT(size_class);
}

// unlink header's region from its freelist and scrub its boundary
// tags so stale tags can't later be mistaken for free neighbors.
void freelist_remove(Header* header)
{
	TEST((uintptr_t)header % BUF_ALIGN == 0);

	Footer* footer = (Footer*)((u8*)header+header->size_pa-sizeof(Footer));
	TEST(is_valid_tag(HEADER_ID, header->id, header->magic, header->size_pa));
	TEST(is_valid_tag(FOOTER_ID, footer->id, footer->magic, footer->size_pa));
	TEST(header->size_pa == footer->size_pa);
	const uint size_class = size_class_of(header->size_pa);

	header->prev->next = header->next;
	if(header->next)
		header->next->prev = header->prev;

	// if freelist is now empty, clear bit in bitmap.
	if(!freelists[size_class].next)
		bitmap &= ~BIT(size_class);

	// wipe out header and footer to prevent accidental reuse
	memset(header, 0xEE, sizeof(Header));
	memset(footer, 0xEE, sizeof(Footer));
}

// returns 0 if nothing big enough is in size_class's freelist.
// try to satisfy an allocation of size_pa bytes from the given size
// class's freelist. returns 0 if nothing there is big enough; otherwise
// removes a suitable region (re-adding any remainder to the appropriate
// freelist) and returns it.
void* alloc_from_class(uint size_class, size_t size_pa)
{
	// return first suitable entry in (address-ordered) list
	for(Header* cur = freelists[size_class].next; cur; cur = cur->next)
	{
		if(cur->size_pa >= size_pa)
		{
			u8* p = (u8*)cur;
			const size_t remnant_pa = cur->size_pa - size_pa;

			freelist_remove(cur);

			if(remnant_pa)
				freelist_add(p+size_pa, remnant_pa);

			return p;
		}
	}
	return 0;
}

// returns 0 if there is no big enough entry in any freelist.
void* alloc_from_larger_class(uint start_size_class, size_t size_pa)
{
	// NOTE(review): bitmap is uintptr_t but is narrowed to uint here -
	// on LP64 targets this discards the upper size classes; confirm
	// whether sizes that large can actually occur.
	uint classes_left = bitmap;
	// .. strip off all smaller classes
	classes_left &= (~0 << start_size_class);

	// for each non-empty freelist (loop doesn't incur overhead for
	// empty freelists)
	while(classes_left)
	{
		const uint class_size = ls1(classes_left);
		classes_left &= ~class_size;	// remove from classes_left
		const uint size_class = size_class_of(class_size);
		// .. try to alloc
		void* p = alloc_from_class(size_class, size_pa);
		if(p)
			return p;
	}

	// apparently all classes above start_size_class are empty,
	// or the above would have succeeded.
	TEST(bitmap < BIT(start_size_class+1));
	return 0;
}

//-------------------------------------------------------------------------
// stats and validation

// running totals of allocated and free bytes; their sum must always
// equal the committed pool size (verified in self_check).
size_t allocated_size_total_pa, free_size_total_pa;

void stats_notify_alloc(size_t size_pa) { allocated_size_total_pa += size_pa; }
void stats_notify_free(size_t size_pa) { free_size_total_pa += size_pa; }
void stats_reset() { allocated_size_total_pa = free_size_total_pa = 0; }

// verify bookkeeping consistency between stats, pool and freelists.
void self_check() const
{
	debug_assert(allocated_size_total_pa+free_size_total_pa == pool.da.cur_size);

	// make sure freelists contain exactly free_size_total_pa bytes
	size_t freelist_size_total_pa = 0;
	uint classes_left = bitmap;
	while(classes_left)
	{
		const uint class_size = ls1(classes_left);
		classes_left &= ~class_size;	// remove from classes_left
		const uint size_class = size_class_of(class_size);
		// NOTE(review): this walk starts at the sentinel node itself
		// rather than at .next - harmless only if the sentinel's
		// size_pa is always 0; confirm.
		for(const Header* p = &freelists[size_class]; p; p = p->next)
			freelist_size_total_pa += p->size_pa;
	}
	debug_assert(free_size_total_pa == freelist_size_total_pa);
}

}; // CacheAllocator

static CacheAllocator cache_allocator;

//-----------------------------------------------------------------------------

/*
list of FileIOBufs currently held by the application.

note: "currently held" means between a file_buf_alloc/file_buf_retrieve
and file_buf_free. additionally, the buffer may be stored in file_cache
if file_cache_add was called; it remains there until evicted in favor
of another buffer.

rationale: users are strongly encouraged to access buffers as follows:
"alloc, use, free; alloc next..". this means only a few (typically one)
are active at a time. a list of these is more efficient to go through
(O(1)) than having to scan file_cache for the buffer (O(N)).
see also discussion at declaration of FileIOBuf.
*/
class ExtantBufMgr
{
public:
ExtantBufMgr()
	: extant_bufs(), epoch(1)
{
}

// return index of ExtantBuf that contains <buf>, or -1.
// linear scan over the extant list for the entry containing buf;
// returns its index, or -1 if buf is not extant.
ssize_t find(FileIOBuf buf) const
{
	debug_assert(buf != 0);
	for(size_t i = 0; i < extant_bufs.size(); i++)
	{
		const ExtantBuf& eb = extant_bufs[i];
		if(matches(eb, buf))
			return (ssize_t)i;
	}
	return -1;	// not found
}

// add given buffer to extant list.
// long_lived indicates if this buffer will not be freed immediately
// (more precisely: before allocating the next buffer); see FB_LONG_LIVED.
// note: reuses a previous extant_bufs[] slot if one is unused.
void add(FileIOBuf buf, size_t size, const char* atom_fn, uint fb_flags)
{
	// cache_allocator also does this; we need to follow suit so that
	// matches() won't fail due to zero-length size.
	if(!size)
		size = 1;

	// don't do was-immediately-freed check for long_lived buffers.
	const bool long_lived = (fb_flags & FB_LONG_LIVED) != 0;
	const uint this_epoch = long_lived? 0 : epoch++;

	debug_assert(buf != 0);

	// look for holes in array and reuse those
	for(size_t i = 0; i < extant_bufs.size(); i++)
	{
		ExtantBuf& eb = extant_bufs[i];
		if(eb.atom_fn == atom_fn)
			debug_warn("already exists!");
		// slot currently empty
		if(!eb.buf)
		{
			debug_assert(eb.refs == 0);
			eb.refs     = 1;
			eb.buf      = buf;
			eb.size     = size;
			eb.fb_flags = fb_flags;
			eb.atom_fn  = atom_fn;
			eb.epoch    = this_epoch;
			return;
		}
	}

	// add another entry
	extant_bufs.push_back(ExtantBuf(buf, size, fb_flags, atom_fn, this_epoch));
}

// indicate that a reference has been taken for <buf>;
// parameters are the same as for add().
void add_ref(FileIOBuf buf, size_t size, const char* atom_fn, bool long_lived)
{
	ssize_t idx = find(buf);
	// this buf was already on the extant list
	if(idx != -1)
		extant_bufs[idx].refs++;
	// it was in cache and someone is 'reactivating' it, i.e. moving it
	// to the extant list.
	else
		// NOTE(review): add() expects FileBufFlags here but receives the
		// bool long_lived (i.e. flags == 0 or 1). confirm FB_LONG_LIVED
		// == 1, otherwise the flag is silently dropped.
		add(buf, size, atom_fn, long_lived);
}

// return atom_fn that was passed when add()-ing this buf, or 0 if
// it's not on extant list.
// look up the owner filename recorded when buf was add()-ed;
// returns 0 if buf is not on the extant list.
const char* get_owner_filename(FileIOBuf buf)
{
	ssize_t idx = find(buf);
	if(idx != -1)
		return extant_bufs[idx].atom_fn;
	else
		return 0;
}

// return false and warn if buf is not on extant list; otherwise,
// pass back its size/owner filename and decrement reference count.
// the return value indicates whether it reached 0, i.e. was
// actually removed from the extant list.
bool find_and_remove(FileIOBuf buf, size_t& size, const char*& atom_fn)
{
	ssize_t idx = find(buf);
	if(idx == -1)
	{
		debug_warn("buf is not on extant list! double free?");
		return false;
	}

	ExtantBuf& eb = extant_bufs[idx];
	size    = eb.size;
	atom_fn = eb.atom_fn;

	// warn if the buffer was not released in alloc order
	// (epoch 0 marks long-lived buffers, which are exempt).
	if(eb.epoch != 0 && eb.epoch != epoch-1)
		debug_warn("buf not released immediately");
	epoch++;

	bool actually_removed = false;
	// no more references
	if(--eb.refs == 0)
	{
		// mark slot in extant_bufs[] as reusable
		memset(&eb, 0, sizeof(eb));
		actually_removed = true;
	}
	return actually_removed;
}

// wipe out the entire list without freeing any FileIOBuf.
// only meant to be used in file_cache_reset: since the allocator
// is completely reset, there's no need to free outstanding items first.
void clear()
{
	extant_bufs.clear();
}

// if buf is not in extant list, complain; otherwise, mark it as
// coming from the file <atom_fn>.
// this is needed in the following case: uncompressed reads from archive
// boil down to a file_io of the archive file. the buffer is therefore
// tagged with the archive filename instead of the desired filename.
// afile_read sets things right by calling this.
void replace_owner(FileIOBuf buf, const char* atom_fn)
{
	ssize_t idx = find(buf);
	if(idx != -1)
		extant_bufs[idx].atom_fn = atom_fn;
	else
		debug_warn("to-be-replaced buf not found");
}

// display list of all extant buffers in debug output.
// meant to be called at exit, at which time any remaining buffers
// must apparently have been leaked.
void display_all_remaining() { debug_printf("Leaked FileIOBufs:\n"); for(size_t i = 0; i < extant_bufs.size(); i++) { ExtantBuf& eb = extant_bufs[i]; if(eb.buf) debug_printf(" %p (0x%08x) %s\n", eb.buf, eb.size, eb.atom_fn); } debug_printf("--------\n"); } private: struct ExtantBuf { // treat as user-visible padded buffer, although it may already be // the correct exact_buf. // rationale: file_cache_retrieve gets padded_buf from file_cache // and then calls add_ref. if not already in extant list, that // would be added, whereas file_buf_alloc's add() would specify // the exact_buf. assuming it's padded_buf is safe because // exact_buf_oracle can be used to get exact_buf from that. FileIOBuf buf; // treat as user-visible size, although it may already be the // correct exact_size. // rationale: this would also be available via TFile, but we want // users to be able to allocate file buffers (and they don't know tf). // therefore, we store this separately. size_t size; // FileBufFlags uint fb_flags; // which file was this buffer taken from? // we search for given atom_fn as part of file_cache_retrieve // (since we are responsible for already extant bufs). // also useful for tracking down buf 'leaks' (i.e. someone // forgetting to call file_buf_free). const char* atom_fn; // active references, i.e. how many times file_buf_free must be // called until this buffer is freed and removed from extant list. uint refs; // used to check if this buffer was freed immediately // (before allocating the next). that is the desired behavior // because it avoids fragmentation and leaks. uint epoch; ExtantBuf(FileIOBuf buf_, size_t size_, uint fb_flags_, const char* atom_fn_, uint epoch_) : buf(buf_), size(size_), fb_flags(fb_flags_), atom_fn(atom_fn_), refs(1), epoch(epoch_) {} }; std::vector extant_bufs; // see if buf (which may be padded) falls within eb's buffer. 
// this is necessary for file_buf_free; we do not know the size // of buffer to free until after find_and_remove, so exact_buf_oracle // cannot be used. bool matches(const ExtantBuf& eb, FileIOBuf buf) const { return (eb.buf <= buf && buf < (u8*)eb.buf+eb.size); } uint epoch; }; // ExtantBufMgr static ExtantBufMgr extant_bufs; //----------------------------------------------------------------------------- // HACK: key type is really const char*, but the file_cache's STL (hash_)map // stupidly assumes that is a "string". (comparison can be done via // pointer compare, due to atom_fn mechanism) we define as void* to avoid // this behavior - it breaks the (const char*)1 self-test hack and is // inefficient. static Cache file_cache; /* mapping of padded_buf to the original exact_buf and exact_size. rationale: cache stores the user-visible (padded) buffer, but we need to pass the original to cache_allocator. since not all buffers end up padded (only happens if reading uncompressed files from archive), it is more efficient to only store bookkeeping information for those who need it (rather than maintaining a complete list of allocs in cache_allocator). storing both padded and exact buf/size in a FileIOBuf struct is not really an option: that begs the question how users initialize it, and can't well be stored in Cache. */ class ExactBufOracle { public: typedef std::pair BufAndSize; // associate padded_buf with exact_buf and exact_size; // these can later be retrieved via get(). // should only be called if necessary, i.e. they are not equal. 
// assumes and verifies that the association didn't already exist // (otherwise it's a bug, because it's removed when buf is freed) void add(FileIOBuf exact_buf, size_t exact_size, FileIOBuf padded_buf) { debug_assert((uintptr_t)exact_buf % BUF_ALIGN == 0); debug_assert(exact_buf <= padded_buf); std::pair ret; const BufAndSize item = std::make_pair(exact_buf, exact_size); ret = padded2exact.insert(std::make_pair(padded_buf, item)); // make sure it wasn't already in the map debug_assert(ret.second == true); } // return exact_buf and exact_size that were associated with . // can optionally remove that association afterwards (slightly more // efficient than a separate remove() call). BufAndSize get(FileIOBuf padded_buf, size_t size, bool remove_afterwards = false) { Padded2Exact::iterator it = padded2exact.find(padded_buf); BufAndSize ret; // not found => must already be exact_buf. will be verified below. if(it == padded2exact.end()) ret = std::make_pair(padded_buf, size); else { ret = it->second; // something must be different, else it shouldn't have been // added anyway. // actually, no: file_io may have had to register these values // (since its user_size != size), but they may match what // caller passed us. //debug_assert(ret.first != padded_buf || ret.second != size); if(remove_afterwards) padded2exact.erase(it); } // exact_buf must be aligned, or something is wrong. debug_assert((uintptr_t)ret.first % BUF_ALIGN == 0); return ret; } // remove all associations. this is intended only for use in // file_cache_reset. 
void clear() { padded2exact.clear(); } private: typedef std::map Padded2Exact; Padded2Exact padded2exact; }; static ExactBufOracle exact_buf_oracle; // referenced by cache_alloc static void free_padded_buf(FileIOBuf padded_buf, size_t size, bool from_heap = false); static void cache_free(FileIOBuf exact_buf, size_t exact_size) { cache_allocator.dealloc((u8*)exact_buf, exact_size); } static FileIOBuf cache_alloc(size_t size) { uint attempts = 0; for(;;) { FileIOBuf buf = (FileIOBuf)cache_allocator.alloc(size); if(buf) return buf; // remove least valuable entry from cache and free its buffer. FileIOBuf discarded_buf; size_t size; bool removed = file_cache.remove_least_valuable(&discarded_buf, &size); // only false if cache is empty, which can't be the case because // allocation failed. TEST(removed); // discarded_buf may be the least valuable entry in cache, but if // still in use (i.e. extant), it must not actually be freed yet! if(extant_bufs.find(discarded_buf) == -1) { free_padded_buf(discarded_buf, size); // optional: this iteration doesn't really count because no // memory was actually freed. helps prevent infinite loop // warning without having to raise the limit really high. attempts--; } // note: this may seem hefty, but 300 is known to be reached. // (after building archive, file cache is full; attempting to // allocate ~4MB while only freeing small blocks scattered over // the entire cache can take a while) if(++attempts > 500) debug_warn("possible infinite loop: failed to make room in cache"); } UNREACHABLE; } // translate to the exact buffer and free it. // convenience function used by file_buf_alloc and file_buf_free. 
static void free_padded_buf(FileIOBuf padded_buf, size_t size, bool from_heap) { const bool remove_afterwards = true; ExactBufOracle::BufAndSize exact = exact_buf_oracle.get(padded_buf, size, remove_afterwards); FileIOBuf exact_buf = exact.first; size_t exact_size = exact.second; if(from_heap) page_aligned_free((void*)exact_buf, exact_size); else cache_free(exact_buf, exact_size); } // allocate a new buffer of bytes (possibly more due to internal // fragmentation). never returns 0. // : owner filename (buffer is intended to be used for data from // this file). // : see FileBufFlags. FileIOBuf file_buf_alloc(size_t size, const char* atom_fn, uint fb_flags) { const bool should_update_stats = (fb_flags & FB_NO_STATS) == 0; const bool from_heap = (fb_flags & FB_FROM_HEAP) != 0; FileIOBuf buf; if(from_heap) { buf = (FileIOBuf)page_aligned_alloc(size); if(!buf) WARN_ERR(ERR_NO_MEM); } else buf = cache_alloc(size); extant_bufs.add(buf, size, atom_fn, fb_flags); if(should_update_stats) stats_buf_alloc(size, round_up(size, BUF_ALIGN)); return buf; } // mark as no longer needed. if its reference count drops to 0, // it will be removed from the extant list. if it had been added to the // cache, it remains there until evicted in favor of another buffer. LibError file_buf_free(FileIOBuf buf, uint fb_flags) { const bool should_update_stats = (fb_flags & FB_NO_STATS) == 0; const bool from_heap = (fb_flags & FB_FROM_HEAP) != 0; if(!buf) return ERR_OK; size_t size; const char* atom_fn; bool actually_removed = extant_bufs.find_and_remove(buf, size, atom_fn); if(actually_removed) { // avoid any potential confusion and some overhead by skipping the // retrieve step (not needed anyway). if(from_heap) goto free_immediately; { FileIOBuf buf_in_cache; // it's still in cache - leave its buffer intact. if(file_cache.retrieve(atom_fn, buf_in_cache, 0, false)) { // sanity checks: what's in cache must match what we have. 
// note: don't compare actual_size with cached size - they are // usually different. debug_assert(buf_in_cache == buf); } // buf is not in cache - needs to be freed immediately. else { free_immediately: // note: extant_bufs cannot be relied upon to store and return // exact_buf - see definition of ExtantBuf.buf. // we have to use exact_buf_oracle, which is a bit slow, but hey. free_padded_buf(buf, size, from_heap); } } } if(should_update_stats) stats_buf_free(); trace_notify_free(atom_fn, size); return ERR_OK; } // inform us that the buffer address will be increased by -bytes. // this happens when reading uncompressed files from archive: they // start at unaligned offsets and file_io rounds offset down to // next block boundary. the buffer therefore starts with padding, which // is skipped so the user only sees their data. // we make note of the new buffer address so that it can be freed correctly // by passing the new padded buffer. void file_buf_add_padding(FileIOBuf exact_buf, size_t exact_size, size_t padding) { debug_assert(padding < FILE_BLOCK_SIZE); FileIOBuf padded_buf = (FileIOBuf)((u8*)exact_buf + padding); exact_buf_oracle.add(exact_buf, exact_size, padded_buf); } // if buf is not in extant list, complain; otherwise, mark it as // coming from the file . // this is needed in the following case: uncompressed reads from archive // boil down to a file_io of the archive file. the buffer is therefore // tagged with the archive filename instead of the desired filename. // afile_read sets things right by calling this. LibError file_buf_set_real_fn(FileIOBuf buf, const char* atom_fn) { // note: removing and reinserting would be easiest, but would // mess up the epoch field. extant_bufs.replace_owner(buf, atom_fn); return ERR_OK; } // if file_cache_add-ing the given buffer, would it be added? // this is referenced by trace_entry_causes_io; see explanation there. 
// predicate: decides whether file_cache_add would accept this buffer.
bool file_cache_would_add(size_t size, const char* UNUSED(atom_fn), uint file_flags)
{
	// caller is saying this file shouldn't be cached here.
	if(file_flags & FILE_CACHED_AT_HIGHER_LEVEL)
		return false;

	// refuse to cache 0-length files (it would have no benefit and
	// causes problems due to divide-by-0).
	if(size == 0)
		return false;

	return true;
}

// "give" <buf> to the cache, specifying its size and owner filename.
// since this data may be shared among users of the cache, it is made
// read-only (via MMU) to make sure no one can corrupt/change it.
//
// note: the reference added by file_buf_alloc still exists! it must
// still be file_buf_free-d after calling this.
LibError file_cache_add(FileIOBuf buf, size_t size, const char* atom_fn, uint file_flags)
{
	debug_assert(buf);

	if(!file_cache_would_add(size, atom_fn, file_flags))
		return INFO_SKIPPED;

	// assign cost
	uint cost = 1;

	// the cache stores the (padded) user-visible buffer, but the
	// allocator must operate on the exact buffer - translate first.
	ExactBufOracle::BufAndSize bas = exact_buf_oracle.get(buf, size);
	FileIOBuf exact_buf = bas.first; size_t exact_size = bas.second;
	cache_allocator.make_read_only((u8*)exact_buf, exact_size);
	file_cache.add(atom_fn, buf, size, cost);

	return ERR_OK;
}

// check if the contents of the file <atom_fn> are in file cache.
// if not, return 0; otherwise, return buffer address and optionally
// pass back its size.
//
// note: does not call stats_cache because it does not know the file size
// in case of cache miss! doing so is left to the caller.
FileIOBuf file_cache_retrieve(const char* atom_fn, size_t* psize, uint fb_flags)
{
	// note: do not query extant_bufs - reusing that doesn't make sense
	// (why would someone issue a second IO for the entire file while
	// still referencing the previous instance?)

	const bool long_lived = (fb_flags & FB_LONG_LIVED) != 0;
	const bool should_account = (fb_flags & FB_NO_ACCOUNTING) == 0;
	const bool should_update_stats = (fb_flags & FB_NO_STATS) == 0;

	FileIOBuf buf;
	const bool should_refill_credit = should_account;
	// NOTE(review): psize is dereferenced unconditionally below when
	// accounting is enabled - callers must not pass 0 in that case.
	if(!file_cache.retrieve(atom_fn, buf, psize, should_refill_credit))
		return 0;

	if(should_account)
		extant_bufs.add_ref(buf, *psize, atom_fn, long_lived);

	if(should_update_stats)
		stats_buf_ref();

	return buf;
}

// invalidate all data loaded from the file <fn>. this ensures the next
// load of this file gets the (presumably new) contents of the file,
// not previous stale cache contents.
// call after hotloading code detects file has been changed.
LibError file_cache_invalidate(const char* P_fn)
{
	const char* atom_fn = file_make_unique_fn_copy(P_fn);

	// note: what if the file has an extant buffer?
	// this *could* conceivably happen during hotloading if a file is
	// saved right when the engine wants to access it (unlikely but not
	// impossible).
	// what we'll do is just let them continue as if nothing had happened;
	// invalidating is only meant to make sure that the reload's IO
	// will load the new data (not stale stuff from cache).
	// => nothing needs to be done.

	// mark all blocks from the file as invalid
	block_mgr.invalidate(atom_fn);

	// file was cached: remove it and free that memory
	FileIOBuf cached_buf; size_t size;
	if(file_cache.retrieve(atom_fn, cached_buf, &size))
	{
		file_cache.remove(atom_fn);
		free_padded_buf(cached_buf, size);
	}

	return ERR_OK;
}

// reset entire state of the file cache to what it was after initialization.
// that means completely emptying the extant list and cache.
// used after simulating cache operation, which fills the cache with
// invalid data.
void file_cache_reset()
{
	// just wipe out extant list and cache without freeing the bufs -
	// cache allocator is completely reset below.
extant_bufs.clear(); // note: do not loop until file_cache.empty - there may still be // some items pending eviction even though cache is "empty". FileIOBuf discarded_buf; size_t size; while(file_cache.remove_least_valuable(&discarded_buf, &size)) { } cache_allocator.reset(); exact_buf_oracle.clear(); } void file_cache_init() { } void file_cache_shutdown() { extant_bufs.display_all_remaining(); cache_allocator.shutdown(); block_mgr.shutdown(); } - - -//----------------------------------------------------------------------------- -// built-in self test -//----------------------------------------------------------------------------- - -#if SELF_TEST_ENABLED -namespace test { - -static void test_cache_allocator() -{ - // allocated address -> its size - typedef std::map AllocMap; - AllocMap allocations; - - // put allocator through its paces by allocating several times - // its capacity (this ensures memory is reused) - srand(1); - size_t total_size_used = 0; - while(total_size_used < 4*MAX_CACHE_SIZE) - { - size_t size = rand(1, MAX_CACHE_SIZE/4); - total_size_used += size; - void* p; - // until successful alloc: - for(;;) - { - p = cache_allocator.alloc(size); - if(p) - break; - // out of room - remove a previous allocation - // .. choose one at random - size_t chosen_idx = (size_t)rand(0, (uint)allocations.size()); - AllocMap::iterator it = allocations.begin(); - for(; chosen_idx != 0; chosen_idx--) - ++it; - cache_allocator.dealloc((u8*)it->first, it->second); - allocations.erase(it); - } - - // must not already have been allocated - TEST(allocations.find(p) == allocations.end()); - allocations[p] = size; - } - - // reset to virginal state - cache_allocator.reset(); -} - -static void test_file_cache() -{ - // we need a unique address for file_cache_add, but don't want to - // actually put it in the atom_fn storage (permanently clutters it). - // just increment this pointer (evil but works since it's not used). 
-// const char* atom_fn = (const char*)1; - // give to file_cache -// file_cache_add((FileIOBuf)p, size, atom_fn++); - - file_cache_reset(); - TEST(file_cache.empty()); - - // note: even though everything has now been freed, - // the freelists may be a bit scattered already. -} - -static void self_test() -{ - test_cache_allocator(); - test_file_cache(); -} - -SELF_TEST_RUN; - -} // namespace test -#endif // #if SELF_TEST_ENABLED Index: ps/trunk/source/lib/res/graphics/tex.cpp =================================================================== --- ps/trunk/source/lib/res/graphics/tex.cpp (revision 3910) +++ ps/trunk/source/lib/res/graphics/tex.cpp (revision 3911) @@ -1,706 +1,738 @@ /** * ========================================================================= * File : tex.cpp * Project : 0 A.D. * Description : support routines for 2d texture access/writing. * * @author Jan.Wassenberg@stud.uni-karlsruhe.de * ========================================================================= */ /* * Copyright (c) 2004 Jan Wassenberg * * Redistribution and/or modification are also permitted under the * terms of the GNU General Public License as published by the * Free Software Foundation (version 2 or later, at your option). * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. */ #include "precompiled.h" #include #include #include #include "lib.h" #include "timer.h" #include "../res.h" #include "tex.h" #include "tex_codec.h" +//----------------------------------------------------------------------------- +// validation +//----------------------------------------------------------------------------- + // be careful not to use other tex_* APIs here because they call us. LibError tex_validate(const Tex* t) { // pixel data size_t tex_file_size; void* tex_file = mem_get_ptr(t->hm, &tex_file_size); // .. 
only check validity if the image is still in memory. // (e.g. ogl_tex frees the data after uploading to GL) if(tex_file) { // file size smaller than header+pixels. // possible causes: texture file header is invalid, // or file wasn't loaded completely. if(tex_file_size < t->ofs + t->w*t->h*t->bpp/8) WARN_RETURN(ERR_1); } // bits per pixel // (we don't bother checking all values; a sanity check is enough) if(t->bpp % 4 || t->bpp > 32) WARN_RETURN(ERR_2); // flags // .. DXT value const uint dxt = t->flags & TEX_DXT; if(dxt != 0 && dxt != 1 && dxt != DXT1A && dxt != 3 && dxt != 5) WARN_RETURN(ERR_3); // .. orientation const uint orientation = t->flags & TEX_ORIENTATION; if(orientation == (TEX_BOTTOM_UP|TEX_TOP_DOWN)) WARN_RETURN(ERR_4); return ERR_OK; } #define CHECK_TEX(t) RETURN_ERR(tex_validate(t)) // check if the given texture format is acceptable: 8bpp grey, // 24bpp color or 32bpp color+alpha (BGR / upside down are permitted). // basically, this is the "plain" format understood by all codecs and // tex_codec_plain_transform. LibError tex_validate_plain_format(uint bpp, uint flags) { const bool alpha = (flags & TEX_ALPHA ) != 0; const bool grey = (flags & TEX_GREY ) != 0; const bool dxt = (flags & TEX_DXT ) != 0; const bool mipmaps = (flags & TEX_MIPMAPS) != 0; if(dxt || mipmaps) WARN_RETURN(ERR_TEX_FMT_INVALID); // grey must be 8bpp without alpha, or it's invalid. 
if(grey) { if(bpp == 8 && !alpha) return ERR_OK; WARN_RETURN(ERR_TEX_FMT_INVALID); } if(bpp == 24 && !alpha) return ERR_OK; if(bpp == 32 && alpha) return ERR_OK; WARN_RETURN(ERR_TEX_FMT_INVALID); } +//----------------------------------------------------------------------------- +// mipmaps +//----------------------------------------------------------------------------- + +void tex_util_foreach_mipmap(uint w, uint h, uint bpp, const u8* restrict data, + int levels_to_skip, uint data_padding, MipmapCB cb, void* restrict ctx) +{ + uint level_w = w, level_h = h; + const u8* level_data = data; + + // we iterate through the loop (necessary to skip over image data), + // but do not actually call back until the requisite number of + // levels have been skipped (i.e. level == 0). + int level = -(int)levels_to_skip; + if(levels_to_skip == -1) + level = 0; + + // until at level 1x1: + for(;;) + { + // used to skip past this mip level in + const size_t level_data_size = (size_t)(round_up(level_w, data_padding) * round_up(level_h, data_padding) * bpp/8); + + if(level >= 0) + cb((uint)level, level_w, level_h, level_data, level_data_size, ctx); + + level_data += level_data_size; + + // 1x1 reached - done + if(level_w == 1 && level_h == 1) + break; + level_w /= 2; + level_h /= 2; + // if the texture is non-square, one of the dimensions will become + // 0 before the other. to satisfy OpenGL's expectations, change it + // back to 1. 
+ if(level_w == 0) level_w = 1; + if(level_h == 0) level_h = 1; + level++; + + // special case: no mipmaps, we were only supposed to call for + // the base level + if(levels_to_skip == TEX_BASE_LEVEL_ONLY) + break; + } +} + + struct CreateLevelData { uint num_components; uint prev_level_w; uint prev_level_h; const u8* prev_level_data; size_t prev_level_data_size; }; // uses 2x2 box filter static void create_level(uint level, uint level_w, uint level_h, const u8* restrict level_data, size_t level_data_size, void* restrict ctx) { CreateLevelData* cld = (CreateLevelData*)ctx; const size_t src_w = cld->prev_level_w; const size_t src_h = cld->prev_level_h; const u8* src = cld->prev_level_data; u8* dst = (u8*)level_data; // base level - must be copied over from source buffer if(level == 0) { debug_assert(level_data_size == cld->prev_level_data_size); memcpy2(dst, src, level_data_size); } else { const uint num_components = cld->num_components; const size_t dx = num_components, dy = dx*src_w; // special case: image is too small for 2x2 filter if(cld->prev_level_w == 1 || cld->prev_level_h == 1) { // image is either a horizontal or vertical line. // their memory layout is the same (packed pixels), so no special // handling is needed; just pick max dimension. 
for(uint y = 0; y < MAX(src_w, src_h); y += 2) { for(uint i = 0; i < num_components; i++) { *dst++ = (src[0]+src[dx]+1)/2; src += 1; } src += dx; // skip to next pixel (since box is 2x2) } } // normal else { for(uint y = 0; y < src_h; y += 2) { for(uint x = 0; x < src_w; x += 2) { for(uint i = 0; i < num_components; i++) { *dst++ = (src[0]+src[dx]+src[dy]+src[dx+dy]+2)/4; src += 1; } src += dx; // skip to next pixel (since box is 2x2) } src += dy; // skip to next row (since box is 2x2) } } debug_assert(dst == level_data + level_data_size); debug_assert(src == cld->prev_level_data + cld->prev_level_data_size); } cld->prev_level_data = level_data; cld->prev_level_data_size = level_data_size; cld->prev_level_w = level_w; cld->prev_level_h = level_h; } +static LibError add_mipmaps(Tex* t, uint w, uint h, uint bpp, + void* new_data, size_t data_size) +{ + // this code assumes the image is of POT dimension; we don't + // go to the trouble of implementing image scaling because + // the only place this is used (ogl_tex_upload) requires POT anyway. + if(!is_pow2(w) || !is_pow2(h)) + WARN_RETURN(ERR_TEX_INVALID_SIZE); + t->flags |= TEX_MIPMAPS; // must come before tex_img_size! + const size_t mipmap_size = tex_img_size(t); + Handle hm; + const u8* mipmap_data = (const u8*)mem_alloc(mipmap_size, 4*KiB, 0, &hm); + if(!mipmap_data) + WARN_RETURN(ERR_NO_MEM); + CreateLevelData cld = { bpp/8, w, h, (const u8*)new_data, data_size }; + tex_util_foreach_mipmap(w, h, bpp, mipmap_data, 0, 1, create_level, &cld); + mem_free_h(t->hm); + t->hm = hm; + t->ofs = 0; + + return ERR_OK; +} + + +//----------------------------------------------------------------------------- +// pixel format conversion (transformation) +//----------------------------------------------------------------------------- + TIMER_ADD_CLIENT(tc_plain_transform); // handles BGR and row flipping in "plain" format (see below). // // called by codecs after they get their format-specific transforms out of // the way. 
note that this approach requires several passes over the image, // but is much easier to maintain than providing all<->all conversion paths. // // somewhat optimized (loops are hoisted, cache associativity accounted for) static LibError plain_transform(Tex* t, uint transforms) { TIMER_ACCRUE(tc_plain_transform); // (this is also called directly instead of through ogl_tex, so // we need to validate) CHECK_TEX(t); // extract texture info const uint w = t->w, h = t->h, bpp = t->bpp, flags = t->flags; u8* const data = tex_get_data(t); const size_t data_size = tex_img_size(t); // sanity checks (not errors, we just can't handle these cases) // .. unknown transform if(transforms & ~(TEX_BGR|TEX_ORIENTATION|TEX_MIPMAPS)) return INFO_TEX_CODEC_CANNOT_HANDLE; // .. data is not in "plain" format RETURN_ERR(tex_validate_plain_format(bpp, flags)); // .. nothing to do if(!transforms) return ERR_OK; // allocate copy of the image data. // rationale: L1 cache is typically A2 => swapping in-place with a // line buffer leads to thrashing. we'll assume the whole texture*2 // fits in cache, allocate a copy, and transfer directly from there. // // this is necessary even when not flipping because the initial Tex.hm // (which is a FileIOBuf) is read-only. Handle hm; void* new_data = mem_alloc(data_size, 4*KiB, 0, &hm); if(!new_data) WARN_RETURN(ERR_NO_MEM); memcpy2(new_data, data, data_size); // setup row source/destination pointers (simplifies outer loop) u8* dst = (u8*)new_data; const u8* src = (const u8*)new_data; const size_t pitch = w * bpp/8; // .. avoid y*pitch multiply in row loop; instead, add row_ofs. 
ssize_t row_ofs = (ssize_t)pitch; // flipping rows (0,1,2 -> 2,1,0) if(transforms & TEX_ORIENTATION) { src = (const u8*)data+data_size-pitch; // last row row_ofs = -(ssize_t)pitch; } // no BGR convert necessary if(!(transforms & TEX_BGR)) { for(uint y = 0; y < h; y++) { memcpy2(dst, src, pitch); dst += pitch; src += row_ofs; } } // RGB <-> BGR else if(bpp == 24) { for(uint y = 0; y < h; y++) { for(uint x = 0; x < w; x++) { // need temporaries in case src == dst (i.e. not flipping) const u8 b = src[0], g = src[1], r = src[2]; dst[0] = r; dst[1] = g; dst[2] = b; dst += 3; src += 3; } src += row_ofs - pitch; // flip? previous row : stay } } // RGBA <-> BGRA else if(bpp == 32) { for(uint y = 0; y < h; y++) { for(uint x = 0; x < w; x++) { // need temporaries in case src == dst (i.e. not flipping) const u8 b = src[0], g = src[1], r = src[2], a = src[3]; dst[0] = r; dst[1] = g; dst[2] = b; dst[3] = a; dst += 4; src += 4; } src += row_ofs - pitch; // flip? previous row : stay } } mem_free_h(t->hm); t->hm = hm; t->ofs = 0; if(!(t->flags & TEX_MIPMAPS) && transforms & TEX_MIPMAPS) + RETURN_ERR(add_mipmaps(t, w, h, bpp, new_data, data_size)); + + CHECK_TEX(t); + return ERR_OK; +} + + +TIMER_ADD_CLIENT(tc_transform); + +// change 's pixel format by flipping the state of all TEX_* flags +// that are set in transforms. +LibError tex_transform(Tex* t, uint transforms) +{ + TIMER_ACCRUE(tc_transform); + CHECK_TEX(t); + + const uint target_flags = t->flags ^ transforms; + uint remaining_transforms; + for(;;) { - // this code assumes the image is of POT dimension; we don't - // go to the trouble of implementing image scaling because - // the only place this is used (ogl_tex_upload) requires POT anyway. - if(!is_pow2(w) || !is_pow2(h)) - WARN_RETURN(ERR_TEX_INVALID_SIZE); - t->flags |= TEX_MIPMAPS; // must come before tex_img_size! 
- const size_t mipmap_size = tex_img_size(t); - Handle hm; - const u8* mipmap_data = (const u8*)mem_alloc(mipmap_size, 4*KiB, 0, &hm); - if(!mipmap_data) - WARN_RETURN(ERR_NO_MEM); - CreateLevelData cld = { bpp/8, w, h, (const u8*)new_data, data_size }; - tex_util_foreach_mipmap(w, h, bpp, mipmap_data, 0, 1, create_level, &cld); - mem_free_h(t->hm); - t->hm = hm; - t->ofs = 0; + remaining_transforms = target_flags ^ t->flags; + // we're finished (all required transforms have been done) + if(remaining_transforms == 0) + return ERR_OK; + + LibError ret = tex_codec_transform(t, remaining_transforms); + if(ret != 0) + break; } - CHECK_TEX(t); + // last chance + RETURN_ERR(plain_transform(t, remaining_transforms)); return ERR_OK; } +// change 's pixel format to the new format specified by . +// (note: this is equivalent to tex_transform(t, t->flags^new_flags). +LibError tex_transform_to(Tex* t, uint new_flags) +{ + // tex_transform takes care of validating + const uint transforms = t->flags ^ new_flags; + return tex_transform(t, transforms); +} + + //----------------------------------------------------------------------------- // image orientation //----------------------------------------------------------------------------- // see "Default Orientation" in docs. static int global_orientation = TEX_TOP_DOWN; // set the orientation (either TEX_BOTTOM_UP or TEX_TOP_DOWN) to which // all loaded images will automatically be converted // (excepting file formats that don't specify their orientation, i.e. DDS). void tex_set_global_orientation(int o) { debug_assert(o == TEX_TOP_DOWN || o == TEX_BOTTOM_UP); global_orientation = o; } static void flip_to_global_orientation(Tex* t) { // (can't use normal CHECK_TEX due to void return) WARN_ERR(tex_validate(t)); uint orientation = t->flags & TEX_ORIENTATION; // if codec knows which way around the image is (i.e. 
not DDS): if(orientation) { // flip image if necessary uint transforms = orientation ^ global_orientation; WARN_ERR(plain_transform(t, transforms)); } // indicate image is at global orientation. this is still done even // if the codec doesn't know: the default orientation should be chosen // to make that work correctly (see "Default Orientation" in docs). t->flags = (t->flags & ~TEX_ORIENTATION) | global_orientation; // (can't use normal CHECK_TEX due to void return) WARN_ERR(tex_validate(t)); } // indicate if the orientation specified by matches // dst_orientation (if the latter is 0, then the global_orientation). // (we ask for src_flags instead of src_orientation so callers don't // have to mask off TEX_ORIENTATION) bool tex_orientations_match(uint src_flags, uint dst_orientation) { const uint src_orientation = src_flags & TEX_ORIENTATION; if(dst_orientation == 0) dst_orientation = global_orientation; return (src_orientation == dst_orientation); } //----------------------------------------------------------------------------- -// util -//----------------------------------------------------------------------------- - -void tex_util_foreach_mipmap(uint w, uint h, uint bpp, const u8* restrict data, - int levels_to_skip, uint data_padding, MipmapCB cb, void* restrict ctx) -{ - uint level_w = w, level_h = h; - const u8* level_data = data; - - // we iterate through the loop (necessary to skip over image data), - // but do not actually call back until the requisite number of - // levels have been skipped (i.e. level == 0). 
- int level = -(int)levels_to_skip; - if(levels_to_skip == -1) - level = 0; - - // until at level 1x1: - for(;;) - { - // used to skip past this mip level in - const size_t level_data_size = (size_t)(round_up(level_w, data_padding) * round_up(level_h, data_padding) * bpp/8); - - if(level >= 0) - cb((uint)level, level_w, level_h, level_data, level_data_size, ctx); - - level_data += level_data_size; - - // 1x1 reached - done - if(level_w == 1 && level_h == 1) - break; - level_w /= 2; - level_h /= 2; - // if the texture is non-square, one of the dimensions will become - // 0 before the other. to satisfy OpenGL's expectations, change it - // back to 1. - if(level_w == 0) level_w = 1; - if(level_h == 0) level_h = 1; - level++; - - // special case: no mipmaps, we were only supposed to call for - // the base level - if(levels_to_skip == -1) - break; - } -} - - -//----------------------------------------------------------------------------- -// API +// misc. API //----------------------------------------------------------------------------- // indicate if 's extension is that of a texture format // supported by tex_load. case-insensitive. // // rationale: tex_load complains if the given file is of an // unsupported type. this API allows users to preempt that warning // (by checking the filename themselves), and also provides for e.g. // enumerating only images in a file picker. // an alternative might be a flag to suppress warning about invalid files, // but this is open to misuse. 
bool tex_is_known_extension(const char* filename) { const TexCodecVTbl* dummy; // found codec for it => known extension if(tex_codec_for_filename(filename, &dummy) == ERR_OK) return true; return false; } -// split out of tex_load to ease resource cleanup -static LibError tex_load_impl(FileIOBuf file_, size_t file_size, Tex* t) -{ - u8* file = (u8*)file_; - const TexCodecVTbl* c; - RETURN_ERR(tex_codec_for_header(file, file_size, &c)); - - // make sure the entire header has been read - const size_t min_hdr_size = c->hdr_size(0); - if(file_size < min_hdr_size) - WARN_RETURN(ERR_INCOMPLETE_HEADER); - const size_t hdr_size = c->hdr_size(file); - if(file_size < hdr_size) - WARN_RETURN(ERR_INCOMPLETE_HEADER); - t->ofs = hdr_size; - - DynArray da; - RETURN_ERR(da_wrap_fixed(&da, file, file_size)); - - RETURN_ERR(c->decode(&da, t)); - - (void)da_free(&da); // for completeness only; just zeros - - // sanity checks - if(!t->w || !t->h || t->bpp > 32) - WARN_RETURN(ERR_TEX_FMT_INVALID); - // .. note: decode() may have decompressed the image; cannot use file_size. - size_t hm_size; - (void)mem_get_ptr(t->hm, &hm_size); - if(hm_size < t->ofs + tex_img_size(t)) - WARN_RETURN(ERR_TEX_INVALID_SIZE); - - flip_to_global_orientation(t); - - return ERR_OK; -} - - -// MEM_DTOR -> file_buf_free adapter (used for mem_wrap-ping FileIOBuf) -static void file_buf_dtor(void* p, size_t UNUSED(size), uintptr_t UNUSED(ctx)) -{ - (void)file_buf_free((FileIOBuf)p); -} - -// load the specified image from file into the given Tex object. -// currently supports BMP, TGA, JPG, JP2, PNG, DDS. -LibError tex_load(const char* fn, Tex* t, uint file_flags) -{ - // load file - FileIOBuf file; size_t file_size; - // rationale: we need the Handle return value for Tex.hm - the data pointer - // must be protected against being accidentally free-d in that case. 
- - RETURN_ERR(vfs_load(fn, file, file_size, file_flags)); - Handle hm = mem_wrap((void*)file, file_size, 0, 0, 0, file_buf_dtor, 0, (void*)tex_load); - t->hm = hm; - LibError ret = tex_load_impl(file, file_size, t); - if(ret < 0) - { - (void)tex_free(t); - debug_warn("failed"); - return ret; - } - - // do not free hm! it either still holds the image data (i.e. texture - // wasn't compressed) or was replaced by a new buffer for the image data. - - CHECK_TEX(t); - return ERR_OK; -} - - // store the given image data into a Tex object; this will be as if // it had been loaded via tex_load. // // rationale: support for in-memory images is necessary for // emulation of glCompressedTexImage2D and useful overall. // however, we don't want to provide an alternate interface for each API; // these would have to be changed whenever fields are added to Tex. // instead, provide one entry point for specifying images. // note: since we do not know how was allocated, the caller must do // so (after calling tex_free, which is required regardless of alloc type). // // we need only add bookkeeping information and "wrap" it in // our Tex struct, hence the name. LibError tex_wrap(uint w, uint h, uint bpp, uint flags, void* img, Tex* t) { t->w = w; t->h = h; t->bpp = bpp; t->flags = flags; // note: we can't use tex_img_size because that requires all // Tex fields to be valid, but this calculation must be done first. const size_t img_size = w*h*bpp/8; t->hm = mem_wrap(img, img_size, 0, 0, 0, 0, 0, (void*)&tex_wrap); RETURN_ERR(t->hm); // the exact value of img is lost, since the handle references the // allocation and disregards the offset within it given by . // fix that up by setting t->ofs. void* reported_ptr = mem_get_ptr(t->hm); t->ofs = (u8*)img - (u8*)reported_ptr; CHECK_TEX(t); return ERR_OK; } // free all resources associated with the image and make further // use of it impossible. 
LibError tex_free(Tex* t) { // do not validate - this is called from tex_load if loading // failed, so not all fields may be valid. LibError ret = mem_free_h(t->hm); // do not zero out the fields! that could lead to trouble since // ogl_tex_upload followed by ogl_tex_free is legit, but would // cause OglTex_validate to fail (since its Tex.w is == 0). return ret; } //----------------------------------------------------------------------------- - -TIMER_ADD_CLIENT(tc_transform); - -// change 's pixel format by flipping the state of all TEX_* flags -// that are set in transforms. -LibError tex_transform(Tex* t, uint transforms) -{ -TIMER_ACCRUE(tc_transform); - CHECK_TEX(t); - - const uint target_flags = t->flags ^ transforms; - uint remaining_transforms; - for(;;) - { - remaining_transforms = target_flags ^ t->flags; - // we're finished (all required transforms have been done) - if(remaining_transforms == 0) - return ERR_OK; - - LibError ret = tex_codec_transform(t, remaining_transforms); - if(ret != 0) - break; - } - - // last chance - RETURN_ERR(plain_transform(t, remaining_transforms)); - return ERR_OK; -} - - -// change 's pixel format to the new format specified by . -// (note: this is equivalent to tex_transform(t, t->flags^new_flags). -LibError tex_transform_to(Tex* t, uint new_flags) -{ - // tex_transform takes care of validating - const uint transforms = t->flags ^ new_flags; - return tex_transform(t, transforms); -} - - +// getters //----------------------------------------------------------------------------- // returns a pointer to the image data (pixels), taking into account any // header(s) that may come before it. see Tex.hm comment above. 
u8* tex_get_data(const Tex* t) { // (can't use normal CHECK_TEX due to u8* return value) WARN_ERR(tex_validate(t)); u8* p = (u8*)mem_get_ptr(t->hm); if(!p) return 0; return p + t->ofs; } static void add_level_size(uint UNUSED(level), uint UNUSED(level_w), uint UNUSED(level_h), const u8* restrict UNUSED(level_data), size_t level_data_size, void* restrict ctx) { size_t* ptotal_size = (size_t*)ctx; *ptotal_size += level_data_size; } // return total byte size of the image pixels. (including mipmaps!) // this is preferable to calculating manually because it's // less error-prone (e.g. confusing bits_per_pixel with bytes). size_t tex_img_size(const Tex* t) { // (can't use normal CHECK_TEX due to size_t return value) WARN_ERR(tex_validate(t)); const int levels_to_skip = (t->flags & TEX_MIPMAPS)? 0 : TEX_BASE_LEVEL_ONLY; const uint data_padding = (t->flags & TEX_DXT)? 4 : 1; size_t out_size = 0; tex_util_foreach_mipmap(t->w, t->h, t->bpp, 0, levels_to_skip, data_padding, add_level_size, &out_size); return out_size; } -//----------------------------------------------------------------------------- - // return the minimum header size (i.e. offset to pixel data) of the // file format indicated by 's extension (that is all it need contain: // e.g. ".bmp"). returns 0 on error (i.e. no codec found). // this can be used to optimize calls to tex_write: when allocating the // buffer that will hold the image, allocate this much extra and // pass the pointer as base+hdr_size. this allows writing the header // directly into the output buffer and makes for zero-copy IO. size_t tex_hdr_size(const char* fn) { const TexCodecVTbl* c; CHECK_ERR(tex_codec_for_filename(fn, &c)); return c->hdr_size(0); } -// write the specified texture to disk. -// note: cannot be made const because the image may have to be -// transformed to write it out in the format determined by 's extension. 
-LibError tex_write(Tex* t, const char* fn) +//----------------------------------------------------------------------------- +// read/write from memory and disk +//----------------------------------------------------------------------------- + +LibError tex_decode(const u8* data, size_t data_size, MEM_DTOR dtor, Tex* t) +{ + const TexCodecVTbl* c; + RETURN_ERR(tex_codec_for_header(data, data_size, &c)); + + // make sure the entire header is available + const size_t min_hdr_size = c->hdr_size(0); + if(data_size < min_hdr_size) + WARN_RETURN(ERR_INCOMPLETE_HEADER); + const size_t hdr_size = c->hdr_size(data); + if(data_size < hdr_size) + WARN_RETURN(ERR_INCOMPLETE_HEADER); + + // wrap pointer into a Handle; required for Tex.hm. + // rationale: a Handle protects the texture memory from being + // accidentally free-d. + Handle hm = mem_wrap((void*)data, data_size, 0, 0, 0, dtor, 0, (void*)tex_decode); + + t->hm = hm; + t->ofs = hdr_size; + + // for orthogonality, encode and decode both receive the memory as a + // DynArray. package data into one and free it again after decoding: + DynArray da; + RETURN_ERR(da_wrap_fixed(&da, (u8*)data, data_size)); + + RETURN_ERR(c->decode(&da, t)); + + // note: not reached if decode fails. that's not a problem; + // this call just zeroes and could be left out. + (void)da_free(&da); + + // sanity checks + if(!t->w || !t->h || t->bpp > 32) + WARN_RETURN(ERR_TEX_FMT_INVALID); + // .. note: can't use data_size - decode may have decompressed the image. + size_t hm_size; + (void)mem_get_ptr(t->hm, &hm_size); + if(hm_size < t->ofs + tex_img_size(t)) + WARN_RETURN(ERR_TEX_INVALID_SIZE); + + flip_to_global_orientation(t); + + return ERR_OK; +} + + +LibError tex_encode(Tex* t, const char* fn, DynArray* da) { CHECK_TEX(t); CHECK_ERR(tex_validate_plain_format(t->bpp, t->flags)); // we could be clever here and avoid the extra alloc if our current // memory block ensued from the same kind of texture file. 
this is // most likely the case if in_img == + c->hdr_size(0). // this would make for zero-copy IO. - DynArray da; const size_t max_out_size = tex_img_size(t)*4 + 256*KiB; - RETURN_ERR(da_alloc(&da, max_out_size)); + RETURN_ERR(da_alloc(da, max_out_size)); const TexCodecVTbl* c; CHECK_ERR(tex_codec_for_filename(fn, &c)); // encode into - LibError err = c->encode(t, &da); + LibError err = c->encode(t, da); if(err < 0) { - debug_printf("%s (%s) failed: %d", __func__, c->name, err); - debug_warn("failed"); - goto fail; + (void)da_free(da); + WARN_RETURN(err); } + return ERR_OK; +} + + + +// MEM_DTOR -> file_buf_free adapter (used for mem_wrap-ping FileIOBuf) +static void file_buf_dtor(void* p, size_t UNUSED(size), uintptr_t UNUSED(ctx)) +{ + (void)file_buf_free((FileIOBuf)p); +} + +// load the specified image from file into the given Tex object. +// currently supports BMP, TGA, JPG, JP2, PNG, DDS. +LibError tex_load(const char* fn, Tex* t, uint file_flags) +{ + // load file + FileIOBuf file; size_t file_size; + RETURN_ERR(vfs_load(fn, file, file_size, file_flags)); + + LibError ret = tex_decode(file, file_size, file_buf_dtor, t); + if(ret < 0) + { + (void)tex_free(t); + WARN_RETURN(ret); + } + + // do not free hm! it either still holds the image data (i.e. texture + // wasn't compressed) or was replaced by a new buffer for the image data. + + CHECK_TEX(t); + return ERR_OK; +} + + +// write the specified texture to disk. +// note: cannot be made const because the image may have to be +// transformed to write it out in the format determined by 's extension. 
+LibError tex_write(Tex* t, const char* fn) +{ + DynArray da; + RETURN_ERR(tex_encode(t, fn, &da)); + // write to disk + LibError ret = ERR_OK; { const size_t sector_aligned_size = round_up(da.cur_size, file_sector_size); (void)da_set_size(&da, sector_aligned_size); const ssize_t bytes_written = vfs_store(fn, da.base, da.pos); - debug_assert(bytes_written == (ssize_t)da.pos); + if(bytes_written > 0) + debug_assert(bytes_written == (ssize_t)da.pos); + else + ret = (LibError)bytes_written; } -fail: (void)da_free(&da); - return err; + return ret; } Index: ps/trunk/source/lib/res/graphics/ogl_tex.cpp =================================================================== --- ps/trunk/source/lib/res/graphics/ogl_tex.cpp (revision 3910) +++ ps/trunk/source/lib/res/graphics/ogl_tex.cpp (revision 3911) @@ -1,975 +1,975 @@ /** * ========================================================================= * File : ogl_tex.cpp * Project : 0 A.D. * Description : wrapper for all OpenGL texturing calls. * : provides caching, hotloading and lifetime management. * * @author Jan.Wassenberg@stud.uni-karlsruhe.de * ========================================================================= */ /* * Copyright (c) 2003-2005 Jan Wassenberg * * Redistribution and/or modification are also permitted under the * terms of the GNU General Public License as published by the * Free Software Foundation (version 2 or later, at your option). * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
*/ #include "precompiled.h" #include "lib.h" #include "app_hooks.h" #include "sysdep/gfx.h" #include "../res.h" #include "ogl.h" #include "tex.h" #include "ogl_tex.h" //---------------------------------------------------------------------------- // OpenGL helper routines //---------------------------------------------------------------------------- static bool filter_valid(GLint filter) { switch(filter) { case GL_NEAREST: case GL_LINEAR: case GL_NEAREST_MIPMAP_NEAREST: case GL_LINEAR_MIPMAP_NEAREST: case GL_NEAREST_MIPMAP_LINEAR: case GL_LINEAR_MIPMAP_LINEAR: return true; default: return false; } } static bool wrap_valid(GLint wrap) { switch(wrap) { case GL_CLAMP: case GL_CLAMP_TO_EDGE: case GL_CLAMP_TO_BORDER: case GL_REPEAT: case GL_MIRRORED_REPEAT: return true; default: return false; } } static bool filter_uses_mipmaps(GLint filter) { switch(filter) { case GL_NEAREST_MIPMAP_NEAREST: case GL_LINEAR_MIPMAP_NEAREST: case GL_NEAREST_MIPMAP_LINEAR: case GL_LINEAR_MIPMAP_LINEAR: return true; default: return false; } } static bool fmt_is_s3tc(GLenum fmt) { switch(fmt) { case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: return true; default: return false; } } // determine OpenGL texture format, given and Tex . static GLint choose_fmt(uint bpp, uint flags) { const bool alpha = (flags & TEX_ALPHA) != 0; const bool bgr = (flags & TEX_BGR ) != 0; const bool grey = (flags & TEX_GREY ) != 0; const uint dxt = flags & TEX_DXT; // S3TC if(dxt != 0) { switch(dxt) { case 1: return alpha? GL_COMPRESSED_RGBA_S3TC_DXT1_EXT : GL_COMPRESSED_RGB_S3TC_DXT1_EXT; case 3: return GL_COMPRESSED_RGBA_S3TC_DXT3_EXT; case 5: return GL_COMPRESSED_RGBA_S3TC_DXT5_EXT; default: debug_warn("invalid DXT value"); return 0; } } // uncompressed switch(bpp) { case 8: debug_assert(grey); return GL_LUMINANCE; case 16: return GL_LUMINANCE_ALPHA; case 24: debug_assert(!alpha); return bgr? 
GL_BGR : GL_RGB; case 32: debug_assert(alpha); return bgr? GL_BGRA : GL_RGBA; default: debug_warn("invalid bpp"); return 0; } UNREACHABLE; } //---------------------------------------------------------------------------- // quality mechanism //---------------------------------------------------------------------------- static GLint default_filter = GL_LINEAR; // one of the GL *minify* filters static uint default_q_flags = OGL_TEX_FULL_QUALITY; // OglTexQualityFlags static bool q_flags_valid(uint q_flags) { const uint bits = OGL_TEX_FULL_QUALITY|OGL_TEX_HALF_BPP|OGL_TEX_HALF_RES; // unrecognized bits are set - invalid if((q_flags & ~bits) != 0) return false; // "full quality" but other reduction bits are set - invalid if(q_flags & OGL_TEX_FULL_QUALITY && q_flags & ~OGL_TEX_FULL_QUALITY) return false; return true; } // change default settings - these affect performance vs. quality. // may be overridden for individual textures via parameter to // ogl_tex_upload or ogl_tex_set_filter, respectively. // // pass 0 to keep the current setting; defaults and legal values are: // - q_flags: OGL_TEX_FULL_QUALITY; combination of OglTexQualityFlags // - filter: GL_LINEAR; any valid OpenGL minification filter void ogl_tex_set_defaults(uint q_flags, GLint filter) { if(q_flags) { debug_assert(q_flags_valid(q_flags)); default_q_flags = q_flags; } if(filter) { debug_assert(filter_valid(filter)); default_filter = filter; } } // choose an internal format for based on the given q_flags. static GLint choose_int_fmt(GLenum fmt, uint q_flags) { // true => 4 bits per component; otherwise, 8 const bool half_bpp = (q_flags & OGL_TEX_HALF_BPP) != 0; // early-out for S3TC textures: they don't need an internal format // (because upload is via glCompressedTexImage2DARB), but we must avoid // triggering the default case below. we might as well return a // meaningful value (i.e. int_fmt = fmt). if(fmt_is_s3tc(fmt)) return fmt; switch(fmt) { // 8bpp case GL_LUMINANCE: return half_bpp? 
GL_LUMINANCE4 : GL_LUMINANCE8; case GL_INTENSITY: return half_bpp? GL_INTENSITY4 : GL_INTENSITY8; case GL_ALPHA: return half_bpp? GL_ALPHA4 : GL_ALPHA8; // 16bpp case GL_LUMINANCE_ALPHA: return half_bpp? GL_LUMINANCE4_ALPHA4 : GL_LUMINANCE8_ALPHA8; // 24bpp case GL_RGB: case GL_BGR: // note: BGR can't be used as internal format return half_bpp? GL_RGB4 : GL_RGB8; // 32bpp case GL_RGBA: case GL_BGRA: // note: BGRA can't be used as internal format return half_bpp? GL_RGBA4 : GL_RGBA8; default: { wchar_t buf[100]; swprintf(buf, ARRAY_SIZE(buf), L"choose_int_fmt: fmt 0x%x isn't covered! please add it", fmt); DISPLAY_ERROR(buf); debug_warn("given fmt isn't covered! please add it."); // fall back to a reasonable default return half_bpp? GL_RGB4 : GL_RGB8; } } UNREACHABLE; } //---------------------------------------------------------------------------- // texture state to allow seamless reload //---------------------------------------------------------------------------- // see "Texture Parameters" in docs. // all GL state tied to the texture that must be reapplied after reload. // (this mustn't get too big, as it's stored in the already sizeable OglTex) struct OglTexState { // glTexParameter // note: there are more options, but they do not look to // be important and will not be applied after a reload! // in particular, LOD_BIAS isn't needed because that is set for // the entire texturing unit via glTexEnv. // .. texture filter // note: this is the minification filter value; magnification filter // is GL_NEAREST if it's GL_NEAREST, otherwise GL_LINEAR. // we don't store mag_filter explicitly because it // doesn't appear useful - either apps can tolerate LINEAR, or // mipmaps aren't called for and filter could be NEAREST anyway). GLint filter; // .. wrap mode // note: to simplify things, we assume that apps will never want to // set S/T modes independently. it that becomes necessary, // it's easy to add. GLint wrap; }; // fill the given state object with default values. 
static void state_set_to_defaults(OglTexState* ots) { ots->filter = default_filter; ots->wrap = GL_REPEAT; } // send all state to OpenGL (actually the currently bound texture). // called from ogl_tex_upload. static void state_latch(OglTexState* ots) { // filter const GLint filter = ots->filter; glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, filter); const GLint mag_filter = (filter == GL_NEAREST)? GL_NEAREST : GL_LINEAR; glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, mag_filter); // wrap const GLint wrap = ots->wrap; glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, wrap); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, wrap); // .. only CLAMP and REPEAT are guaranteed to be available. // if we're using one of the others, we squelch the error that // may have resulted if this GL implementation is old. if(wrap != GL_CLAMP && wrap != GL_REPEAT) oglSquelchError(GL_INVALID_ENUM); } //---------------------------------------------------------------------------- // texture resource object //---------------------------------------------------------------------------- // ideally we would split OglTex into data and state objects as in // SndData / VSrc. this gives us the benefits of caching while still // leaving each "instance" (state object, which owns a data reference) // free to change its state. however, unlike in OpenAL, there is no state // independent of the data object - all parameters are directly tied to the // GL texture object. therefore, splitting them up is impossible. // (we shouldn't even keep the texel data in memory since that's already // covered by the FS cache). // // given that multiple "instances" share the state stored here, we conclude: // - a refcount is necessary to prevent ogl_tex_upload from freeing // as long as other instances are active. 
// - concurrent use risks cross-talk (if the 2nd "instance" changes state and // the first is reloaded, its state may change to that of the 2nd) // // as bad as it sounds, the latter issue isn't a problem: we do not expect // multiple instances of the same texture where someone changes its filter. // even if it is reloaded, the differing state is not critical. // the alternative is even worse: disabling *all* caching/reuse would // really hurt performance and h_mgr doesn't support only disallowing // reuse of active objects (this would break the index lookup code, since // multiple instances may then exist). // note: make sure these values fit inside OglTex.flags (only 16 bits) enum OglTexFlags { // "the texture is currently uploaded"; reset in dtor. OT_IS_UPLOADED = 1, // "the enclosed Tex object is valid and holds a texture"; // reset in dtor and after ogl_tex_upload's tex_free. OT_TEX_VALID = 2, //uint tex_valid : 1; // "reload() should automatically re-upload the texture" (because // it had been uploaded before the reload); never reset. OT_NEED_AUTO_UPLOAD = 4, // (used for validating flags) OT_ALL_FLAGS = OT_IS_UPLOADED|OT_TEX_VALID|OT_NEED_AUTO_UPLOAD }; struct OglTex { Tex t; // allocated by OglTex_reload; indicates the texture is currently uploaded. GLuint id; // ogl_tex_upload calls choose_fmt to determine these from . // however, its caller may override those values via parameters. // note: these are stored here to allow retrieving via ogl_tex_get_format; // they are only used within ogl_tex_upload. GLenum fmt; GLint int_fmt; OglTexState state; // OglTexQualityFlags uint q_flags : 8; // to which Texture Mapping Unit was this bound? uint tmu : 8; uint flags : 16; }; H_TYPE_DEFINE(OglTex); static void OglTex_init(OglTex* ot, va_list args) { Tex* wrapped_tex = va_arg(args, Tex*); if(wrapped_tex) { ot->t = *wrapped_tex; // indicate ot->t is now valid, thus skipping loading from file. // note: ogl_tex_wrap prevents actual reloads from happening. 
ot->flags |= OT_TEX_VALID; } state_set_to_defaults(&ot->state); ot->q_flags = default_q_flags; } static void OglTex_dtor(OglTex* ot) { if(ot->flags & OT_TEX_VALID) { (void)tex_free(&ot->t); ot->flags &= ~OT_TEX_VALID; } // note: do not check if OT_IS_UPLOADED is set, because we allocate // OglTex.id without necessarily having done an upload. glDeleteTextures(1, &ot->id); ot->id = 0; ot->flags &= ~OT_IS_UPLOADED; } static LibError OglTex_reload(OglTex* ot, const char* fn, Handle h) { // we're reusing a freed but still in-memory OglTex object if(ot->flags & OT_IS_UPLOADED) return ERR_OK; // if we don't already have the texture in memory (*), load from file. // * this happens if the texture is "wrapped". if(!(ot->flags & OT_TEX_VALID)) RETURN_ERR(tex_load(fn, &ot->t, FILE_CACHED_AT_HIGHER_LEVEL)); ot->flags |= OT_TEX_VALID; glGenTextures(1, &ot->id); // if it had already been uploaded before this reload, // re-upload it (this also does state_latch). if(ot->flags & OT_NEED_AUTO_UPLOAD) (void)ogl_tex_upload(h); return ERR_OK; } static LibError OglTex_validate(const OglTex* ot) { RETURN_ERR(tex_validate(&ot->t)); // width, height // (note: this is done here because tex.cpp doesn't impose any // restrictions on dimensions, while OpenGL does). GLsizei w = (GLsizei)ot->t.w; GLsizei h = (GLsizei)ot->t.h; // .. == 0; texture file probably not loaded successfully. if(w == 0 || h == 0) WARN_RETURN(ERR_11); // .. greater than max supported tex dimension. // no-op if oglInit not yet called if(w > (GLsizei)ogl_max_tex_size || h > (GLsizei)ogl_max_tex_size) WARN_RETURN(ERR_12); // .. not power-of-2. // note: we can't work around this because both NV_texture_rectangle // and subtexture require work for the client (changing tex coords). 
// TODO: ARB_texture_non_power_of_two if(!is_pow2(w) || !is_pow2(h)) WARN_RETURN(ERR_13); // texture state if(!filter_valid(ot->state.filter)) WARN_RETURN(ERR_14); if(!wrap_valid(ot->state.wrap)) WARN_RETURN(ERR_15); // misc if(!q_flags_valid(ot->q_flags)) WARN_RETURN(ERR_16); if(ot->tmu >= 128) // unexpected that there will ever be this many WARN_RETURN(ERR_17); if(ot->flags > OT_ALL_FLAGS) WARN_RETURN(ERR_18); // .. note: don't check ot->fmt and ot->int_fmt - they aren't set // until during ogl_tex_upload. return ERR_OK; } static LibError OglTex_to_string(const OglTex* ot, char* buf) { snprintf(buf, H_STRING_LEN, "id=%d", ot->id); return ERR_OK; } // load and return a handle to the texture given in . // for a list of supported formats, see tex.h's tex_load. Handle ogl_tex_load(const char* fn, uint flags) { Tex* wrapped_tex = 0; // we're loading from file return h_alloc(H_OglTex, fn, flags, wrapped_tex); } // return Handle to an existing object, if it has been loaded and // is still in memory; otherwise, a negative error code. Handle ogl_tex_find(const char* fn) { return h_find(H_OglTex, (uintptr_t)fn); } // make the given Tex object ready for use as an OpenGL texture // and return a handle to it. this will be as if its contents // had been loaded by ogl_tex_load. // // we need only add bookkeeping information and "wrap" it in // a resource object (accessed via Handle), hence the name. // // isn't strictly needed but should describe the texture so that // h_filename will return a meaningful comment for debug purposes. // note: because we cannot guarantee that callers will pass distinct // "filenames", caching is disabled for the created object. this avoids // mistakenly reusing previous objects that share the same comment. Handle ogl_tex_wrap(Tex* t, const char* fn, uint flags) { // this object may not be backed by a file ("may", because // someone could do tex_load and then ogl_tex_wrap). 
// if h_mgr asks for a reload, the dtor will be called but // we won't be able to reconstruct it. therefore, disallow reloads. // (they are improbable anyway since caller is supposed to pass a // 'descriptive comment' instead of filename, but don't rely on that) // also disable caching as explained above. flags |= RES_DISALLOW_RELOAD|RES_NO_CACHE; return h_alloc(H_OglTex, fn, flags, t); } // free all resources associated with the texture and make further // use of it impossible. (subject to refcount) LibError ogl_tex_free(Handle& ht) { return h_free(ht, H_OglTex); } //---------------------------------------------------------------------------- // state setters (see "Texture Parameters" in docs) //---------------------------------------------------------------------------- // we require the below functions be called before uploading; this avoids // potentially redundant glTexParameter calls (we'd otherwise need to always // set defaults because we don't know if an override is forthcoming). // raise a debug warning if the texture has already been uploaded // (except in the few cases where this is allowed; see below). // this is so that you will notice incorrect usage - only one instance of a // texture should be active at a time, because otherwise they vie for // control of one shared OglTexState. static void warn_if_uploaded(Handle ht, const OglTex* ot) { #ifndef NDEBUG // we do not require users of this module to remember if they've // already uploaded a texture (inconvenient). since they also can't // tell if the texture was newly loaded (due to h_alloc interface), // we have to squelch this warning in 2 cases: // - it's ogl_tex_loaded several times (i.e. refcount > 1) and the // caller (typically a higher-level LoadTexture) is setting filter etc. // - caller is using our Handle as a caching mechanism, and calls // ogl_tex_set_* before every use of the texture. note: this // need not fall under the above check, e.g. if freed but cached. 
// workaround is that ogl_tex_set_* won't call us if the // same state values are being set (harmless anyway). int refs = h_get_refcnt(ht); if(refs > 1) return; // don't complain if(ot->flags & OT_IS_UPLOADED) debug_warn("ogl_tex_set_*: texture already uploaded and shouldn't be changed"); #else // (prevent warnings; the alternative of wrapping all call sites in // #ifndef is worse) UNUSED2(ht); UNUSED2(ot); #endif } // override default filter (as set above) for this texture. // must be called before uploading (raises a warning if called afterwards). // filter is as defined by OpenGL; it is applied for both minification and // magnification (for rationale and details, see OglTexState) LibError ogl_tex_set_filter(Handle ht, GLint filter) { H_DEREF(ht, OglTex, ot); if(!filter_valid(filter)) WARN_RETURN(ERR_INVALID_PARAM); if(ot->state.filter != filter) { warn_if_uploaded(ht, ot); ot->state.filter = filter; } return ERR_OK; } // override default wrap mode (GL_REPEAT) for this texture. // must be called before uploading (raises a warning if called afterwards). // wrap is as defined by OpenGL and applies to both S and T coordinates // (rationale: see OglTexState). LibError ogl_tex_set_wrap(Handle ht, GLint wrap) { H_DEREF(ht, OglTex, ot); if(!wrap_valid(wrap)) WARN_RETURN(ERR_INVALID_PARAM); if(ot->state.wrap != wrap) { warn_if_uploaded(ht, ot); ot->state.wrap = wrap; } return ERR_OK; } //---------------------------------------------------------------------------- // upload //---------------------------------------------------------------------------- // OpenGL has several features that are helpful for uploading but not // available in all implementations. we check for their presence but // provide for user override (in case they don't work on a card/driver // combo we didn't test). // tristate; -1 is undecided static int have_auto_mipmap_gen = -1; static int have_s3tc = -1; // override the default decision and force/disallow use of the // given feature. 
should be called from ah_override_gl_upload_caps. void ogl_tex_override(OglTexOverrides what, OglTexAllow allow) { debug_assert(allow == OGL_TEX_ENABLE || allow == OGL_TEX_DISABLE); const bool enable = (allow == OGL_TEX_ENABLE); switch(what) { case OGL_TEX_S3TC: have_s3tc = enable; break; case OGL_TEX_AUTO_MIPMAP_GEN: have_auto_mipmap_gen = enable; break; default: debug_warn("invalid "); break; } } // detect caps (via OpenGL extension list) and give an app_hook the chance to // override this (e.g. via list of card/driver combos on which S3TC breaks). // called once from the first ogl_tex_upload. static void detect_gl_upload_caps() { // detect features, but only change the variables if they were at // "undecided" (if overrides were set before this, they must remain). if(have_auto_mipmap_gen == -1) { have_auto_mipmap_gen = oglHaveExtension("GL_SGIS_generate_mipmap"); } if(have_s3tc == -1) { // note: we don't bother checking for GL_S3_s3tc - it is incompatible // and irrelevant (was never widespread). have_s3tc = oglHaveExtensions(0, "GL_ARB_texture_compression", "GL_EXT_texture_compression_s3tc", 0) == 0; } ah_override_gl_upload_caps(); // warn if more-or-less essential features are missing if(!have_s3tc) DISPLAY_ERROR(L"Performance warning: your graphics card does not support compressed textures. The game will try to continue anyway, but may be slower than expected. Please try updating your graphics drivers; if that doesn't help, please try upgrading your hardware."); } // take care of mipmaps. if they are called for by , either // arrange for OpenGL to create them, or see to it that the Tex object // contains them (if need be, creating them in software). // sets *plevels_to_skip to influence upload behavior (depending on // whether mipmaps are needed and the quality settings). // returns 0 to indicate success; otherwise, caller must disable // mipmapping by switching filter to e.g. GL_LINEAR. 
static LibError get_mipmaps(Tex* t, GLint filter, uint q_flags, int* plevels_to_skip)
{
	// Decide how mipmaps are to be provided for <t> (pregenerated in the
	// file, hardware auto-generation, software generation, or none) and
	// set *plevels_to_skip for the subsequent upload.
	// Returns ERR_OK on success; on failure the caller must disable
	// mipmapping (e.g. by switching the filter to GL_LINEAR).

	// decisions:
	// .. does filter call for uploading mipmaps?
	const bool need_mipmaps = filter_uses_mipmaps(filter);
	// .. does the image data include mipmaps? (stored as separate
	// images after the regular texels)
	const bool includes_mipmaps = (t->flags & TEX_MIPMAPS) != 0;
	// .. is this texture in S3TC format? (more generally, "compressed")
	const bool is_s3tc = (t->flags & TEX_DXT) != 0;

	// default: upload only the base level; overwritten below whenever
	// the Tex data itself ends up containing mipmaps.
	*plevels_to_skip = TEX_BASE_LEVEL_ONLY;
	if(!need_mipmaps)
		return ERR_OK;

	// image already contains pregenerated mipmaps; we need do nothing.
	// this is the nicest case, because they are fastest to load
	// (no extra processing needed) and typically filtered better than
	// if automatically generated.
	if(includes_mipmaps)
		*plevels_to_skip = 0;	// t contains mipmaps
	// OpenGL supports automatic generation; we need only
	// activate that and upload the base image.
	else if(have_auto_mipmap_gen)
	{
		// note: we assume GL_GENERATE_MIPMAP and GL_GENERATE_MIPMAP_SGIS
		// have the same values - it's heavily implied by the spec
		// governing 'promoted' ARB extensions and just plain makes sense.
		glTexParameteri(GL_TEXTURE_2D, GL_GENERATE_MIPMAP, GL_TRUE);
	}
	// image is S3TC-compressed and the previous 2 alternatives weren't
	// available; we're going to cheat and just disable mipmapping.
	// rationale: having tex_transform add mipmaps would be slow (since
	// all<->all transforms aren't implemented, it'd have to decompress
	// from S3TC first), and DDS images ought to include mipmaps!
	else if(is_s3tc)
		return ERR_FAIL;	// NOWARN
	// image is uncompressed and we're on an old OpenGL implementation;
	// we will generate mipmaps in software.
	else
	{
		RETURN_ERR(tex_transform_to(t, t->flags|TEX_MIPMAPS));
		*plevels_to_skip = 0;	// t contains mipmaps
	}

	// t contains mipmaps; we can apply our resolution reduction trick:
	if(*plevels_to_skip == 0)
	{
		// this saves texture memory by skipping some of the lower
		// (high-resolution) mip levels.
		//
		// note: we don't just use GL_TEXTURE_BASE_LEVEL because it would
		// require uploading unused levels, which is wasteful.
		// .. can be expanded to reduce to 1/4, 1/8 by encoding factor in q_flags.
		const uint reduce = (q_flags & OGL_TEX_HALF_RES)? 2 : 1;
		// reduce==1 -> skip 0 levels; reduce==2 -> skip 1 level.
		*plevels_to_skip = log2(reduce);
	}

	return ERR_OK;
}


// tex_util_foreach_mipmap callbacks: upload the given level to OpenGL.

// parameters shared by the upload callbacks below; passed through
// tex_util_foreach_mipmap's opaque ctx pointer.
struct UploadParams
{
	GLenum fmt;	// OpenGL texel format (e.g. GL_RGB)
	GLint int_fmt;	// OpenGL internal format (e.g. GL_RGB8)
};

// upload one uncompressed mip level via glTexImage2D.
// level_data_size is unused because GL derives it from w/h/format.
static void upload_level(uint level, uint level_w, uint level_h, const u8* restrict level_data, size_t UNUSED(level_data_size), void* restrict ctx)
{
	const UploadParams* up = (const UploadParams*)ctx;
	glTexImage2D(GL_TEXTURE_2D, level, up->int_fmt, level_w, level_h, 0, up->fmt, GL_UNSIGNED_BYTE, level_data);
}

// upload one S3TC-compressed mip level; here the data size must be
// passed explicitly since GL cannot derive it for compressed formats.
static void upload_compressed_level(uint level, uint level_w, uint level_h, const u8* restrict level_data, size_t level_data_size, void* restrict ctx)
{
	const UploadParams* up = (const UploadParams*)ctx;
	pglCompressedTexImage2DARB(GL_TEXTURE_2D, level, up->fmt, (GLsizei)level_w, (GLsizei)level_h, 0, (GLsizei)level_data_size, level_data);
}


// upload the texture in the specified (internal) format.
// split out of ogl_tex_upload because it was too big.
//
// pre: <t> is valid for OpenGL use; texture is bound.
static void upload_impl(Tex* t, GLenum fmt, GLint int_fmt, int levels_to_skip)
{
	// feed every mip level of <t> to OpenGL via the appropriate
	// tex_util_foreach_mipmap callback (compressed vs. plain).
	const GLsizei w = (GLsizei)t->w;
	const GLsizei h = (GLsizei)t->h;
	const uint bpp = t->bpp;
	const u8* data = (const u8*)tex_get_data(t);
	const UploadParams up = { fmt, int_fmt };

	// NOTE(review): the 4-vs-1 argument presumably reflects S3TC's 4x4
	// block granularity - confirm against tex_util_foreach_mipmap's docs.
	if(t->flags & TEX_DXT)
		tex_util_foreach_mipmap(w, h, bpp, data, levels_to_skip, 4, upload_compressed_level, (void*)&up);
	else
		tex_util_foreach_mipmap(w, h, bpp, data, levels_to_skip, 1, upload_level, (void*)&up);
}


// upload the texture to OpenGL.
// if not 0, parameters override the following:
// fmt_ovr : OpenGL format (e.g. GL_RGB) decided from bpp / Tex flags;
// q_flags_ovr : global default "quality vs. performance" flags;
// int_fmt_ovr : internal format (e.g. GL_RGB8) decided from fmt / q_flags.
//
// side effects:
// - enables texturing on TMU 0 and binds the texture to it;
// - frees the texel data! see ogl_tex_get_data.
LibError ogl_tex_upload(const Handle ht, GLenum fmt_ovr, uint q_flags_ovr, GLint int_fmt_ovr)
{
	// detect hardware upload capabilities exactly once, before the
	// first texture is uploaded.
	ONCE(detect_gl_upload_caps());

	H_DEREF(ht, OglTex, ot);
	Tex* t = &ot->t;
	// NOTE(review): <fn> appears to be unused in the remainder of this
	// function - possibly left over for error reporting; confirm before
	// removing.
	const char* fn = h_filename(ht);
	if(!fn)
		fn = "(could not determine filename)";
	debug_assert(q_flags_valid(q_flags_ovr));
	// we don't bother verifying *fmt_ovr - there are too many values

	// upload already happened; no work to do.
	// (this also happens if a cached texture is "loaded")
	if(ot->flags & OT_IS_UPLOADED)
		return ERR_OK;

	debug_assert(ot->flags & OT_TEX_VALID);

	// decompress S3TC if that's not supported by OpenGL.
	if((t->flags & TEX_DXT) && !have_s3tc)
		(void)tex_transform_to(t, t->flags & ~TEX_DXT);

	// determine fmt and int_fmt, allowing for user override.
	ot->fmt = choose_fmt(t->bpp, t->flags);
	if(fmt_ovr) ot->fmt = fmt_ovr;
	if(q_flags_ovr) ot->q_flags = q_flags_ovr;
	ot->int_fmt = choose_int_fmt(ot->fmt, ot->q_flags);
	if(int_fmt_ovr) ot->int_fmt = int_fmt_ovr;

	// now actually send to OpenGL:
	oglCheck();
	{
		// (note: we know ht is valid due to H_DEREF, but ogl_tex_bind can
		// fail in debug builds if OglTex.id isn't a valid texture name)
		RETURN_ERR(ogl_tex_bind(ht, ot->tmu));
		int levels_to_skip;
		if(get_mipmaps(t, ot->state.filter, ot->q_flags, &levels_to_skip) < 0)
			// error => disable mipmapping
			ot->state.filter = GL_LINEAR;
		// (note: if first time, applies our defaults/previous overrides;
		// otherwise, replays all state changes)
		state_latch(&ot->state);
		upload_impl(t, ot->fmt, ot->int_fmt, levels_to_skip);
	}
	oglCheck();

	// remember the upload so reload() can redo it automatically; see the
	// rationale at the declaration of OglTex.
	ot->flags |= OT_NEED_AUTO_UPLOAD|OT_IS_UPLOADED;

	// note: tex_free is safe even if this OglTex was wrapped -
	// the Tex contains a mem handle.
	int refs = h_get_refcnt(ht);
	if(refs == 1)
	{
		// note: we verify above that OT_TEX_VALID is set
		(void)tex_free(t);
		ot->flags &= ~OT_TEX_VALID;
	}

	return ERR_OK;
}


//----------------------------------------------------------------------------
// getters
//----------------------------------------------------------------------------

// retrieve texture dimensions and bits per pixel.
// all params are optional and filled if non-NULL.
LibError ogl_tex_get_size(Handle ht, uint* w, uint* h, uint* bpp)
{
	H_DEREF(ht, OglTex, ot);
	if(w)
		*w = ot->t.w;
	if(h)
		*h = ot->t.h;
	if(bpp)
		*bpp = ot->t.bpp;
	return ERR_OK;
}


// retrieve TexFlags and the corresponding OpenGL format.
// the latter is determined during ogl_tex_upload and is 0 before that.
// all params are optional and filled if non-NULL.
LibError ogl_tex_get_format(Handle ht, uint* flags, GLenum* fmt) { H_DEREF(ht, OglTex, ot); if(flags) *flags = ot->t.flags; if(fmt) { if(!(ot->flags & OT_IS_UPLOADED)) debug_warn("hasn't been defined yet!"); *fmt = ot->fmt; } return ERR_OK; } // retrieve pointer to texel data. // // note: this memory is freed after a successful ogl_tex_upload for // this texture. after that, the pointer we retrieve is NULL but // the function doesn't fail (negative return value) by design. // if you still need to get at the data, add a reference before // uploading it or read directly from OpenGL (discouraged). LibError ogl_tex_get_data(Handle ht, void** p) { H_DEREF(ht, OglTex, ot); *p = tex_get_data(&ot->t); return ERR_OK; } //---------------------------------------------------------------------------- // misc API //---------------------------------------------------------------------------- // bind the texture to the specified unit [number] in preparation for // using it in rendering. if is 0, texturing is disabled instead. // // side effects: // - changes the active texture unit; // - (if return value is 0:) texturing was enabled/disabled on that unit. // // notes: // - assumes multitexturing is available. // - not necessary before calling ogl_tex_upload! // - on error, the unit's texture state is unchanged; see implementation. LibError ogl_tex_bind(Handle ht, uint unit) { // note: there are many call sites of glActiveTextureARB, so caching // those and ignoring redundant sets isn't feasible. pglActiveTextureARB(GL_TEXTURE0+unit); // special case: disable texturing if(ht == 0) { glDisable(GL_TEXTURE_2D); return ERR_OK; } // if this fails, the texture unit's state remains unchanged. // we don't bother catching that and disabling texturing because a // debug warning is raised anyway, and it's quite unlikely. H_DEREF(ht, OglTex, ot); ot->tmu = unit; // if 0, there's a problem in the OglTex reload/dtor logic. 
// binding it results in whiteness, which can have many causes; // we therefore complain so this one can be ruled out. debug_assert(ot->id != 0); glEnable(GL_TEXTURE_2D); glBindTexture(GL_TEXTURE_2D, ot->id); return ERR_OK; } // apply the specified transforms (as in tex_transform) to the image. // must be called before uploading (raises a warning if called afterwards). LibError ogl_tex_transform(Handle ht, uint transforms) { H_DEREF(ht, OglTex, ot); LibError ret = tex_transform(&ot->t, transforms); return ret; } // change the pixel format to that specified by . // (note: this is equivalent to ogl_tex_transform(ht, ht_flags^new_flags). LibError ogl_tex_transform_to(Handle ht, uint new_flags) { H_DEREF(ht, OglTex, ot); LibError ret = tex_transform_to(&ot->t, new_flags); return ret; } Index: ps/trunk/source/lib/res/graphics/tex_internal.h =================================================================== --- ps/trunk/source/lib/res/graphics/tex_internal.h (nonexistent) +++ ps/trunk/source/lib/res/graphics/tex_internal.h (revision 3911) @@ -0,0 +1,85 @@ +/** + * ========================================================================= + * File : tex_internal.h + * Project : 0 A.D. + * Description : private texture loader helper functions + * + * @author Jan.Wassenberg@stud.uni-karlsruhe.de + * ========================================================================= + */ + +/* + * Copyright (c) 2006 Jan Wassenberg + * + * Redistribution and/or modification are also permitted under the + * terms of the GNU General Public License as published by the + * Free Software Foundation (version 2 or later, at your option). + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ */ + +#ifndef TEX_INTERNAL_H__ +#define TEX_INTERNAL_H__ + +#include "../mem.h" // MEM_DTOR +#include "lib/allocators.h" // DynArray + +/** + * check if the given texture format is acceptable: 8bpp grey, + * 24bpp color or 32bpp color+alpha (BGR / upside down are permitted). + * basically, this is the "plain" format understood by all codecs and + * tex_codec_plain_transform. + * @param bpp bits per pixel + * @param flags TexFlags + * @return LibError + **/ +extern LibError tex_validate_plain_format(uint bpp, uint flags); + + +/** + * indicate if the two vertical orientations match. + * + * used by tex_codec. + * + * @param src_flags TexFlags, used to extract the orientation. + * we ask for this instead of src_orientation so callers don't have to + * mask off TEX_ORIENTATION. + * @param dst_orientation orientation to compare against. + * can be one of TEX_BOTTOM_UP, TEX_TOP_DOWN, or 0 for the + * "global orientation". + * @return bool + **/ +extern bool tex_orientations_match(uint src_flags, uint dst_orientation); + + +/** + * decode an in-memory texture file into texture object. + * + * split out of tex_load to ease resource cleanup and allow + * decoding images without needing to write out to disk. + * + * @param data input data + * @param data_size its size [bytes] + * @param dtor the function used to release it when the texture object is + * freed (can be NULL). note: this is necessary because the Tex object + * assumes ownership (necessary due to Tex.hm). + * @param t output texture object. + * @return LibError. + **/ +extern LibError tex_decode(const u8* data, size_t data_size, MEM_DTOR dtor, Tex* t); + +/** + * encode a texture into a memory buffer in the desired file format. + * + * @param t input texture object + * @param fn filename; only used to determine the desired file format + * (via extension) + * @param da output memory array. allocated here; caller must free it + * when no longer needed. invalid unless function succeeds. 
+ * @return LibError + **/ +extern LibError tex_encode(Tex* t, const char* fn, DynArray* da); + +#endif // #ifndef TEX_INTERNAL_H__ Index: ps/trunk/source/lib/res/graphics/tex_codec.h =================================================================== --- ps/trunk/source/lib/res/graphics/tex_codec.h (revision 3910) +++ ps/trunk/source/lib/res/graphics/tex_codec.h (revision 3911) @@ -1,112 +1,242 @@ /** * ========================================================================= * File : tex_codec.cpp * Project : 0 A.D. * Description : support routines and interface for texture codecs. * * @author Jan.Wassenberg@stud.uni-karlsruhe.de * ========================================================================= */ /* * Copyright (c) 2004 Jan Wassenberg * * Redistribution and/or modification are also permitted under the * terms of the GNU General Public License as published by the * Free Software Foundation (version 2 or later, at your option). * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. */ #ifndef TEX_CODEC_H__ #define TEX_CODEC_H__ #include "tex.h" +#include "tex_internal.h" // for codec's convenience #include "lib/allocators.h" -// rationale: no C++ to allow us to store const char* name in vtbl. - +/** + * virtual method table for TexCodecs. + * rationale: this works in C and also allows storing name and next in vtbl. + * 'template method'-style interface to increase code reuse and + * simplify writing new codecs. + **/ struct TexCodecVTbl { - // 'template method' to increase code reuse and simplify writing new codecs - - // pointers aren't const, because the textures - // may have to be flipped in-place - see "texture orientation". - // size is guaranteed to be >= 4. 
- // (usually enough to compare the header's "magic" field; - // anyway, no legitimate file will be smaller) - LibError (*decode)(DynArray* restrict da, Tex* restrict t); - - // rationale: some codecs cannot calculate the output size beforehand - // (e.g. PNG output via libpng); we therefore require each one to - // allocate memory itself and return the pointer. - // - // note: cannot be made const because encoding may require a - // tex_transform. - LibError (*encode)(Tex* restrict t, DynArray* restrict da); - + /** + * decode the file into a Tex structure. + * + * @param da input data array (not const, because the texture + * may have to be flipped in-place - see "texture orientation"). + * its size is guaranteed to be >= 4. + * (usually enough to compare the header's "magic" field; + * anyway, no legitimate file will be smaller) + * @param t output texture object + * @return LibError + **/ + LibError (*decode)(DynArray * restrict da, Tex * restrict t); + + + /** + * encode the texture data into the codec's file format (in memory). + * + * @param t input texture object. note: non-const because encoding may + * require a tex_transform. + * @param da output data array, allocated by codec. + * rationale: some codecs cannot calculate the output size beforehand + * (e.g. PNG output via libpng), so the output memory cannot be allocated + * by the caller. + * @return LibError + **/ + LibError (*encode)(Tex * restrict t, DynArray * restrict da); + + /** + * transform the texture's pixel format. + * + * @param t texture object + * @param transforms: OR-ed combination of TEX_* flags that are to + * be changed. note: the codec needs only handle situations specific + * to its format; generic pixel format transforms are handled by + * the caller. + **/ LibError (*transform)(Tex* t, uint transforms); - // only guaranteed 4 bytes! - bool (*is_hdr)(const u8* file); - - // precondition: ext is valid string - // ext doesn't include '.'; just compare against e.g. 
"png" - // must compare case-insensitive! + /** + * indicate if the data appears to be an instance of this codec's header, + * i.e. can this codec decode it? + * + * @param file input data; only guaranteed to be 4 bytes! + * (this should be enough to examine the header's 'magic' field) + * @return bool + **/ + bool (*is_hdr)(const u8 * file); + + /** + * is the extension that of a file format supported by this codec? + * + * rationale: cannot just return the extension string and have + * caller compare it (-> smaller code) because a codec's file format + * may have several valid extensions (e.g. jpg and jpeg). + * + * @param ext non-NULL extension string; does not contain '.'. + * must be compared as case-insensitive. + * @return bool + **/ bool (*is_ext)(const char* ext); + /** + * return size of the file header supported by this codec. + * + * @param file the specific header to return length of (taking its + * variable-length fields into account). if NULL, return minimum + * guaranteed header size, i.e. the header without any + * variable-length fields. + * @return size [bytes] + **/ size_t (*hdr_size)(const u8* file); + /** + * name of codec for debug purposes. typically set via TEX_CODEC_REGISTER. + **/ const char* name; - // intrusive linked-list of codecs: more convenient than fixed-size - // static storage. + /** + * intrusive linked-list of codecs: more convenient than fixed-size + * static storage. + * set by caller; should be initialized to NULL. + **/ const TexCodecVTbl* next; }; +/** + * build codec vtbl and register it. the codec will be queried for future + * texture load requests. call order is undefined, but since each codec + * only steps up if it can handle the given format, this is not a problem. + * + * @param name identifier of codec (not string!). used to bind 'member' + * functions prefixed with it to the vtbl, and as the TexCodecVTbl name. + * it should also mirror the default file extension (e.g. 
dds) - + * this is relied upon (but verified) in the self-test. + * + * usage: at file scope within the source file containing the codec's methods. + **/ #define TEX_CODEC_REGISTER(name)\ - static TexCodecVTbl vtbl = { name##_decode, name##_encode, name##_transform, name##_is_hdr, name##_is_ext, name##_hdr_size, #name};\ + static TexCodecVTbl vtbl = \ + {\ + name##_decode, name##_encode, name##_transform,\ + name##_is_hdr, name##_is_ext, name##_hdr_size,\ + #name\ + };\ static int dummy = tex_codec_register(&vtbl); -// add this vtbl to the codec list. called at NLSO init time by the -// TEX_CODEC_REGISTER in each codec file. note that call order and therefore -// order in the list is undefined, but since each codec only steps up if it -// can handle the given format, this is not a problem. -// -// returns int to alloc calling from a macro at file scope. +/** + * add this vtbl to the codec list. called at NLSO init time by the + * TEX_CODEC_REGISTER in each codec file. + * order in list is unspecified; see TEX_CODEC_REGISTER. + * + * @param c pointer to vtbl. + * @return int (allows calling from a macro at file scope; value is not used) + **/ extern int tex_codec_register(TexCodecVTbl* c); -// find codec that recognizes the desired output file extension, -// or return ERR_UNKNOWN_FORMAT if unknown. -// note: does not raise a warning because it is used by -// tex_is_known_extension. +/** + * find codec that recognizes the desired output file extension. + * + * @param fn filename; only the extension (that after '.') is used. + * case-insensitive. + * @param c (out) vtbl of responsible codec + * @return LibError; ERR_UNKNOWN_FORMAT (without warning, because this is + * called by tex_is_known_extension) if no codec indicates they can + * handle the given extension. 
+ **/ extern LibError tex_codec_for_filename(const char* fn, const TexCodecVTbl** c); -// find codec that recognizes the header's magic field -extern LibError tex_codec_for_header(const u8* file, size_t file_size, const TexCodecVTbl** c); +/** + * find codec that recognizes the header's magic field. + * + * @param data typically contents of file, but need only include the + * (first 4 bytes of) header. + * @param data_size [bytes] + * @param c (out) vtbl of responsible codec + * @return LibError; ERR_UNKNOWN_FORMAT if no codec indicates they can + * handle the given format (header). + **/ +extern LibError tex_codec_for_header(const u8* data, size_t data_size, const TexCodecVTbl** c); -extern LibError tex_codec_transform(Tex* t, uint transforms); +/** + * enumerate all registered codecs. + * + * used by self-test to test each one of them in turn. + * + * @param prev_codec the last codec returned by this function. + * pass 0 the first time. + * note: this routine is stateless and therefore reentrant. + * @return the next codec, or 0 if all have been returned. + **/ +extern const TexCodecVTbl* tex_codec_next(const TexCodecVTbl* prev_codec); +/** + * transform the texture's pixel format. + * tries each codec's transform method once, or until one indicates success. + * + * @param t texture object + * @param transforms: OR-ed combination of TEX_* flags that are to + * be changed. + * @return LibError + **/ +extern LibError tex_codec_transform(Tex* t, uint transforms); -// allocate an array of row pointers that point into the given texture data. -// indicates whether the file format is top-down or -// bottom-up; the row array is inverted if necessary to match global -// orienatation. (this is more efficient than "transforming" later) -// -// used by PNG and JPG codecs; caller must free() rows when done. -// -// note: we don't allocate the data param ourselves because this function is -// needed for encoding, too (where data is already present). 
+/** + * allocate an array of row pointers that point into the given texture data. + * for texture decoders that support output via row pointers (e.g. PNG), + * this allows flipping the image vertically (useful when matching bottom-up + * textures to a global orientation) directly, which is much more + * efficient than transforming later via copying all pixels. + * + * @param data the texture data into which row pointers will point. + * note: we don't allocate it here because this function is + * needed for encoding, too (where data is already present). + * @param h height [pixels] of texture. + * @param pitch size [bytes] of one texture row, i.e. width*bytes_per_pixel. + * @param src_flags TexFlags of source texture. used to extract its + * orientation. + * @param dst_orientation desired orientation of the output data. + * can be one of TEX_BOTTOM_UP, TEX_TOP_DOWN, or 0 for the + * "global orientation". + * depending on src and dst, the row array is flipped if necessary. + * @param rows (out) array of row pointers; caller must free() it when done. + * @return LibError + **/ typedef const u8* RowPtr; typedef RowPtr* RowArray; extern LibError tex_codec_alloc_rows(const u8* data, size_t h, size_t pitch, uint src_flags, uint dst_orientation, RowArray& rows); +/** + * apply transforms and then copy header and image into output buffer. 
+ * + * @param t input texture object + * @param transforms transformations to be applied to pixel format + * @param hdr header data + * @param hdr_size [bytes] + * @param da output data array (will be expanded as necessary) + * @return LibError + **/ extern LibError tex_codec_write(Tex* t, uint transforms, const void* hdr, size_t hdr_size, DynArray* da); -#endif // #ifndef TEX_CODEC_H__ +#endif // #ifndef TEX_CODEC_H__ Index: ps/trunk/source/lib/res/graphics/tex.h =================================================================== --- ps/trunk/source/lib/res/graphics/tex.h (revision 3910) +++ ps/trunk/source/lib/res/graphics/tex.h (revision 3911) @@ -1,304 +1,408 @@ /** * ========================================================================= * File : tex.h * Project : 0 A.D. * Description : read/write 2d texture files; allows conversion between * : pixel formats and automatic orientation correction. * * @author Jan.Wassenberg@stud.uni-karlsruhe.de * ========================================================================= */ /* - * Copyright (c) 2004 Jan Wassenberg + * Copyright (c) 2004-2005 Jan Wassenberg * * Redistribution and/or modification are also permitted under the * terms of the GNU General Public License as published by the * Free Software Foundation (version 2 or later, at your option). * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. */ -/* +/** Introduction ------------ This module allows reading/writing 2d images in various file formats and encapsulates them in Tex objects. It supports converting between pixel formats; this is to an extent done automatically when reading/writing. Provision is also made for flipping all images to a default orientation. 
Format Conversion ----------------- Image file formats have major differences in their native pixel format: some store in BGR order, or have rows arranged bottom-up. We must balance runtime cost/complexity and convenience for the application (not dumping the entire problem on its lap). That means rejecting really obscure formats (e.g. right-to-left pixels), but converting everything else to uncompressed RGB "plain" format except where noted in enum TexFlags (1). Note: conversion is implemented as a pipeline: e.g. "DDS decompress + vertical flip" would be done by decompressing to RGB (DDS codec) and then flipping (generic transform). This is in contrast to all<->all conversion paths: that would be much more complex, if more efficient. Since any kind of preprocessing at runtime is undesirable (the absolute priority is minimizing load time), prefer file formats that are close to the final pixel format. 1) one of the exceptions is S3TC compressed textures. glCompressedTexImage2D requires these be passed in their original format; decompressing would be - counterproductive. In this and similar cases, Tex.flags indicates such + counterproductive. In this and similar cases, TexFlags indicates such deviations from the plain format. Default Orientation ------------------- After loading, all images (except DDS, because its orientation is indeterminate) are automatically converted to the global row orientation: top-down or bottom-up, as specified by tex_set_global_orientation. If that isn't called, the default is top-down to match Photoshop's DDS output (since this is meant to be the no-preprocessing-required optimized format). Reasons to change it might be to speed up loading bottom-up BMP or TGA images, or to match OpenGL's convention for convenience; however, be aware of the abovementioned issues with DDS. 
Rationale: it is not expected that this will happen at the renderer layer (a 'flip all texcoords' flag is too much trouble), so the application would have to do the same anyway. By taking care of it here, we unburden the app and save time, since some codecs (e.g. PNG) can flip for free when loading. Codecs / IO Implementation -------------------------- To ease adding support for new formats, they are organized as codecs. The interface aims to minimize code duplication, so it's organized following the principle of "Template Method" - this module both calls into codecs, and provides helper functions that they use. IO is done via VFS, but the codecs are decoupled from this and work with memory buffers. Access to them is endian-safe. When "writing", the image is put into an expandable memory region. This supports external libraries like libpng that do not know the output size beforehand, but avoids the need for a buffer between library and IO layer. Read and write are zero-copy. -*/ +**/ #ifndef TEX_H__ #define TEX_H__ #include "../handle.h" -// flags describing the pixel format. these are to be interpreted as -// deviations from "plain" format, i.e. uncompressed RGB. +/** + * flags describing the pixel format. these are to be interpreted as + * deviations from "plain" format, i.e. uncompressed RGB. + **/ enum TexFlags { - // flags & TEX_DXT is a field indicating compression. - // if 0, the texture is uncompressed; - // otherwise, it holds the S3TC type: 1,3,5 or DXT1A. - // not converted by default - glCompressedTexImage2D receives - // the compressed data. - TEX_DXT = 0x7, // mask - // we need a special value for DXT1a to avoid having to consider - // flags & TEX_ALPHA to determine S3TC type. - // the value is arbitrary; do not rely on it! + /** + * flags & TEX_DXT is a field indicating compression. + * if 0, the texture is uncompressed; + * otherwise, it holds the S3TC type: 1,3,5 or DXT1A. 
+ * not converted by default - glCompressedTexImage2D receives + * the compressed data. + **/ + TEX_DXT = 0x7, // mask + + /** + * we need a special value for DXT1a to avoid having to consider + * flags & TEX_ALPHA to determine S3TC type. + * the value is arbitrary; do not rely on it! + **/ DXT1A = 7, - // indicates B and R pixel components are exchanged. depending on - // flags & TEX_ALPHA or bpp, this means either BGR or BGRA. - // not converted by default - it's an acceptable format for OpenGL. + /** + * indicates B and R pixel components are exchanged. depending on + * flags & TEX_ALPHA or bpp, this means either BGR or BGRA. + * not converted by default - it's an acceptable format for OpenGL. + **/ TEX_BGR = 0x08, - // indicates the image contains an alpha channel. this is set for - // your convenience - there are many formats containing alpha and - // divining this information from them is hard. - // (conversion is not applicable here) + /** + * indicates the image contains an alpha channel. this is set for + * your convenience - there are many formats containing alpha and + * divining this information from them is hard. + * (conversion is not applicable here) + **/ TEX_ALPHA = 0x10, - // indicates the image is 8bpp greyscale. this is required to - // differentiate between alpha-only and intensity formats. - // not converted by default - it's an acceptable format for OpenGL. + /** + * indicates the image is 8bpp greyscale. this is required to + * differentiate between alpha-only and intensity formats. + * not converted by default - it's an acceptable format for OpenGL. + **/ TEX_GREY = 0x20, - // flags & TEX_ORIENTATION is a field indicating orientation, - // i.e. in what order the pixel rows are stored. - // - // tex_load always sets this to the global orientation - // (and flips the image accordingly). - // texture codecs may in intermediate steps during loading set this - // to 0 if they don't know which way around they are (e.g. 
DDS), - // or to whatever their file contains. + /** + * flags & TEX_ORIENTATION is a field indicating orientation, + * i.e. in what order the pixel rows are stored. + * + * tex_load always sets this to the global orientation + * (and flips the image accordingly to match). + * texture codecs may in intermediate steps during loading set this + * to 0 if they don't know which way around they are (e.g. DDS), + * or to whatever their file contains. + **/ TEX_BOTTOM_UP = 0x40, TEX_TOP_DOWN = 0x80, - TEX_ORIENTATION = TEX_BOTTOM_UP|TEX_TOP_DOWN, // mask + TEX_ORIENTATION = TEX_BOTTOM_UP|TEX_TOP_DOWN, /// mask - // indicates the image data includes mipmaps. they are stored from lowest - // to highest (1x1), one after the other. - // (conversion is not applicable here) + /** + * indicates the image data includes mipmaps. they are stored from lowest + * to highest (1x1), one after the other. + * (conversion is not applicable here) + **/ TEX_MIPMAPS = 0x100 }; -// stores all data describing an image. -// we try to minimize size, since this is stored in OglTex resources -// (which are big and pushing the h_mgr limit). +/** + * stores all data describing an image. + * we try to minimize size, since this is stored in OglTex resources + * (which are big and pushing the h_mgr limit). + **/ struct Tex { - // H_Mem handle to image data. note: during the course of transforms - // (which may occur when being loaded), this may be replaced with - // a Handle to a new buffer (e.g. if decompressing file contents). + /** + * H_Mem handle to image data. note: during the course of transforms + * (which may occur when being loaded), this may be replaced with + * a Handle to a new buffer (e.g. if decompressing file contents). + **/ Handle hm; - // offset to image data in file. this is required since - // tex_get_data needs to return the pixels, but mem_get_ptr(hm) - // returns the actual file buffer. zero-copy load and - // write-back to file is also made possible. 
+ /** + * offset to image data in file. this is required since + * tex_get_data needs to return the pixels, but mem_get_ptr(hm) + * returns the actual file buffer. zero-copy load and + * write-back to file is also made possible. + **/ size_t ofs; uint w : 16; uint h : 16; uint bpp : 16; - // see TexFlags and "Format Conversion" in docs. + /// see TexFlags and "Format Conversion" in docs. uint flags : 16; }; -// set the orientation (either TEX_BOTTOM_UP or TEX_TOP_DOWN) to which -// all loaded images will automatically be converted -// (excepting file formats that don't specify their orientation, i.e. DDS). -// see "Default Orientation" in docs. +/** + * is the texture object valid and self-consistent? + * @return LibError + **/ +extern LibError tex_validate(const Tex* t); + + +/** + * set the orientation to which all loaded images will + * automatically be converted (excepting file formats that don't specify + * their orientation, i.e. DDS). see "Default Orientation" in docs. + * @param orientation either TEX_BOTTOM_UP or TEX_TOP_DOWN + **/ extern void tex_set_global_orientation(int orientation); // // open/close // -// indicate if 's extension is that of a texture format -// supported by tex_load. case-insensitive. -// -// rationale: tex_load complains if the given file is of an -// unsupported type. this API allows users to preempt that warning -// (by checking the filename themselves), and also provides for e.g. -// enumerating only images in a file picker. -// an alternative might be a flag to suppress warning about invalid files, -// but this is open to misuse. -extern bool tex_is_known_extension(const char* filename); - -// load the specified image from file into the given Tex object. -// currently supports BMP, TGA, JPG, JP2, PNG, DDS. +/** + * load the specified image from file into a Tex object. + * + * FYI, currently BMP, TGA, JPG, JP2, PNG, DDS are supported - but don't + * rely on this (not all codecs may be included). 
+ * + * @param fn filename + * @param t output texture object + * @param file_flags additional flags for vfs_load + * @return LibError + **/ extern LibError tex_load(const char* fn, Tex* t, uint file_flags = 0); -// store the given image data into a Tex object; this will be as if -// it had been loaded via tex_load. -// -// rationale: support for in-memory images is necessary for -// emulation of glCompressedTexImage2D and useful overall. -// however, we don't want to provide an alternate interface for each API; -// these would have to be changed whenever fields are added to Tex. -// instead, provide one entry point for specifying images. -// note: since we do not know how was allocated, the caller must do -// so (after calling tex_free, which is required regardless of alloc type). -// -// we need only add bookkeeping information and "wrap" it in -// our Tex struct, hence the name. +/** + * store the given image data into a Tex object; this will be as if + * it had been loaded via tex_load. + * + * rationale: support for in-memory images is necessary for + * emulation of glCompressedTexImage2D and useful overall. + * however, we don't want to provide an alternate interface for each API; + * these would have to be changed whenever fields are added to Tex. + * instead, provide one entry point for specifying images. + * note: since we do not know how was allocated, the caller must free + * it themselves (after calling tex_free, which is required regardless of + * alloc type). + * + * we need only add bookkeeping information and "wrap" it in + * our Tex struct, hence the name. + * + * @param w, h pixel dimensions + * @param bpp bits per pixel + * @param flags TexFlags + * @param img texture data. note: size is calculated from other params. + * @param t output texture object. 
+ * @return LibError + **/ extern LibError tex_wrap(uint w, uint h, uint bpp, uint flags, void* img, Tex* t); -// free all resources associated with the image and make further -// use of it impossible. +/** + * free all resources associated with the image and make further + * use of it impossible. + * + * @param t texture object (note: not zeroed afterwards; see impl) + * @return LibError + **/ extern LibError tex_free(Tex* t); // // modify image // -// change 's pixel format by flipping the state of all TEX_* flags -// that are set in transforms. +/** + * change 's pixel format. + * + * @param transforms TexFlags that are to be flipped. + * @return LibError + **/ extern LibError tex_transform(Tex* t, uint transforms); -// change 's pixel format to the new format specified by . -// (note: this is equivalent to tex_transform(t, t->flags^new_flags). +/** + * change 's pixel format (2nd version) + * (note: this is equivalent to tex_transform(t, t->flags^new_flags). + * + * @param new_flags desired new value of TexFlags. + * @return LibError + **/ extern LibError tex_transform_to(Tex* t, uint new_flags); // // return image information // -// since Tex is a struct, its fields are accessible to callers. -// this is more for C compatibility than convenience; the following should -// be used instead of direct access to the corresponding fields because -// they take care of some dirty work. +/** + * rationale: since Tex is a struct, its fields are accessible to callers. + * this is more for C compatibility than convenience; the following should + * be used instead of direct access to the corresponding fields because + * they take care of some dirty work. + **/ -// returns a pointer to the image data (pixels), taking into account any -// header(s) that may come before it. see Tex.hm comment above. +/** + * return a pointer to the image data (pixels), taking into account any + * header(s) that may come before it. see Tex.hm comment above. 
+ * + * @param t input texture object + * @return pointer to data returned by mem_get_ptr (holds reference)! + **/ extern u8* tex_get_data(const Tex* t); -// return total byte size of the image pixels. (including mipmaps!) -// this is preferable to calculating manually because it's -// less error-prone (e.g. confusing bits_per_pixel with bytes). +/** + * return total byte size of the image pixels. (including mipmaps!) + * rationale: this is preferable to calculating manually because it's + * less error-prone (e.g. confusing bits_per_pixel with bytes). + * + * @param t input texture object + * @return size [bytes] + **/ extern size_t tex_img_size(const Tex* t); -// -// image writing -// - -// return the minimum header size (i.e. offset to pixel data) of the -// file format indicated by 's extension (that is all it need contain: -// e.g. ".bmp"). returns 0 on error (i.e. no codec found). -// this can be used to optimize calls to tex_write: when allocating the -// buffer that will hold the image, allocate this much extra and -// pass the pointer as base+hdr_size. this allows writing the header -// directly into the output buffer and makes for zero-copy IO. -extern size_t tex_hdr_size(const char* fn); - -// write the specified texture to disk. -// note: cannot be made const because the image may have to be -// transformed to write it out in the format determined by 's extension. -extern LibError tex_write(Tex* t, const char* fn); - - -// internal use only: -extern LibError tex_validate(const Tex* t); - -// check if the given texture format is acceptable: 8bpp grey, -// 24bpp color or 32bpp color+alpha (BGR / upside down are permitted). -// basically, this is the "plain" format understood by all codecs and -// tex_codec_plain_transform. -extern LibError tex_validate_plain_format(uint bpp, uint flags); - - -// indicate if the orientation specified by matches -// dst_orientation (if the latter is 0, then the global_orientation). 
-// (we ask for src_flags instead of src_orientation so callers don't -// have to mask off TEX_ORIENTATION) -extern bool tex_orientations_match(uint src_flags, uint dst_orientation); +/** + * special value for levels_to_skip: the callback will only be called + * for the base mipmap level (i.e. 100%) + **/ +const int TEX_BASE_LEVEL_ONLY = -1; +/** + * callback function for each mipmap level. + * + * @param level number; 0 for base level (i.e. 100%), or the first one + * in case some were skipped. + * @param level_w, level_h pixel dimensions (powers of 2, never 0) + * @param level_data the level's texels + * @param level_data_size [bytes] + * @param ctx passed through from tex_util_foreach_mipmap. + **/ typedef void (*MipmapCB)(uint level, uint level_w, uint level_h, const u8* level_data, size_t level_data_size, void* ctx); -// special value for levels_to_skip: the callback will only be called -// for the base mipmap level (i.e. 100%) -const int TEX_BASE_LEVEL_ONLY = -1; - +/** + * for a series of mipmaps stored from base to highest, call back for + * each level. + * + * @param w, h pixel dimensions + * @param bpp bits per pixel + * @param data series of mipmaps + * @param levels_to_skip number of levels (counting from base) to skip, or + * TEX_BASE_LEVEL_ONLY to only call back for the base image. + * rationale: this avoids needing to special case for images with or + * without mipmaps. + * @param data_padding minimum pixel dimensions of mipmap levels. + * this is used in S3TC images, where each level is actually stored in + * 4x4 blocks. usually 1 to indicate levels are consecutive. + * @param cb MipmapCB to call + * @param ctx extra data to pass to cb + **/ extern void tex_util_foreach_mipmap(uint w, uint h, uint bpp, const u8* restrict data, int levels_to_skip, uint data_padding, MipmapCB cb, void* restrict ctx); -#endif // TEX_H__ +// +// image writing +// + +/** + * is the file's extension that of a texture format supported by tex_load? 
+ * + * rationale: tex_load complains if the given file is of an + * unsupported type. this API allows users to preempt that warning + * (by checking the filename themselves), and also provides for e.g. + * enumerating only images in a file picker. + * an alternative might be a flag to suppress warning about invalid files, + * but this is open to misuse. + * + * @param filename only the extension (that after '.') is used. case-insensitive. + * @return bool + **/ +extern bool tex_is_known_extension(const char* filename); + +/** + * return the minimum header size (i.e. offset to pixel data) of the + * file format corresponding to the filename. + * + * rationale: this can be used to optimize calls to tex_write: when + * allocating the buffer that will hold the image, allocate this much + * extra and pass the pointer as base+hdr_size. this allows writing the + * header directly into the output buffer and makes for zero-copy IO. + * + * @param fn filename; only the extension (that after '.') is used. + * case-insensitive. + * @return size [bytes] or 0 on error (i.e. no codec found). + **/ +extern size_t tex_hdr_size(const char* fn); + +/** + * write the specified texture to disk. + * + * @param t input texture object. note: cannot be made const because the + * image may have to be transformed to write it out in the format + * determined by 's extension. + * @return LibError + **/ +extern LibError tex_write(Tex* t, const char* fn); + +#endif // TEX_H__ Index: ps/trunk/source/lib/res/graphics/tex_codec.cpp =================================================================== --- ps/trunk/source/lib/res/graphics/tex_codec.cpp (revision 3910) +++ ps/trunk/source/lib/res/graphics/tex_codec.cpp (revision 3911) @@ -1,157 +1,168 @@ /** * ========================================================================= * File : tex_codec.cpp * Project : 0 A.D. 
* Description : support routines for texture codecs * * @author Jan.Wassenberg@stud.uni-karlsruhe.de * ========================================================================= */ /* * Copyright (c) 2004 Jan Wassenberg * * Redistribution and/or modification are also permitted under the * terms of the GNU General Public License as published by the * Free Software Foundation (version 2 or later, at your option). * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. */ #include "precompiled.h" #include #include #include "tex_codec.h" #include "tex.h" #include "lib/path_util.h" static const TexCodecVTbl* codecs; // add this vtbl to the codec list. called at NLSO init time by the // TEX_CODEC_REGISTER in each codec file. note that call order and therefore // order in the list is undefined, but since each codec only steps up if it // can handle the given format, this is not a problem. // // returns int to alloc calling from a macro at file scope. int tex_codec_register(TexCodecVTbl* c) { debug_assert(c); // insert at front of list. c->next = codecs; codecs = c; return 0; // (assigned to dummy variable) } // find codec that recognizes the desired output file extension, // or return ERR_UNKNOWN_FORMAT if unknown. // note: does not raise a warning because it is used by // tex_is_known_extension. 
LibError tex_codec_for_filename(const char* fn, const TexCodecVTbl** c) { const char* ext = path_extension(fn); for(*c = codecs; *c; *c = (*c)->next) { // we found it if((*c)->is_ext(ext)) return ERR_OK; } return ERR_UNKNOWN_FORMAT; // NOWARN } // find codec that recognizes the header's magic field LibError tex_codec_for_header(const u8* file, size_t file_size, const TexCodecVTbl** c) { // we guarantee at least 4 bytes for is_hdr to look at if(file_size < 4) WARN_RETURN(ERR_INCOMPLETE_HEADER); for(*c = codecs; *c; *c = (*c)->next) { // we found it if((*c)->is_hdr(file)) return ERR_OK; } WARN_RETURN(ERR_UNKNOWN_FORMAT); } +const TexCodecVTbl* tex_codec_next(const TexCodecVTbl* prev_codec) +{ + // first time + if(!prev_codec) + return codecs; + // middle of list: return next (can be 0 to indicate end of list) + else + return prev_codec->next; +} + + LibError tex_codec_transform(Tex* t, uint transforms) { LibError ret = INFO_TEX_CODEC_CANNOT_HANDLE; // find codec that understands the data, and transform for(const TexCodecVTbl* c = codecs; c; c = c->next) { LibError err = c->transform(t, transforms); // success if(err == ERR_OK) return ERR_OK; // something went wrong else if(err != INFO_TEX_CODEC_CANNOT_HANDLE) { ret = err; debug_warn("codec indicates error"); } } return ret; } //----------------------------------------------------------------------------- // helper functions used by codecs //----------------------------------------------------------------------------- // allocate an array of row pointers that point into the given texture data. // indicates whether the file format is top-down or // bottom-up; the row array is inverted if necessary to match global // orientation. (this is more efficient than "transforming" later) // // used by PNG and JPG codecs; caller must free() rows when done. // // note: we don't allocate the data param ourselves because this function is // needed for encoding, too (where data is already present). 
LibError tex_codec_alloc_rows(const u8* data, size_t h, size_t pitch, uint src_flags, uint dst_orientation, RowArray& rows) { const bool flip = !tex_orientations_match(src_flags, dst_orientation); rows = (RowArray)malloc(h * sizeof(RowPtr)); if(!rows) WARN_RETURN(ERR_NO_MEM); // determine start position and direction RowPtr pos = flip? data+pitch*(h-1) : data; const ssize_t add = flip? -(ssize_t)pitch : (ssize_t)pitch; const RowPtr end = flip? data-pitch : data+pitch*h; for(size_t i = 0; i < h; i++) { rows[i] = pos; pos += add; } debug_assert(pos == end); return ERR_OK; } LibError tex_codec_write(Tex* t, uint transforms, const void* hdr, size_t hdr_size, DynArray* da) { RETURN_ERR(tex_transform(t, transforms)); void* img_data = tex_get_data(t); const size_t img_size = tex_img_size(t); RETURN_ERR(da_append(da, hdr, hdr_size)); RETURN_ERR(da_append(da, img_data, img_size)); return ERR_OK; } Index: ps/trunk/source/lib/res/sound/snd_mgr.h =================================================================== --- ps/trunk/source/lib/res/sound/snd_mgr.h (revision 3910) +++ ps/trunk/source/lib/res/sound/snd_mgr.h (revision 3911) @@ -1,274 +1,351 @@ /** * ========================================================================= * File : snd_mgr.h * Project : 0 A.D. * Description : OpenAL sound engine. handles sound I/O, buffer * : suballocation and voice management/prioritization. * * @author Jan.Wassenberg@stud.uni-karlsruhe.de * ========================================================================= */ /* * Copyright (c) 2004-2005 Jan Wassenberg * * Redistribution and/or modification are also permitted under the * terms of the GNU General Public License as published by the * Free Software Foundation (version 2 or later, at your option). * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
*/ #ifndef SND_MGR_H__ #define SND_MGR_H__ #include "../handle.h" -/* - -[KEEP IN SYNC WITH WIKI] +/** overview -------- this module provides a moderately high-level sound interface. basic usage is opening a sound and requesting it be played; it is closed automatically when playback has finished (fire and forget). any number of sound play requests may be issued; the most 'important' ones are actually played (necessary due to limited hardware mixing capacity). 3d positional sounds (heard to emanate from a given spot) are supported. active sound instances are referenced by Handles, so changing volume etc. during playback is possible (useful for fadeout). sound setup ----------- OpenAL provides portable access to the underlying sound hardware, and falls back to software mixing if no acceleration is provided. we allow the user to specify the device to use (in case the default has problems) and maximum number of sources (to reduce mixing cost). performance ----------- much effort has been invested in efficiency: all sound data is cached, so every open() after the first is effectively free. large sound files are streamed from disk to reduce load time and memory usage. hardware mixing resources are suballocated to avoid delays when starting to play. therefore, the user can confidently fire off hundreds of sound requests. finally, lengthy initialization steps are delayed until the sound engine is actually needed (i.e. upon first open()). perceived startup time is therefore reduced - the user sees e.g. our main menu earlier. terminology ----------- "hardware voice" refers to mixing resources on the DSP. strictly speaking, we mean 'OpenAL source', but this term is more clear. voice ~= source, unless expensive effects (e.g. EAX) are enabled. note: software mixing usually doesn't have a fixed 'source' cap. "gain" is quantified volume. 1 is unattenuated, 0.5 corresponds to -6 dB, and 0 is silence. this can be set per-source as well as globally. 
"position" of a sound is within the app's coordinate system, the orientation of which is passed to snd_update. "importance" of a sound derives from the app-assigned priority (e.g. voiceover must not be skipped in favor of seagulls) and distance from the listener. it's calculated by our prioritizer. "virtual source" denotes a sound play request issued by the app. this is in contrast to an actual AL source, which will be mixed into the output channel. the most important VSrc receive an al_src. "sound instances" store playback parameters (e.g. position), and reference the (centrally cached) "sound data" that will be played. -*/ +**/ // // device enumeration // -// prepare to enumerate all device names (this resets the list returned by -// snd_dev_next). return 0 on success, otherwise -1 (only if the requisite -// OpenAL extension isn't available). on failure, a "cannot enum device" -// message should be presented to the user, and snd_dev_set need not be -// called; OpenAL will use its default device. -// may be called each time the device list is needed. +/** + * prepare to enumerate all device names (this resets the list returned by + * snd_dev_next). + * may be called each time the device list is needed. + * + * @return LibError; fails iff the requisite OpenAL extension isn't available. + * in that case, a "cannot enum device" message should be displayed, but + * snd_dev_set need not be called; OpenAL will use its default device. + **/ extern LibError snd_dev_prepare_enum(); -// return the next device name, or 0 if all have been returned. -// do not call unless snd_dev_prepare_enum succeeded! -// not thread-safe! (static data from snd_dev_prepare_enum is used) +/** + * get next device name in list. + * + * do not call unless snd_dev_prepare_enum succeeded! + * not thread-safe! (static data from snd_dev_prepare_enum is used) + * + * @return device name string, or 0 if all have been returned. 
+ **/ extern const char* snd_dev_next(); // // sound system setup // -// tell OpenAL to use the specified device in future. -// name = 0 reverts to OpenAL's default choice, which will also -// be used if this routine is never called. -// -// the device name is typically taken from a config file at init-time; -// the snd_dev* enumeration routines below are used to present a list -// of choices to the user in the options screen. -// -// if OpenAL hasn't yet been initialized (i.e. no sounds have been opened), -// this just stores the device name for use when init does occur. -// note: we can't check now if it's invalid (if so, init will fail). -// otherwise, we shut OpenAL down (thereby stopping all sounds) and -// re-initialize with the new device. that's fairly time-consuming, -// so preferably call this routine before sounds are loaded. -// -// return 0 on success, or the status returned by OpenAL re-init. +/** + * tell OpenAL to use the specified device in future. + * + * @param alc_new_dev_name device name string. if 0, revert to + * OpenAL's default choice, which will also be used if + * this routine is never called. + * the device name is typically taken from a config file at init-time; + * the snd_dev* enumeration routines above are used to present a list + * of choices to the user in the options screen. + * + * if OpenAL hasn't yet been initialized (i.e. no sounds have been opened), + * this just stores the device name for use when init does occur. + * note: we can't check now if it's invalid (if so, init will fail). + * otherwise, we shut OpenAL down (thereby stopping all sounds) and + * re-initialize with the new device. that's fairly time-consuming, + * so preferably call this routine before sounds are loaded. + * + * @return LibError (the status returned by OpenAL re-init) + **/ extern LibError snd_dev_set(const char* alc_new_dev_name); -// set maximum number of voices to play simultaneously, -// to reduce mixing cost on low-end systems. 
-// return 0 on success, or 1 if limit was ignored -// (e.g. if higher than an implementation-defined limit anyway). +/** + * set maximum number of voices to play simultaneously; + * this can be used to reduce mixing cost on low-end systems. + * + * @param cap maximum number of voices. ignored if higher than + * an implementation-defined limit anyway. + * @return LibError + **/ extern LibError snd_set_max_voices(uint cap); -// set amplitude modifier, which is effectively applied to all sounds. -// must be non-negative; 1 -> unattenuated, 0.5 -> -6 dB, 0 -> silence. +/** + * set amplitude modifier, which is effectively applied to all sounds. + * this is akin to a global "volume" control. + * + * @param gain amplitude modifier. must be non-negative; + * 1 -> unattenuated, 0.5 -> -6 dB, 0 -> silence. + * @return LibError + **/ extern LibError snd_set_master_gain(float gain); // // sound instance // -// open and return a handle to a sound instance. -// -// if is a text file (extension "txt"), it is assumed -// to be a definition file containing the sound file name and -// its gain (0.0 .. 1.0). -// otherwise, is taken to be the sound file name and -// gain is set to the default of 1.0 (no attenuation). -// -// is_stream (default false) forces the sound to be opened as a stream: -// opening is faster, it won't be kept in memory, but only one instance -// can be open at a time. +/** + * open and return a handle to a sound instance. + * this loads the sound data and makes it ready for other snd_* APIs. + * + * @param snd_fn input filename. if a text file (extension "txt"), it is + * assumed to be a definition file containing the sound file name and + * its gain (0.0 .. 1.0). + * otherwise, it is taken to be the sound file name and + * gain is set to the default of 1.0 (no attenuation). + * + * @param is_stream (default false) forces the sound to be opened as a + * stream: opening is faster, it won't be kept in memory, but + * only one instance can be open at a time. 
+ * @return Handle or LibError + **/ extern Handle snd_open(const char* snd_fn, bool stream = false); -// close the sound and set hs to 0. if it was playing, -// it will be stopped. sounds are closed automatically when done -// playing; this is provided for completeness only. +/** + * close the sound instance. if it was playing, it will be stopped. + * + * rationale: sounds are already closed automatically when done playing; + * this API is provided for completeness only. + * + * @param hs Handle to sound instance. zeroed afterwards. + * @return LibError + **/ extern LibError snd_free(Handle& hs); -// request the sound be played. once done playing, the sound is -// automatically closed (allows fire-and-forget play code). -// if no hardware voice is available, this sound may not be played at all, -// or in the case of looped sounds, start later. -// priority (min 0 .. max 1, default 0) indicates which sounds are -// considered more important; this is attenuated by distance to the -// listener (see snd_update). +/** + * start playing the sound. + * + * Notes: + *
    + *
  • once done playing, the sound is automatically closed (allows + * fire-and-forget play code). + *
  • if no hardware voice is available, this sound may not be + * played at all, or in the case of looped sounds, start later. + *
+ * + * @param priority (min 0 .. max 1, default 0) indicates which sounds are + * considered more important (i.e. will override others when no hardware + * voices are available). the static priority is attenuated by + * distance to the listener; see snd_update. + * + * @return LibError + **/ extern LibError snd_play(Handle hs, float priority = 0.0f); -// change 3d position of the sound source. -// if relative (default false), (x,y,z) is treated as relative to the -// listener; otherwise, it is the position in world coordinates. -// -// may be called at any time; fails with invalid handle return if -// the sound has already been closed (e.g. it never played). +/** + * change 3d position of the sound source. + * + * may be called at any time; fails with invalid handle return if + * the sound has already been closed (e.g. it never played). + * + * @param relative treat (x,y,z) as relative to the listener; + * if false (the default), it is the position in world coordinates. + * @return LibError + **/ extern LibError snd_set_pos(Handle hs, float x, float y, float z, bool relative = false); -// change gain (amplitude modifier) of the sound source. -// must be non-negative; 1 -> unattenuated, 0.5 -> -6 dB, 0 -> silence. -// -// should not be called during a fade (see note in implementation); -// fails with invalid handle return if the sound has already been -// closed (e.g. it never played). +/** + * change gain (amplitude modifier) of the sound source. + * + * should not be called during a fade (see note in implementation); + * fails with invalid handle return if the sound has already been + * closed (e.g. it never played). + * + * @param gain amplitude modifier. must be non-negative; + * 1 -> unattenuated, 0.5 -> -6 dB, 0 -> silence. + * @return LibError + **/ extern LibError snd_set_gain(Handle hs, float gain); -// change pitch shift of the sound source. -// 1.0 means no change; each reduction by 50% equals a pitch shift of -// -12 semitones (one octave). 
zero is invalid. -// -// may be called at any time; fails with invalid handle return if -// the sound has already been closed (e.g. it never played). +/** + * change pitch shift of the sound source. + * + * may be called at any time; fails with invalid handle return if + * the sound has already been closed (e.g. it never played). + * + * @param pitch 1.0 means no change; each reduction by 50% equals a + * pitch shift of -12 semitones (one octave). zero is invalid. + * @return LibError + **/ extern LibError snd_set_pitch(Handle hs, float pitch); -// enable/disable looping on the sound source. -// used to implement variable-length sounds (e.g. while building). -// -// may be called at any time; fails with invalid handle return if -// the sound has already been closed (e.g. it never played). -// -// notes: -// - looping sounds are not discarded if they cannot be played for lack of -// a hardware voice at the moment play was requested. -// - once looping is again disabled and the sound has reached its end, -// the sound instance is freed automatically (as if never looped). +/** + * enable/disable looping on the sound source. + * used to implement variable-length sounds (e.g. while building). + * + * may be called at any time; fails with invalid handle return if + * the sound has already been closed (e.g. it never played). + * + * Notes: + *
    + *
  • looping sounds are not discarded if they cannot be played for + * lack of a hardware voice at the moment play was requested. + *
  • once looping is again disabled and the sound has reached its end, + * the sound instance is freed automatically (as if never looped). + *
+ * @return LibError + **/ extern LibError snd_set_loop(Handle hs, bool loop); - +/// types of fade in/out operations enum FadeType { - FT_NONE, - FT_LINEAR, - FT_EXPONENTIAL, - FT_S_CURVE, + FT_NONE, /// currently no fade in progress + FT_LINEAR, /// f(t) = t + FT_EXPONENTIAL, /// f(t) = t**3 + FT_S_CURVE, /// cosine curve - FT_ABORT + FT_ABORT /// abort and mark pending fade as complete }; -// fade the sound source in or out over time. -// its gain starts at (immediately) and is moved toward -// over seconds. determines the fade curve: -// linear, exponential or S-curve. for guidance on which to use, see -// http://www.transom.org/tools/editing_mixing/200309.stupidfadetricks.html -// you can also pass FT_ABORT to stop fading (if in progress) and -// set gain to the current parameter. -// special cases: -// - if < 0 (an otherwise illegal value), the sound's -// current gain is used as the start value (useful for fading out). -// - if is 0, the sound is freed when the fade completes or -// is aborted, thus allowing fire-and-forget fadeouts. no cases are -// foreseen where this is undesirable, and it is easier to implement -// than an extra set-free-after-fade-flag function. -// -// may be called at any time; fails with invalid handle return if -// the sound has already been closed (e.g. it never played). -// -// note that this function doesn't busy-wait until the fade is complete; -// any number of fades may be active at a time (allows cross-fading). -// each snd_update calculates a new gain value for all pending fades. -// it is safe to start another fade on the same sound source while -// one is already in progress; the old one will be discarded. +/** + * fade the sound source in or out over time. + * + * may be called at any time; fails with invalid handle return if + * the sound has already been closed (e.g. it never played). + * + * gain starts at (immediately) and is moved toward + * over seconds. + * @param type of fade curve: linear, exponential or S-curve. 
+ * for guidance on which to use, see + * http://www.transom.org/tools/editing_mixing/200309.stupidfadetricks.html + * you can also pass FT_ABORT to stop fading (if in progress) and + * set gain to the current parameter. + * special cases: + * - if < 0 (an otherwise illegal value), the sound's + * current gain is used as the start value (useful for fading out). + * - if is 0, the sound is freed when the fade completes or + * is aborted, thus allowing fire-and-forget fadeouts. no cases are + * foreseen where this is undesirable, and it is easier to implement + * than an extra set-free-after-fade-flag function. + * + * note that this function doesn't busy-wait until the fade is complete; + * any number of fades may be active at a time (allows cross-fading). + * each snd_update calculates a new gain value for all pending fades. + * it is safe to start another fade on the same sound source while + * one is already in progress; the old one will be discarded. + * @return LibError + **/ extern LibError snd_fade(Handle hvs, float initial_gain, float final_gain, float length, FadeType type); // // sound engine // -// (temporarily) disable all sound output. because it causes future snd_open -// calls to immediately abort before they demand-initialize OpenAL, -// startup is sped up considerably (500..1000ms). therefore, this must be -// called before the first snd_open to have any effect; otherwise, the -// cat will already be out of the bag and we debug_warn of it. -// -// rationale: this is a quick'n dirty way of speeding up startup during -// development without having to change the game's sound code. -// -// can later be called to reactivate sound; all settings ever changed -// will be applied and subsequent sound load / play requests will work. +/** + * (temporarily) disable all sound output. + * + * because it causes future snd_open calls to immediately abort before they + * demand-initialize OpenAL, startup is sped up considerably (500..1000ms). 
+ * therefore, this must be called before the first snd_open to have + * any effect; otherwise, the cat will already be out of the bag and + * we debug_warn of it. + * + * rationale: this is a quick'n dirty way of speeding up startup during + * development without having to change the game's sound code. + * + * can later be called to reactivate sound; all settings ever changed + * will be applied and subsequent sound load / play requests will work. + * @return LibError + **/ extern LibError snd_disable(bool disabled); -// perform housekeeping (e.g. streaming); call once a frame. -// -// additionally, if any parameter is non-NULL, we set the listener -// position, look direction, and up vector (in world coordinates). +/** + * perform housekeeping (e.g. streaming); call once a frame. + * + * all parameters are expressed in world coordinates. they can all be NULL + * to avoid updating the listener data; this is useful when the game world + * has not been initialized yet. + * @param pos listener's position + * @param dir listener view direction + * @param up listener's local up vector + * @return LibError + **/ extern LibError snd_update(const float* pos, const float* dir, const float* up); -// free all resources and shut down the sound system. -// call before h_mgr_shutdown. +/** + * free all resources and shut down the sound system. + * call before h_mgr_shutdown. + **/ extern void snd_shutdown(); #endif // #ifndef SND_MGR_H__ Index: ps/trunk/source/lib/lib.h =================================================================== --- ps/trunk/source/lib/lib.h (revision 3910) +++ ps/trunk/source/lib/lib.h (revision 3911) @@ -1,385 +1,554 @@ /** * ========================================================================= * File : lib.h * Project : 0 A.D. * Description : various utility functions. 
* * @author Jan.Wassenberg@stud.uni-karlsruhe.de * ========================================================================= */ /* * Copyright (c) 2003-2005 Jan Wassenberg * * Redistribution and/or modification are also permitted under the * terms of the GNU General Public License as published by the * Free Software Foundation (version 2 or later, at your option). * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. */ -/* - -[KEEP IN SYNC WITH WIKI] +/** low-level aka "lib" ------------------- this codebase was grown from modules shared between several projects, i.e. my personal library; hence the name "lib". it has been expanded to fit the needs of 0ad - in particular, resource loading. owing to the dual-use situation, the 0ad coding conventions are not met; also, major changes are ill-advised because they may break other projects. design goals ------------ - fast and low-overhead, including startup time - portable: must run on Win32, Mac OS X and Linux - reusable across projects, i.e. no dependency on a central 'manager' that ties modules together. scope ----- - POSIX definitions - resource management - debugging tools (including memory tracker) - low-level helper functions, e.g. ADTs, endian conversion and timing - platform-dependent system/feature detection -*/ +**/ #ifndef LIB_H__ #define LIB_H__ #include #include // fabsf #include "config.h" #include "lib/types.h" #include "sysdep/sysdep.h" #include "sysdep/cpu.h" // CAS +//#include "sysdep/sysdep.h" // moved down; see below. #if defined(__cplusplus) -#define EXTERN_C extern "C" +# define EXTERN_C extern "C" #else -#define EXTERN_C extern +# define EXTERN_C extern #endif -// package code into a single statement. -// notes: -// - for(;;) { break; } and {} don't work because invocations of macros -// implemented with STMT often end with ";", thus breaking if() expressions. 
-// - we'd really like to eliminate "conditional expression is constant" -// warnings. replacing 0 literals with extern volatile variables fools -// VC7 but isn't guaranteed to be free of overhead. we will just -// squelch the warning (unfortunately non-portable). + +const size_t KiB = 1ul << 10; +const size_t MiB = 1ul << 20; +const size_t GiB = 1ul << 30; + + +//----------------------------------------------------------------------------- +// code-generating macros +//----------------------------------------------------------------------------- + +/** + * package code into a single statement. + * + * @param STMT_code__ code to be bundled. (must be interpretable as + * a macro argument, i.e. sequence of tokens). + * the argument name is chosen to avoid conflicts. + * + * notes: + * - for(;;) { break; } and {} don't work because invocations of macros + * implemented with STMT often end with ";", thus breaking if() expressions. + * - we'd really like to eliminate "conditional expression is constant" + * warnings. replacing 0 literals with extern volatile variables fools + * VC7 but isn't guaranteed to be free of overhead. we will just + * squelch the warning (unfortunately non-portable). + **/ #define STMT(STMT_code__) do { STMT_code__; } while(false) // must come after definition of STMT #include "lib/lib_errors.h" -// execute the code passed as a parameter only the first time this is -// reached. -// may be called at any time (in particular before main), but is not -// thread-safe. if that's important, use pthread_once() instead. +/** + * execute the code passed as a parameter only the first time this is + * reached. + * may be called at any time (in particular before main), but is not + * thread-safe. if that's important, use pthread_once() instead. 
+ **/ #define ONCE(ONCE_code__)\ STMT(\ static bool ONCE_done__ = false;\ if(!ONCE_done__)\ {\ ONCE_done__ = true;\ ONCE_code__;\ }\ ) -// execute the code passed as a parameter except the first time this is -// reached. -// may be called at any time (in particular before main), but is not -// thread-safe. +/** + * execute the code passed as a parameter except the first time this is + * reached. + * may be called at any time (in particular before main), but is not + * thread-safe. + **/ #define ONCE_NOT(ONCE_code__)\ STMT(\ static bool ONCE_done__ = false;\ if(!ONCE_done__)\ ONCE_done__ = true;\ else\ ONCE_code__;\ ) -// useful because VC6 may return 0 on failure, instead of throwing. -// this wraps the exception handling, and creates a NULL pointer on failure. +/** + * C++ new wrapper: allocates an instance of the given type and stores a + * pointer to it. sets pointer to 0 on allocation failure. + * + * this simplifies application code when on VC6, which may or + * may not throw/return 0 on failure. 
+ **/ #define SAFE_NEW(type, ptr)\ type* ptr;\ try\ {\ ptr = new type();\ }\ catch(std::bad_alloc)\ {\ ptr = 0;\ } +/** + * delete memory ensuing from new and set the pointer to zero + * (thus making double-frees safe / a no-op) + **/ #define SAFE_DELETE(p)\ STMT(\ delete (p); /* if p == 0, delete is a no-op */ \ (p) = 0;\ ) +/** + * delete memory ensuing from new[] and set the pointer to zero + * (thus making double-frees safe / a no-op) + **/ +#define SAFE_ARRAY_DELETE(p)\ +STMT(\ + delete[] (p); /* if p == 0, delete is a no-op */ \ + (p) = 0;\ +) +/** + * free memory ensuing from malloc and set the pointer to zero + * (thus making double-frees safe / a no-op) + **/ #define SAFE_FREE(p)\ STMT(\ free(p); /* if p == 0, free is a no-op */ \ (p) = 0;\ ) +//----------------------------------------------------------------------------- +// source code annotation +//----------------------------------------------------------------------------- -#ifndef MIN -#define MIN(a, b) (((a) < (b))? (a) : (b)) -#endif - -#ifndef MAX -#define MAX(a, b) (((a) > (b))? (a) : (b)) -#endif - - -// 2 ways of avoiding "unreferenced formal parameter" warnings: -// .. inside the function body, e.g. void f(int x) { UNUSED2(x); } +/** + * mark a function local variable or parameter as unused and avoid + * the corresponding compiler warning. + * use inside the function body, e.g. void f(int x) { UNUSED2(x); } + **/ #define UNUSED2(param) (void)param; -// .. wrapped around the parameter name, e.g. void f(int UNUSED(x)) + +/** + * mark a function parameter as unused and avoid + * the corresponding compiler warning. + * wrap around the parameter name, e.g. void f(int UNUSED(x)) + **/ #define UNUSED(param) -// mark the copy constructor as inaccessible. this squelches -// "cannot be generated" warnings for classes with const members. -// -// intended to be used at end of class definition. -// must be followed by semicolon. +/** + * mark the copy constructor as inaccessible. 
this squelches + * "cannot be generated" warnings for classes with const members. + * + * intended to be used at end of class definition. + * must be followed by semicolon. + **/ #define NO_COPY_CTOR(class_name)\ private:\ class_name& operator=(const class_name&) -/* +/** "unreachable code" helpers unreachable lines of code are often the source or symptom of subtle bugs. they are flagged by compiler warnings; however, the opposite problem - erroneously reaching certain spots (e.g. due to missing return statement) is worse and not detected automatically. to defend against this, the programmer can annotate their code to indicate to humans that a particular spot should never be reached. however, that isn't much help; better is a sentinel that raises an error if if it is actually reached. hence, the UNREACHABLE macro. ironically, if the code guarded by UNREACHABLE works as it should, compilers may flag the macro's code as unreachable. this would distract from genuine warnings, which is unacceptable. even worse, compilers differ in their code checking: GCC only complains if non-void functions end without returning a value (i.e. missing return statement), while VC checks if lines are unreachable (e.g. if they are preceded by a return on all paths). our implementation of UNREACHABLE solves this dilemna as follows: - on GCC: call abort(); since it has the noreturn attributes, the "non-void" warning disappears. - on VC: avoid generating any code. we allow the compiler to assume the spot is actually unreachable, which incidentally helps optimization. if reached after all, a crash usually results. in that case, compile with CONFIG_PARANOIA, which will cause an error message to be displayed. this approach still allows for the possiblity of automated checking, but does not cause any compiler warnings. -*/ +**/ +#define UNREACHABLE // actually defined below.. this is for +# undef UNREACHABLE // CppDoc's benefit only. 
// 1) final build: optimize assuming this location cannot be reached. // may crash if that turns out to be untrue, but removes checking overhead. #if CONFIG_FINAL # define UNREACHABLE SYS_UNREACHABLE // 2) normal build: #else // a) normal implementation: includes "abort", which is declared with // noreturn attribute and therefore avoids GCC's "execution reaches // end of non-void function" warning. # if !MSC_VERSION || CONFIG_PARANOIA # define UNREACHABLE\ STMT(\ debug_warn("hit supposedly unreachable code");\ abort();\ ) // b) VC only: don't generate any code; squelch the warning and optimize. # else # define UNREACHABLE SYS_UNREACHABLE # endif #endif -/* +/** convenient specialization of UNREACHABLE for switch statements whose default can never be reached. example usage: int x; switch(x % 2) { case 0: break; case 1: break; NODEFAULT; } -*/ +**/ #define NODEFAULT default: UNREACHABLE - -#define ARRAY_SIZE(name) (sizeof(name) / sizeof(name[0])) - - -// -// compile-time debug_assert, especially useful for testing sizeof(). -// no runtime overhead; may be used anywhere, including file scope. -// +//----------------------------------------------------------------------------- +// cassert // generate a symbol containing the line number of the macro invocation. // used to give a unique name (per file) to types made by cassert. // we can't prepend __FILE__ to make it globally unique - the filename // may be enclosed in quotes. need the 2 macro expansions to make sure // __LINE__ is expanded correctly. #define MAKE_UID2__(l) LINE_ ## l #define MAKE_UID1__(l) MAKE_UID2__(l) #define UID__ MAKE_UID1__(__LINE__) -// more descriptive error message, but may cause a struct redefinition -// warning if used from the same line in different files. +/** + * compile-time debug_assert. causes a compile error if the expression + * evaluates to zero/false. + * + * no runtime overhead; may be used anywhere, including file scope. + * especially useful for testing sizeof types. 
+ * + * this version has a more descriptive error message, but may cause a + * struct redefinition warning if used from the same line in different files. + * + * note: alternative method in C++: specialize a struct only for true; + * using it will raise 'incomplete type' errors if instantiated with false. + * + * @param expression that is expected to evaluate to non-zero at compile-time. + **/ #define cassert(expr) struct UID__ { int CASSERT_FAILURE: (expr); } -// less helpful error message, but redefinition doesn't trigger warnings. +/** + * compile-time debug_assert. causes a compile error if the expression + * evaluates to zero/false. + * + * no runtime overhead; may be used anywhere, including file scope. + * especially useful for testing sizeof types. + * + * this version has a less helpful error message, but redefinition doesn't + * trigger warnings. + * + * @param expression that is expected to evaluate to non-zero at compile-time. + **/ #define cassert2(expr) extern char CASSERT_FAILURE[1][(expr)] -// note: alternative method in C++: specialize a struct only for true; -// using it will raise 'incomplete type' errors if instantiated with false. - - - - -const size_t KiB = 1ul << 10; -const size_t MiB = 1ul << 20; -const size_t GiB = 1ul << 30; - - +//----------------------------------------------------------------------------- +// bit bashing +//----------------------------------------------------------------------------- +/** + * value of bit number . + * + * @param n bit index (0..CHAR_BIT*sizeof(int)-1) + **/ #define BIT(n) (1ul << (n)) // these are declared in the header and inlined to aid compiler optimizations // (they can easily end up being time-critical). +// note: GCC can't inline extern functions, while VC's "Whole Program +// Optimization" can. 
+/** + * a mask that includes the lowest N bits + * + * @param num_bits number of bits in mask + **/ inline uint bit_mask(uint num_bits) { return (1u << num_bits)-1; } +/** + * extract the value of bits hi_idx:lo_idx within num + * + * example: bits(0x69, 2, 5) == 0x0A + * + * @param num number whose bits are to be extracted + * @param lo_idx bit index of lowest bit to include + * @param hi_idx bit index of highest bit to include + * @return value of extracted bits. + **/ inline uint bits(uint num, uint lo_idx, uint hi_idx) { const uint count = (hi_idx - lo_idx)+1; // # bits to return uint result = num >> lo_idx; result &= bit_mask(count); return result; } +/// is the given number a power of two? +extern bool is_pow2(uint n); -// FNV1-A hash - good for strings. -// if len = 0 (default), treat buf as a C-string; -// otherwise, hash bytes of buf. -extern u32 fnv_hash(const void* buf, size_t len = 0); -extern u64 fnv_hash64(const void* buf, size_t len = 0); +/** + * @return -1 if not an integral power of 2, + * otherwise the base2 logarithm. + **/ +extern int ilog2(uint n); -// special version for strings: first converts to lowercase -// (useful for comparing mixed-case filenames) -extern u32 fnv_lc_hash(const char* str, size_t len = 0); +/** + * @return log base 2, rounded up. + **/ +extern uint log2(uint x); -// hash (currently FNV) of a filename -typedef u32 FnHash; +/** + * another implementation; uses the FPU normalization hardware. + * + * @return log base 2, rounded up. + **/ +extern int ilog2(const float x); +/** + * round up to nearest power of two; no change if already POT. + **/ +extern uint round_up_to_pow2(uint x); -extern u16 addusw(u16 x, u16 y); -extern u16 subusw(u16 x, u16 y); -// zero-extend (truncated to 8) bytes of little-endian data to u64, -// starting at address

(need not be aligned). -extern u64 movzx_64le(const u8* p, size_t size); +//----------------------------------------------------------------------------- +// misc arithmetic -// sign-extend (truncated to 8) bytes of little-endian data to i64, -// starting at address

(need not be aligned). -extern i64 movsx_64le(const u8* p, size_t size); +/// canonical minimum macro +#ifndef MIN +#define MIN(a, b) (((a) < (b))? (a) : (b)) +#endif +/// canonical maximum macro +#ifndef MAX +#define MAX(a, b) (((a) > (b))? (a) : (b)) +#endif -extern bool is_pow2(uint n); +/// number of array elements +#define ARRAY_SIZE(name) (sizeof(name) / sizeof(name[0])) -// return -1 if not an integral power of 2, -// otherwise the base2 logarithm -extern int ilog2(uint n); +/** + * round number up/down to the next given multiple. + * + * @param multiple: must be a power of two. + **/ +extern uintptr_t round_up (uintptr_t n, uintptr_t multiple); +extern uintptr_t round_down(uintptr_t n, uintptr_t multiple); -// return log base 2, rounded up. -extern uint log2(uint x); +/// 16-bit saturating (does not overflow) addition. +extern u16 addusw(u16 x, u16 y); +/// 16-bit saturating (does not underflow) subtraction. +extern u16 subusw(u16 x, u16 y); -extern uint round_up_to_pow2(uint x); +/** + * are the given floats nearly "equal"? + * + * @return whether the numbers are within "epsilon" of each other. + * + * notes: + * - the epsilon magic number varies with the magnitude of the inputs. + * we use a sane default, but don't use this routine for very + * large/small comparands. + * - floating-point numbers don't magically lose precision. addition, + * subtraction and multiplication results are precise up to the mantissa's + * least-significant bit. only division, sqrt, sin/cos and other + * transcendental operations introduce error. + **/ +inline bool feq(float f1, float f2) +{ + const float epsilon = 0.00001f; + return fabsf(f1 - f2) < epsilon; +} -// multiple must be a power of two. -extern uintptr_t round_up (uintptr_t n, uintptr_t multiple); -extern uintptr_t round_down(uintptr_t n, uintptr_t multiple); +/** +* return random integer in [min, max). 
+* avoids several common pitfalls; see discussion at +* http://www.azillionmonkeys.com/qed/random.html +**/ +extern uint rand(uint min_inclusive, uint max_exclusive); -// these avoid a common mistake in using >> (ANSI requires shift count be -// less than the bit width of the type). -extern u32 u64_hi(u64 x); -extern u32 u64_lo(u64 x); -extern u16 u32_hi(u32 x); -extern u16 u32_lo(u32 x); +//----------------------------------------------------------------------------- +// type conversion -extern u64 u64_from_u32(u32 hi, u32 lo); -extern u32 u32_from_u16(u16 hi, u16 lo); +// note: these avoid a common mistake in using >> (ANSI requires +// shift count be less than the bit width of the type). +extern u32 u64_hi(u64 x); /// return upper 32-bits +extern u32 u64_lo(u64 x); /// return lower 32-bits +extern u16 u32_hi(u32 x); /// return upper 16-bits +extern u16 u32_lo(u32 x); /// return lower 16-bits +extern u64 u64_from_u32(u32 hi, u32 lo); /// assemble u64 from u32 +extern u32 u32_from_u16(u16 hi, u16 lo); /// assemble u32 from u16 -inline bool feq(float f1, float f2) -{ - // the requisite value will change with the magnitude of f1 and f2! - // this is a sane default, but don't use this routine for very - // large/small comparands. - const float epsilon = 0.00001f; - return fabsf(f1 - f2) < epsilon; -} +/** + * zero-extend (truncated to 8) bytes of little-endian data to u64, + * starting at address

(need not be aligned). + **/ +extern u64 movzx_64le(const u8* p, size_t size); +/** + * sign-extend (truncated to 8) bytes of little-endian data to i64, + * starting at address

(need not be aligned). + **/ extern i64 movsx_64le(const u8* p, size_t size); +/// convert double to u8; verifies number is in range. +extern u8 fp_to_u8 (double in); +/// convert double to u16; verifies number is in range. extern u16 fp_to_u16(double in); -// big endian! -extern void base32(const int len, const u8* in, u8* out); +//----------------------------------------------------------------------------- +// string processing + +/** + * this is strcpy, but indicates that the programmer checked usage and + * promises it is safe. + **/ +#define SAFE_STRCPY strcpy + + +/** + * generate the base32 textual representation of a buffer. + * + * @param len size [bytes] of input + * @param big-endian input data (assumed to be integral number of bytes) + * @param output string; zero-terminated. must be big enough + * (i.e. at least ceil(len*CHAR_BIT/5) + 1 chars) + **/ +extern void base32(const size_t len, const u8* in, u8* out); -// case-insensitive check if string matches the pattern , -// which may contain '?' or '*' wildcards. if so, return 1, otherwise 0. -// note: NULL wildcard pattern matches everything! +/** + * partial regex implementation: see if string matches pattern. + * + * @param s input string + * @param w pseudo-regex to match against. case-insensitive; + * may contain '?' and/or '*' wildcards. if NULL, matches everything. + * + * @return 1 if they match, otherwise 0. + * + * algorithm from http://www.codeproject.com/string/wildcmp.asp. + **/ extern int match_wildcard(const char* s, const char* w); +/// unicode version of match_wildcard. extern int match_wildcardw(const wchar_t* s, const wchar_t* w); -// this is strcpy, but indicates that the programmer checked usage and -// promises it is safe. -#define SAFE_STRCPY strcpy -// return random integer in [min, max). 
-// avoids several common pitfalls; see discussion at -// http://www.azillionmonkeys.com/qed/random.html -extern uint rand(uint min_inclusive, uint max_exclusive); +/** + * calculate FNV1-A hash. + * + * @param buf input buffer. + * @param len if 0 (default), treat buf as a C-string; otherwise, + * indicates how many bytes of buffer to hash. + * @return hash result. note: results are distinct for buffers containing + * differing amounts of zero bytes because the hash value is seeded. + * + * rationale: this algorithm was chosen because it delivers 'good' results + * for string data and is relatively simple. other good alternatives exist; + * see Ozan Yigit's hash roundup. + **/ +extern u32 fnv_hash(const void* buf, size_t len = 0); +/// 64-bit version of fnv_hash. +extern u64 fnv_hash64(const void* buf, size_t len = 0); + +/** + * special version of fnv_hash for strings: first converts to lowercase + * (useful for comparing mixed-case filenames) + **/ +extern u32 fnv_lc_hash(const char* str, size_t len = 0); #endif // #ifndef LIB_H__ Index: ps/trunk/source/lib/debug.cpp =================================================================== --- ps/trunk/source/lib/debug.cpp (revision 3910) +++ ps/trunk/source/lib/debug.cpp (revision 3911) @@ -1,662 +1,660 @@ /** * ========================================================================= * File : debug.cpp * Project : 0 A.D. * Description : platform-independent debug support code. * * @author Jan.Wassenberg@stud.uni-karlsruhe.de * ========================================================================= */ /* * Copyright (c) 2005 Jan Wassenberg * * Redistribution and/or modification are also permitted under the * terms of the GNU General Public License as published by the * Free Software Foundation (version 2 or later, at your option). * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
*/ #include "precompiled.h" #include #include #include "lib.h" #include "posix.h" // some functions here are called from within mmgr; disable its hooks // so that our allocations don't cause infinite recursion. #include "nommgr.h" #include "self_test.h" #include "app_hooks.h" #include "lib/path_util.h" #include "debug_stl.h" #include "debug.h" // needed when writing crashlog static const size_t LOG_CHARS = 16384; wchar_t debug_log[LOG_CHARS]; wchar_t* debug_log_pos = debug_log; // write to memory buffer (fast) void debug_wprintf_mem(const wchar_t* fmt, ...) { const ssize_t chars_left = (ssize_t)LOG_CHARS - (debug_log_pos-debug_log); debug_assert(chars_left >= 0); // potentially not enough room for the new string; throw away the // older half of the log. we still protect against overflow below. if(chars_left < 512) { const size_t copy_size = sizeof(wchar_t) * LOG_CHARS/2; wchar_t* const middle = &debug_log[LOG_CHARS/2]; memcpy2(debug_log, middle, copy_size); memset(middle, 0, copy_size); debug_log_pos -= LOG_CHARS/2; // don't assign middle (may leave gap) } // write into buffer (in-place) va_list args; va_start(args, fmt); int len = vswprintf(debug_log_pos, chars_left-2, fmt, args); va_end(args); if(len < 0) { debug_warn("vswprintf failed"); return; } debug_log_pos += len+2; wcscpy(debug_log_pos-2, L"\r\n"); // safe } // need to shoehorn printf-style variable params into // the OutputDebugString call. // - don't want to split into multiple calls - would add newlines to output. // - fixing Win32 _vsnprintf to return # characters that would be written, // as required by C99, looks difficult and unnecessary. if any other code // needs that, implement GNU vasprintf. // - fixed size buffers aren't nice, but much simpler than vasprintf-style // allocate+expand_until_it_fits. these calls are for quick debug output, // not loads of data, anyway. // max # characters (including \0) output by debug_(w)printf in one call. 
static const int MAX_CHARS = 512; // rationale: static data instead of std::set to allow setting at any time. // we store FNV hash of tag strings for fast comparison; collisions are // extremely unlikely and can only result in displaying more/less text. static const uint MAX_TAGS = 20; static u32 tags[MAX_TAGS]; static uint num_tags; void debug_filter_add(const char* tag) { const u32 hash = fnv_hash(tag); // make sure it isn't already in the list for(uint i = 0; i < MAX_TAGS; i++) if(tags[i] == hash) return; // too many already? if(num_tags == MAX_TAGS) { debug_warn("increase MAX_TAGS"); return; } tags[num_tags++] = hash; } void debug_filter_remove(const char* tag) { const u32 hash = fnv_hash(tag); for(uint i = 0; i < MAX_TAGS; i++) // found it if(tags[i] == hash) { // replace with last element (avoid holes) tags[i] = tags[MAX_TAGS-1]; num_tags--; // can only happen once, so we're done. return; } } void debug_filter_clear() { for(uint i = 0; i < MAX_TAGS; i++) tags[i] = 0; } static bool filter_allows(const char* text) { uint i; for(i = 0; ; i++) { // no | found => no tag => should always be displayed if(text[i] == ' ' || text[i] == '\0') return true; if(text[i] == '|' && i != 0) break; } const u32 hash = fnv_hash(text, i); // check if entry allowing this tag is found for(i = 0; i < MAX_TAGS; i++) if(tags[i] == hash) return true; return false; } void debug_printf(const char* fmt, ...) { char buf[MAX_CHARS]; buf[ARRAY_SIZE(buf)-1] = '\0'; va_list ap; va_start(ap, fmt); vsnprintf(buf, MAX_CHARS-1, fmt, ap); va_end(ap); if(filter_allows(buf)) debug_puts(buf); } void debug_wprintf(const wchar_t* fmt, ...) { wchar_t wcs_buf[MAX_CHARS]; wcs_buf[ARRAY_SIZE(wcs_buf)-1] = '\0'; va_list ap; va_start(ap, fmt); vswprintf(wcs_buf, MAX_CHARS-1, fmt, ap); va_end(ap); // convert wchar_t to UTF-8. // // rationale: according to fwide(3) and assorted manpage, FILEs are in // single character or in wide character mode. 
When a FILE is in // single character mode, wide character writes will fail, and no // conversion is done automatically. Thus the manual conversion. // // it's done here (instead of in OS-specific debug_putws) because // filter_allow requires the conversion also. // // jw: MSDN wcstombs dox say 2 bytes per wchar is enough. // not sure about this; to be on the safe side, we check for overflow. const size_t MAX_BYTES = MAX_CHARS*2; char mbs_buf[MAX_BYTES]; mbs_buf[MAX_BYTES-1] = '\0'; size_t bytes_written = wcstombs(mbs_buf, wcs_buf, MAX_BYTES); // .. error if(bytes_written == (size_t)-1) debug_warn("invalid wcs character encountered"); // .. exact fit, make sure it's 0-terminated if(bytes_written == MAX_BYTES) mbs_buf[MAX_BYTES-1] = '\0'; // .. paranoia: overflow is impossible debug_assert(bytes_written <= MAX_BYTES); if(filter_allows(mbs_buf)) debug_puts(mbs_buf); } //----------------------------------------------------------------------------- LibError debug_write_crashlog(const wchar_t* text) { // note: we go through some gyrations here (strcpy+strcat) to avoid // dependency on file code (path_append). char N_path[PATH_MAX]; strcpy_s(N_path, ARRAY_SIZE(N_path), ah_get_log_dir()); strcat_s(N_path, ARRAY_SIZE(N_path), "crashlog.txt"); FILE* f = fopen(N_path, "w"); if(!f) WARN_RETURN(ERR_FILE_ACCESS); fputwc(0xfeff, f); // BOM fwprintf(f, L"%ls\n", text); fwprintf(f, L"\n\n====================================\n\n"); // allow user to bundle whatever information they want ah_bundle_logs(f); fwprintf(f, L"Last known activity:\n\n %ls\n", debug_log); fclose(f); return ERR_OK; } ////////////////////////////////////////////////////////////////////////////// // // storage for and construction of strings describing a symbol // ////////////////////////////////////////////////////////////////////////////// // tightly pack strings within one large buffer. we never need to free them, // since the program structure / addresses can never change. 
static const size_t STRING_BUF_SIZE = 64*KiB; static char* string_buf; static char* string_buf_pos; static const char* symbol_string_build(void* symbol, const char* name, const char* file, int line) { // maximum bytes allowed per string (arbitrary). // needed to prevent possible overflows. const size_t STRING_MAX = 1000; if(!string_buf) { string_buf = (char*)malloc(STRING_BUF_SIZE); if(!string_buf) { WARN_ERR(ERR_NO_MEM); return 0; } string_buf_pos = string_buf; } // make sure there's enough space for a new string char* string = string_buf_pos; if(string + STRING_MAX >= string_buf + STRING_BUF_SIZE) { WARN_ERR(ERR_LIMIT); return 0; } // user didn't know name/file/line. attempt to resolve from debug info. char name_buf[DBG_SYMBOL_LEN]; char file_buf[DBG_FILE_LEN]; if(!name || !file || !line) { int line_buf; (void)debug_resolve_symbol(symbol, name_buf, file_buf, &line_buf); // only override the original parameters if value is meaningful; // otherwise, stick with what we got, even if 0. // (obviates test of return value; correctly handles partial failure). if(name_buf[0]) name = name_buf; if(file_buf[0]) file = file_buf; if(line_buf) line = line_buf; } // file and line are available: write them int len; if(file && line) { // strip path from filename (long and irrelevant) const char* fn_only = path_name_only(file); len = snprintf(string, STRING_MAX-1, "%s:%05d ", fn_only, line); } // only address is known else len = snprintf(string, STRING_MAX-1, "%p ", symbol); // append symbol name if(name) { snprintf(string+len, STRING_MAX-1-len, "%s", name); stl_simplify_name(string+len); } return string; } ////////////////////////////////////////////////////////////////////////////// // // cache, mapping symbol address to its description string. // ////////////////////////////////////////////////////////////////////////////// // note: we don't want to allocate a new string for every symbol - // that would waste lots of memory. 
instead, when a new address is first // encountered, allocate a string describing it, and store for later use. // hash table entry; valid iff symbol != 0. the string pointer must remain // valid until the cache is shut down. struct Symbol { void* symbol; const char* string; }; static const uint MAX_SYMBOLS = 2048; static Symbol* symbols; static uint total_symbols; static uint hash_jumps; // strip off lower 2 bits, since it's unlikely that 2 symbols are // within 4 bytes of one another. static uint hash(void* symbol) { const uintptr_t address = (uintptr_t)symbol; return (uint)( (address >> 2) % MAX_SYMBOLS ); } // algorithm: hash lookup with linear probing. static const char* symbol_string_from_cache(void* symbol) { // hash table not initialized yet, nothing to find if(!symbols) return 0; uint idx = hash(symbol); for(;;) { Symbol* c = &symbols[idx]; // not in table if(!c->symbol) return 0; // found if(c->symbol == symbol) return c->string; idx = (idx+1) % MAX_SYMBOLS; } } // associate (must remain valid) with , for // later calls to symbol_string_from_cache. static void symbol_string_add_to_cache(const char* string, void* symbol) { if(!symbols) { // note: must be zeroed to set each Symbol to "invalid" symbols = (Symbol*)calloc(MAX_SYMBOLS, sizeof(Symbol)); if(!symbols) debug_warn("failed to allocate symbols"); } // hash table is completely full (guard against infinite loop below). // if this happens, the string won't be cached - nothing serious. 
if(total_symbols >= MAX_SYMBOLS) WARN_ERR_RETURN(ERR_LIMIT); total_symbols++; // find Symbol slot in hash table Symbol* c; uint idx = hash(symbol); for(;;) { c = &symbols[idx]; // found an empty slot if(!c->symbol) break; idx = (idx+1) % MAX_SYMBOLS; hash_jumps++; } // commit Symbol information c->symbol = symbol; c->string = string; string_buf_pos += strlen(string)+1; } const char* debug_get_symbol_string(void* symbol, const char* name, const char* file, int line) { // return it if already in cache const char* string = symbol_string_from_cache(symbol); if(string) return string; // try to build a new string string = symbol_string_build(symbol, name, file, line); if(!string) return 0; symbol_string_add_to_cache(string, symbol); return string; } //----------------------------------------------------------------------------- // output //----------------------------------------------------------------------------- // translates and displays the given strings in a dialog. // this is typically only used when debug_display_error has failed or // is unavailable because that function is much more capable. // implemented via sys_display_msgw; see documentation there. void debug_display_msgw(const wchar_t* caption, const wchar_t* msg) { sys_display_msgw(ah_translate(caption), ah_translate(msg)); } // display the error dialog. shows along with a stack trace. // context and skip are as with debug_dump_stack. // flags: see DisplayErrorFlags. file and line indicate where the error // occurred and are typically passed as __FILE__, __LINE__. ErrorReaction debug_display_error(const wchar_t* description, int flags, uint skip, void* context, const char* file, int line) { if(!file || file[0] == '\0') file = "unknown"; if(line <= 0) line = 0; // translate description = ah_translate(description); // display in output window; double-click will navigate to error location. 
const char* fn_only = path_name_only(file); debug_wprintf(L"%hs(%d): %ls\n", fn_only, line, description); // allocate memory for the stack trace. this needs to be quite large, // so preallocating is undesirable. it must work even if the heap is // corrupted (since that's an error we might want to display), so // we cannot rely on the heap alloc alone. what we do is try malloc, // fall back to alloca if it failed, and give up after that. wchar_t* text = 0; size_t max_chars = 256*KiB; // .. try allocating from heap void* heap_mem = malloc(max_chars*sizeof(wchar_t)); text = (wchar_t*)heap_mem; // .. heap alloc failed; try allocating from stack if(!text) { max_chars = 128*KiB; // (stack limit is usually 1 MiB) text = (wchar_t*)alloca(max_chars*sizeof(wchar_t)); } // alloc succeeded; proceed if(text) { static const wchar_t fmt[] = L"%ls\r\n\r\nCall stack:\r\n\r\n"; int len = swprintf(text, max_chars, fmt, description); // paranoia - only dump stack if this string output succeeded. if(len >= 0) { if(!context) skip++; // skip this frame debug_dump_stack(text+len, max_chars-len, skip, context); } } else text = L"(insufficient memory to display error message)"; debug_write_crashlog(text); ErrorReaction er = sys_display_error(text, flags); // note: debug_break-ing here to make sure the app doesn't continue // running is no longer necessary. debug_display_error now determines our // window handle and is modal. // handle "break" request unless the caller wants to (doing so here // instead of within the dlgproc yields a correct call stack) if(er == ER_BREAK && !(flags & DE_MANUAL_BREAK)) { debug_break(); er = ER_CONTINUE; } free(heap_mem); // no-op if not allocated from heap // after debug_break to ease debugging, but before exit to avoid leak. // exit requested. do so here to disburden callers. if(er == ER_EXIT) { // disable memory-leak reporting to avoid a flood of warnings // (lots of stuff will leak since we exit abnormally). 
debug_heap_enable(DEBUG_HEAP_NONE); #if CONFIG_USE_MMGR mmgr_set_options(0); #endif exit(EXIT_FAILURE); } return er; } -// notify the user that an assertion failed; displays a stack trace with -// local variables. ErrorReaction debug_assert_failed(const char* expr, const char* file, int line, const char* func) { // for edge cases in some functions, warnings (=asserts) are raised in // addition to returning an error code. self-tests deliberately trigger // these cases and check for the latter but shouldn't cause the former. // we therefore squelch them here. // (note: don't do so in lib.h's CHECK_ERR or debug_assert to reduce // compile-time dependency on self_test.h) if(self_test_active) return ER_CONTINUE; // __FILE__ evaluates to the full path (albeit without drive letter) // which is rather long. we only display the base name for clarity. const char* fn_only = path_name_only(file); uint skip = 1; void* context = 0; wchar_t buf[400]; swprintf(buf, ARRAY_SIZE(buf), L"Assertion failed at %hs:%d (%hs): \"%hs\"", fn_only, line, func, expr); - return debug_display_error(buf, DE_ALLOW_SUPPRESS|DE_MANUAL_BREAK, skip,context, fn_only,line); + return debug_display_error(buf, DE_ALLOW_SUPPRESS|DE_MANUAL_BREAK, skip, context, fn_only, line); } ErrorReaction debug_warn_err(LibError err, const char* file, int line, const char* func) { // for edge cases in some functions, warnings (=asserts) are raised in // addition to returning an error code. self-tests deliberately trigger // these cases and check for the latter but shouldn't cause the former. // we therefore squelch them here. // (note: don't do so in lib.h's CHECK_ERR or debug_assert to reduce // compile-time dependency on self_test.h) if(self_test_active) return ER_CONTINUE; // __FILE__ evaluates to the full path (albeit without drive letter) // which is rather long. we only display the base name for clarity. 
const char* fn_only = path_name_only(file); uint skip = 1; void* context = 0; wchar_t buf[400]; char err_buf[200]; error_description_r(err, err_buf, ARRAY_SIZE(err_buf)); swprintf(buf, ARRAY_SIZE(buf), L"Function call failed at %hs:%d (%hs): return value was %d (%hs)", fn_only, line, func, err, err_buf); return debug_display_error(buf, DE_ALLOW_SUPPRESS|DE_MANUAL_BREAK, skip,context, fn_only,line); } //----------------------------------------------------------------------------- // thread naming //----------------------------------------------------------------------------- // when debugging multithreading problems, logging the currently running // thread is helpful; a user-specified name is easier to remember than just // the thread handle. to that end, we provide a robust TLS mechanism that is // much safer than the previous method of hijacking TIB.pvArbitrary. // // note: on Win9x thread "IDs" are pointers to the TIB xor-ed with an // obfuscation value calculated at boot-time. // // __declspec(thread) et al. are now available on VC and newer GCC but we // implement TLS manually (via pthread_setspecific) to ensure compatibility. static pthread_key_t tls_key; static pthread_once_t tls_once = PTHREAD_ONCE_INIT; // provided for completeness and to avoid displaying bogus resource leaks. static void tls_shutdown() { WARN_ERR(pthread_key_delete(tls_key)); tls_key = 0; } // (called via pthread_once from debug_set_thread_name) static void tls_init() { WARN_ERR(pthread_key_create(&tls_key, 0)); // no dtor // note: do not use atexit; this may be called before _cinit. } // set the current thread's name; it will be returned by subsequent calls to // debug_get_thread_name. // // the string pointed to by MUST remain valid throughout the // entire program; best to pass a string literal. allocating a copy // would be quite a bit more work due to cleanup issues. 
// // if supported on this platform, the debugger is notified of the new name; // it will be displayed there instead of just the handle. void debug_set_thread_name(const char* name) { WARN_ERR(pthread_once(&tls_once, tls_init)); WARN_ERR(pthread_setspecific(tls_key, name)); #if OS_WIN wdbg_set_thread_name(name); #endif } // return the pointer assigned by debug_set_thread_name or 0 if // that hasn't been done yet for this thread. const char* debug_get_thread_name() { return (const char*)pthread_getspecific(tls_key); } void debug_shutdown() { tls_shutdown(); } Index: ps/trunk/source/lib/sysdep/ia32.h =================================================================== --- ps/trunk/source/lib/sysdep/ia32.h (revision 3910) +++ ps/trunk/source/lib/sysdep/ia32.h (revision 3911) @@ -1,165 +1,167 @@ /** * ========================================================================= * File : ia32.h * Project : 0 A.D. * Description : C++ and inline asm implementations for IA-32. * * @author Jan.Wassenberg@stud.uni-karlsruhe.de * ========================================================================= */ /* * Copyright (c) 2003-2005 Jan Wassenberg * * Redistribution and/or modification are also permitted under the * terms of the GNU General Public License as published by the * Free Software Foundation (version 2 or later, at your option). * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. */ #ifndef IA32_H #define IA32_H #if !CPU_IA32 #error "including ia32.h without CPU_IA32=1" #endif #include "lib/types.h" // some of these are implemented in asm, so make sure name mangling is // disabled. 
#ifdef __cplusplus extern "C" { #endif // call before any of the following functions extern void ia32_init(); // // fast implementations of some sysdep.h functions; see documentation there // extern float ia32_rintf(float f); extern double ia32_rint(double f); extern float ia32_fminf(float f1, float f2); extern float ia32_fmaxf(float f1, float f2); extern i32 ia32_i32_from_float(float f); extern i32 ia32_i32_from_double(double d); extern i64 ia32_i64_from_double(double d); // fpclassify return values #define IA32_FP_NAN 0x0100 #define IA32_FP_NORMAL 0x0400 #define IA32_FP_INFINITE (IA32_FP_NAN | IA32_FP_NORMAL) #define IA32_FP_ZERO 0x4000 #define IA32_FP_SUBNORMAL (IA32_FP_NORMAL | IA32_FP_ZERO) extern uint ia32_fpclassify(double d); extern uint ia32_fpclassifyf(float f); extern void* ia32_memcpy(void* dst, const void* src, size_t nbytes); // asm // FPU control word // .. Precision Control: #define IA32_MCW_PC 0x0300 #define IA32_PC_24 0x0000 // .. Rounding Control: #define IA32_MCW_RC 0x0C00 #define IA32_RC_NEAR 0x0000 #define IA32_RC_DOWN 0x0400 #define IA32_RC_UP 0x0800 #define IA32_RC_CHOP 0x0C00 // .. Exception Mask: #define IA32_MCW_EM 0x003f #define IA32_EM_INVALID BIT(0) #define IA32_EM_DENORMAL BIT(1) #define IA32_EM_ZERODIVIDE BIT(2) #define IA32_EM_OVERFLOW BIT(3) #define IA32_EM_UNDERFLOW BIT(4) #define IA32_EM_INEXACT BIT(5) extern uint ia32_control87(uint new_val, uint mask); // asm -extern u64 rdtsc(void); +extern u64 ia32_rdtsc(void); extern void ia32_debug_break(void); -// CPU caps (128 bits) +// CPU capability flags (128 bits) // do not change the order! 
-enum CpuCap +enum IA32Cap { // standard (ecx) - currently only defined by Intel - SSE3 = 0+0, // Streaming SIMD Extensions 3 - EST = 0+7, // Enhanced Speedstep Technology + IA32_CAP_SSE3 = 0+0, // Streaming SIMD Extensions 3 + IA32_CAP_EST = 0+7, // Enhanced Speedstep Technology // standard (edx) - TSC = 32+4, // TimeStamp Counter - CMOV = 32+15, // Conditional MOVe - MMX = 32+23, // MultiMedia eXtensions - SSE = 32+25, // Streaming SIMD Extensions - SSE2 = 32+26, // Streaming SIMD Extensions 2 - HT = 32+28, // HyperThreading + IA32_CAP_FPU = 32+0, // Floating Point Unit + IA32_CAP_TSC = 32+4, // TimeStamp Counter + IA32_CAP_CMOV = 32+15, // Conditional MOVe + IA32_CAP_MMX = 32+23, // MultiMedia eXtensions + IA32_CAP_SSE = 32+25, // Streaming SIMD Extensions + IA32_CAP_SSE2 = 32+26, // Streaming SIMD Extensions 2 + IA32_CAP_HT = 32+28, // HyperThreading // extended (ecx) // extended (edx) - currently only defined by AMD - AMD_MP = 96+19, // MultiProcessing capable; reserved on AMD64 - AMD_MMX_EXT = 96+22, - AMD_3DNOW_PRO = 96+30, - AMD_3DNOW = 96+31 + IA32_CAP_AMD_MP = 96+19, // MultiProcessing capable; reserved on AMD64 + IA32_CAP_AMD_MMX_EXT = 96+22, + IA32_CAP_AMD_3DNOW_PRO = 96+30, + IA32_CAP_AMD_3DNOW = 96+31 }; -extern bool ia32_cap(CpuCap cap); +// indicate if the CPU supports the indicated cap. +extern bool ia32_cap(IA32Cap cap); extern void ia32_get_cpu_info(void); //----------------------------------------------------------------------------- // internal use only // write the current execution state (e.g. all register values) into // (Win32::CONTEXT*)pcontext (defined as void* to avoid dependency). extern void ia32_get_current_context(void* pcontext); extern void ia32_asm_init(); // checks if there is an IA-32 CALL instruction right before ret_addr. // returns ERR_OK if so and ERR_FAIL if not. // also attempts to determine the call target. 
if that is possible // (directly addressed relative or indirect jumps), it is stored in // target, which is otherwise 0. // // this is useful for walking the stack manually. extern LibError ia32_get_call_target(void* ret_addr, void** target); // order in which registers are stored in regs array // (do not change! brand string relies on this ordering) enum IA32Regs { EAX, EBX, ECX, EDX }; // try to call the specified CPUID sub-function. returns true on success or // false on failure (i.e. CPUID or the specific function not supported). // returns eax, ebx, ecx, edx registers in above order. extern bool ia32_cpuid(u32 func, u32* regs); #ifdef __cplusplus } #endif #endif // #ifndef IA32_H Index: ps/trunk/source/lib/sysdep/win/wtime.cpp =================================================================== --- ps/trunk/source/lib/sysdep/win/wtime.cpp (revision 3910) +++ ps/trunk/source/lib/sysdep/win/wtime.cpp (revision 3911) @@ -1,833 +1,833 @@ /** * ========================================================================= * File : wtime.cpp * Project : 0 A.D. * Description : emulate POSIX high resolution timer on Windows. * * @author Jan.Wassenberg@stud.uni-karlsruhe.de * ========================================================================= */ /* * Copyright (c) 2004 Jan Wassenberg * * Redistribution and/or modification are also permitted under the * terms of the GNU General Public License as published by the * Free Software Foundation (version 2 or later, at your option). * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
*/ #include "precompiled.h" #include "lib.h" #include "posix.h" #include "adts.h" #include "sysdep/ia32.h" #include "win_internal.h" #include #include // _beginthreadex #include #include #include // define to disable time sources (useful for simulating other systems) //#define NO_QPC //#define NO_TSC static const int CALIBRATION_FREQ = 1; // automatic module init (before main) and shutdown (before termination) #pragma data_seg(WIN_CALLBACK_PRE_LIBC(b)) WIN_REGISTER_FUNC(wtime_init); #pragma data_seg(WIN_CALLBACK_POST_ATEXIT(b)) WIN_REGISTER_FUNC(wtime_shutdown); #pragma data_seg() // see http://www.gamedev.net/reference/programming/features/timing/ . // rationale: // we no longer use TGT, due to issues on Win9x; GTC is just as good. // (don't want to accelerate the tick rate, because performance will suffer). // avoid dependency on WinMM (event timer) to shorten startup time. // // we go to the trouble of allowing switching time sources at runtime // (=> have to be careful to keep the timer continuous) because we want // to allow overriding the implementation choice via command line switch, // in case a time source turns out to have a serious problem. // (default values for HRT_NONE impl) // initial measurement of the time source's tick rate. not necessarily // correct (e.g. when using TSC; cpu_freq isn't exact). static double hrt_nominal_freq = -1.0; // actual resolution of the time source (may differ from hrt_nominal_freq // for timers with adjustment > 1 tick). static double hrt_res = -1.0; // current ticks per second; average of last few values measured in // calibrate(). needed to prevent long-term drift, and because // hrt_nominal_freq isn't necessarily correct. only affects the ticks since // last calibration - don't want to retroactively change the time. static double hrt_cur_freq = -1.0; // ticks at init or last calibration. // ticks since then are scaled by 1/hrt_cur_freq and added to hrt_cal_time // to yield the current time. 
static i64 hrt_cal_ticks = 0;

// value of hrt_time() at last calibration. needed so that changes to
// hrt_cur_freq don't affect the previous ticks (example: 72 ticks elapsed,
// nominal freq = 8 => time = 9.0. if freq is calculated as 9, time would
// go backwards to 8.0).
static double hrt_cal_time = 0.0;


// possible high resolution timers, in order of preference.
// see below for timer properties + problems.
// used as index into overrides[].
enum HRTImpl
{
	// CPU timestamp counter
	HRT_TSC,

	// Windows QueryPerformanceCounter
	HRT_QPC,

	// Windows GetTickCount
	HRT_GTC,

	// there will always be a valid timer in use.
	// this is only used with hrt_override_impl.
	HRT_NONE,

	HRT_NUM_IMPLS
};

// the implementation currently in use; chosen/re-chosen at runtime.
static HRTImpl hrt_impl = HRT_NONE;

// while we do our best to work around timer problems or avoid them if unsafe,
// future requirements and problems may be different:
// allow the user or app to override our decisions (via hrt_override_impl)
enum HRTOverride
{
	// allow use of this implementation if available,
	// and we can work around its problems
	//
	// HACK: give it value 0 for easier static data initialization
	HRT_DEFAULT = 0,

	// override our 'safe to use' recommendation
	// set by hrt_override_impl (via command line arg or console function)
	HRT_DISABLE,
	HRT_FORCE
};

static HRTOverride overrides[HRT_NUM_IMPLS];
	// HRTImpl enums as index
	// HACK: no init needed - static data is zeroed (= HRT_DEFAULT)
cassert((int)HRT_DEFAULT == 0);

// convenience constants (micro/deca-nano/nano second multipliers)
static const long _1e6 = 1000000;
static const long _1e7 = 10000000;
static const i64 _1e9 = 1000000000;

// serialize access to the shared timer state above
// (WTIME_CS is presumably a critical-section/lock id — see win_lock).
static inline void lock(void) { win_lock(WTIME_CS); }
static inline void unlock(void) { win_unlock(WTIME_CS); }

// decide upon a HRT implementation, checking if we can work around
// each timer's issues on this platform, but allow user override
// in case there are unforeseen problems with one of them.
// order of preference (due to resolution and speed): TSC, QPC, GTC.
// split out of reset_impl so we can just return when impl is chosen. static LibError choose_impl() { bool safe; #define SAFETY_OVERRIDE(impl)\ if(overrides[impl] == HRT_DISABLE)\ safe = false;\ if(overrides[impl] == HRT_FORCE)\ safe = true; #if CPU_IA32 && !defined(NO_TSC) // CPU Timestamp Counter (incremented every clock) // ns resolution, moderate precision (poor clock crystal?) // // issues: // - multiprocessor systems: may be inconsistent across CPUs. // we could discard really bad values, but that's still inaccurate. // having a high-priority thread with set CPU affinity read the TSC // might work, but would be rather slow. could fix the problem by // keeping per-CPU timer state (freq and delta). we'd use the APIC ID // (cpuid, function 1) or GetCurrentProcessorNumber (only available // on Win Server 2003) to determine the CPU. however, this is // too much work for little benefit ATM, so call it unsafe. // - deep sleep modes: TSC may not be advanced. // not a problem though, because if the TSC is disabled, the CPU // isn't doing any other work, either. // - SpeedStep/'gearshift' CPUs: frequency may change. // this happens on notebooks now, but eventually desktop systems // will do this as well (if not to save power, for heat reasons). // frequency changes are too often and drastic to correct, // and we don't want to mess with the system power settings => unsafe. - if(cpu_freq > 0.0 && ia32_cap(TSC)) + if(cpu_freq > 0.0 && ia32_cap(IA32_CAP_TSC)) { safe = (cpus == 1 && cpu_speedstep == 0); SAFETY_OVERRIDE(HRT_TSC); if(safe) { hrt_impl = HRT_TSC; hrt_nominal_freq = cpu_freq; hrt_res = (1.0 / hrt_nominal_freq); return ERR_OK; } } #endif // TSC #if OS_WIN && !defined(NO_QPC) // Windows QueryPerformanceCounter API // implementations: // - PIT on Win2k - 838 ns resolution, slow to read (~3 µs) // - PMT on WinXP - 279 ns ", moderate overhead (700 ns?) // issues: // 1) Q274323: may jump several seconds under heavy PCI bus load. 
// not a problem, because the older systems on which this occurs // have safe TSCs, so that is used instead. // 2) "System clock problem can inflate benchmark scores": // incorrect value if not polled every 4.5 seconds? solved // by calibration thread, which reads timer every second anyway. // - TSC on MP HAL - see TSC above. // cache freq because QPF is fairly slow. static i64 qpc_freq = -1; // first call - check if QPC is supported if(qpc_freq == -1) { LARGE_INTEGER i; BOOL qpc_ok = QueryPerformanceFrequency(&i); qpc_freq = qpc_ok? i.QuadPart : 0; } // QPC is available if(qpc_freq > 0) { // PIT and PMT are safe. if(qpc_freq == 1193182 || qpc_freq == 3579545) safe = true; // make sure QPC doesn't use the TSC // (if it were safe, we would have chosen it above) else { // can't decide yet - assume unsafe if(cpu_freq == 0.0) safe = false; else { // compare QPC freq to CPU clock freq - can't rule out HPET, // because its frequency isn't known (it's at least 10 MHz). double freq_dist = fabs(cpu_freq/qpc_freq - 1.0); safe = freq_dist > 0.05; // safe if freqs not within 5% (i.e. it doesn't use TSC) } } SAFETY_OVERRIDE(HRT_QPC); if(safe) { hrt_impl = HRT_QPC; hrt_nominal_freq = (double)qpc_freq; hrt_res = (1.0 / hrt_nominal_freq); return ERR_OK; } } #endif // QPC // // GTC // safe = true; SAFETY_OVERRIDE(HRT_GTC); if(safe) { hrt_impl = HRT_GTC; hrt_nominal_freq = 1000.0; // units returned hrt_res = 1e-2; // guess, in case the following fails // get actual resolution DWORD adj; BOOL adj_disabled; // unused, but must be passed to GSTA DWORD timer_period; // [hectonanoseconds] if(GetSystemTimeAdjustment(&adj, &timer_period, &adj_disabled)) hrt_res = (timer_period / 1e7); return ERR_OK; } hrt_impl = HRT_NONE; hrt_nominal_freq = -1.0; WARN_RETURN(ERR_TIMER_NO_SAFE_IMPL); } // return ticks (unspecified start point). lock must be held. // // split to allow calling from reset_impl_lk without recursive locking. 
// (not a problem, but avoids a BoundsChecker warning) static i64 ticks_lk() { switch(hrt_impl) { // TSC #if CPU_IA32 && !defined(NO_TSC) case HRT_TSC: - return (i64)rdtsc(); + return (i64)ia32_rdtsc(); #endif // QPC #if OS_WIN && !defined(NO_QPC) case HRT_QPC: { LARGE_INTEGER i; BOOL ok = QueryPerformanceCounter(&i); WARN_IF_FALSE(ok); // shouldn't fail if it was chosen above return i.QuadPart; } #endif // TGT #if OS_WIN case HRT_GTC: return (i64)GetTickCount(); #endif // add further timers here. case HRT_NUM_IMPLS: default: debug_warn("invalid impl"); //-fallthrough case HRT_NONE: return 0; } // switch(impl) } // return seconds since init. lock must be held. // // split to allow calling from calibrate without recursive locking. // (not a problem, but avoids a BoundsChecker warning) static double time_lk() { debug_assert(hrt_cur_freq > 0.0); debug_assert(hrt_cal_ticks > 0); // elapsed ticks and time since last calibration const i64 delta_ticks = ticks_lk() - hrt_cal_ticks; const double delta_time = delta_ticks / hrt_cur_freq; return hrt_cal_time + delta_time; } // this module is dependent upon detect (supplies system information needed to // choose a HRT), which in turn uses our timer to detect the CPU clock // when running on Windows (clock(), the only cross platform HRT available on // Windows, isn't good enough - only 10..15 ms resolution). // // we first use a safe timer, and choose again after client code calls // hrt_override_impl when system information is available. // the timer will work without this call, but it won't use certain // implementations. we do it this way, instead of polling on each timer use, // because a timer implementation change may cause the timer to jump a bit. // choose a HRT implementation and prepare it for use. lock must be held. // // don't want to saddle timer module with the problem of initializing // us on first call - it wouldn't otherwise need to be thread-safe. 
static LibError reset_impl_lk() { HRTImpl old_impl = hrt_impl; // if changing implementation: get time at which to continue // (when switching, we set everything calibrate() would output) double old_time; // .. first call; hrt_cur_freq not initialized; can't call time_lk. // setting to 0 will start the timer at 0. if(hrt_cur_freq <= 0.0) old_time = 0.0; // .. timer has been initialized; use current reported time. else old_time = time_lk(); RETURN_ERR(choose_impl()); debug_assert(hrt_impl != HRT_NONE && hrt_nominal_freq > 0.0); // impl has changed; reset timer state. if(old_impl != hrt_impl) { hrt_cur_freq = hrt_nominal_freq; hrt_cal_time = old_time; hrt_cal_ticks = ticks_lk(); } return ERR_OK; } // return ticks (unspecified start point) static i64 hrt_ticks() { i64 t; lock(); t = ticks_lk(); unlock(); return t; } // return seconds since init. static double hrt_time() { double t; lock(); t = time_lk(); unlock(); return t; } // return seconds between start and end timestamps (returned by hrt_ticks). // negative if end comes before start. not intended to be called for long // intervals (start -> end), since the current frequency is used! static double hrt_delta_s(i64 start, i64 end) { // paranoia: reading double may not be atomic. lock(); double freq = hrt_cur_freq; unlock(); debug_assert(freq != -1.0 && "hrt_delta_s: hrt_cur_freq not set"); return (end - start) / freq; } // return current timer implementation and its nominal (rated) frequency. // nominal_freq is never 0. // implementation only changes after hrt_override_impl. // // may be called before first hrt_ticks / hrt_time, so do init here also. static void hrt_query_impl(HRTImpl& impl, double& nominal_freq, double& res) { lock(); impl = hrt_impl; nominal_freq = hrt_nominal_freq; res = hrt_res; unlock(); debug_assert(nominal_freq > 0.0 && "hrt_query_impl: invalid hrt_nominal_freq"); } // override our 'safe to use' decision. 
// resets (and chooses another, if applicable) implementation;
// the timer may jump after doing so.
// call with HRT_DEFAULT, HRT_NONE to re-evaluate implementation choice
// after system info becomes available.
static LibError hrt_override_impl(HRTOverride ovr, HRTImpl impl)
{
	// validate both enums before touching state.
	if((ovr != HRT_DISABLE && ovr != HRT_FORCE && ovr != HRT_DEFAULT) ||
	   (impl != HRT_TSC && impl != HRT_QPC && impl != HRT_GTC && impl != HRT_NONE))
		WARN_RETURN(ERR_INVALID_PARAM);

	lock();

	overrides[impl] = ovr;
	LibError ret = reset_impl_lk();

	unlock();
	return ret;
}


//////////////////////////////////////////////////////////////////////////////
//
// calibration
//
//////////////////////////////////////////////////////////////////////////////


// 'safe' timer, used to measure HRT freq in calibrate()
static const long safe_timer_freq = 1000;

// return milliseconds from a low-resolution but reliable source.
static long safe_time()
{
#if OS_WIN
	return (long)GetTickCount();
#else
	return (long)(clock() * 1000.0 / CLOCKS_PER_SEC);
#endif
}


// measure current HRT freq - prevents long-term drift; also useful because
// hrt_nominal_freq isn't necessarily exact.
//
// lock must be held.
static void calibrate_lk()
{
	debug_assert(hrt_cal_ticks > 0);

	// we're called from a WinMM event or after thread wakeup,
	// so the timer has just been updated.
	// no need to determine tick / compensate.

	// get elapsed HRT ticks
	const i64 hrt_cur = ticks_lk();
	const i64 hrt_d = hrt_cur - hrt_cal_ticks;
	hrt_cal_ticks = hrt_cur;

	hrt_cal_time += hrt_d / hrt_cur_freq;

	// get elapsed time from safe millisecond timer
	static long safe_last = LONG_MAX;
	// chosen so that dt and therefore hrt_est_freq will be negative
	// on first call => it won't be added to buffer
	const long safe_cur = safe_time();
	const double dt = (safe_cur - safe_last) / safe_timer_freq;
	safe_last = safe_cur;

	double hrt_est_freq = hrt_d / dt;

	// past couple of calculated hrt freqs, for averaging
	// NOTE(review): template arguments appear stripped by extraction here;
	// presumably RingBuf<double, N> in the original - confirm against repo.
	typedef RingBuf SampleBuf;
	static SampleBuf samples;

	// only add to buffer if within 10% of nominal
	// (don't want to pollute buffer with flukes / incorrect results)
	if(fabs(hrt_est_freq/hrt_nominal_freq - 1.0) < 0.10)
	{
		samples.push_back(hrt_est_freq);

		// average all samples in buffer
		double freq_sum = std::accumulate(samples.begin(), samples.end(), 0.0);
		hrt_cur_freq = freq_sum / (int)samples.size();
	}
	else
	{
		// fluke; discard history and fall back to the rated frequency.
		samples.clear();
		hrt_cur_freq = hrt_nominal_freq;
	}

	debug_assert(hrt_cur_freq > 0.0);
}


// calibration thread
// note: winmm event is better than a thread or just checking elapsed time
// in hrt_ticks, because it's called right after GTC is updated;
// otherwise, we may be in the middle of a tick.
// however, we want to avoid dependency on WinMM to shorten startup time.
// hence, start a thread.

static pthread_t thread;
static sem_t exit_flag;

// periodically re-measure the HRT frequency until exit_flag is posted.
static void* calibration_thread(void* UNUSED(data))
{
	debug_set_thread_name("wtime");

	for(;;)
	{
		// calculate absolute timeout for sem_timedwait
		struct timespec abs_timeout;
		clock_gettime(CLOCK_REALTIME, &abs_timeout);
		abs_timeout.tv_nsec += _1e9 / CALIBRATION_FREQ;
		// .. handle nanosecond wraparound (must not be > 1000m)
		if(abs_timeout.tv_nsec >= _1e9)
		{
			abs_timeout.tv_nsec -= _1e9;
			abs_timeout.tv_sec++;
		}

		errno = 0;
		// if we acquire the semaphore, exit was requested.
		if(sem_timedwait(&exit_flag, &abs_timeout) == 0)
			break;

		// actual error: warn
		if(errno != ETIMEDOUT)
			debug_warn("wtime calibration_thread: sem_timedwait failed");

		lock();
		calibrate_lk();
		unlock();
	}

	return 0;
}

// create the semaphore and spawn the calibration thread.
static inline LibError init_calibration_thread()
{
	sem_init(&exit_flag, 0, 0);
	pthread_create(&thread, 0, calibration_thread, 0);
	return ERR_OK;
}

// signal the calibration thread to exit and wait for it.
static inline LibError shutdown_calibration_thread()
{
	sem_post(&exit_flag);
	pthread_join(thread, 0);
	sem_destroy(&exit_flag);
	return ERR_OK;
}


static LibError hrt_init()
{
	// no lock needed - calibration thread hasn't yet been created
	RETURN_ERR(reset_impl_lk());
	return init_calibration_thread();
}

static LibError hrt_shutdown()
{
	// don't take a lock here! race condition:
	// 1) calibration_thread is about to call clock_gettime
	// 2) we take the lock and wait for the thread to exit
	// 3) thread's clock_gettime waits on the lock we're holding => deadlock
	//
	// the calibration thread protects itself anyway, so nothing breaks.
	return shutdown_calibration_thread();
}


//////////////////////////////////////////////////////////////////////////////
//
// wtime wrapper: emulates POSIX functions
//
//////////////////////////////////////////////////////////////////////////////

// NT system time and FILETIME are hectonanoseconds since Jan. 1, 1601 UTC.
// SYSTEMTIME is a struct containing month, year, etc.

//
// FILETIME -> time_t routines; used by wposix filetime_to_time_t wrapper.
//

// hectonanoseconds between Windows and POSIX epoch
static const u64 posix_epoch_hns = 0x019DB1DED53E8000;

// this function avoids the pitfall of casting FILETIME* to u64*,
// which is not safe due to differing alignment guarantees!
// on some platforms, that would result in an exception.
static u64 u64_from_FILETIME(const FILETIME* ft)
{
	return u64_from_u32(ft->dwHighDateTime, ft->dwLowDateTime);
}


// convert UTC FILETIME to seconds-since-1970 UTC:
// we just have to subtract POSIX epoch and scale down to units of seconds.
//
// note: RtlTimeToSecondsSince1970 isn't officially documented,
// so don't use that.
time_t utc_filetime_to_time_t(FILETIME* ft)
{
	u64 hns = u64_from_FILETIME(ft);
	u64 s = (hns - posix_epoch_hns) / _1e7;
	// truncate to 32 bits to stay within time_t range.
	return (time_t)(s & 0xffffffff);
}


// convert local FILETIME (includes timezone bias and possibly DST bias)
// to seconds-since-1970 UTC.
//
// note: splitting into month, year etc. is inefficient,
// but much easier than determining whether ft lies in DST,
// and ourselves adding the appropriate bias.
//
// called for FAT file times; see wposix filetime_to_time_t.
time_t time_t_from_local_filetime(FILETIME* ft)
{
	SYSTEMTIME st;
	FileTimeToSystemTime(ft, &st);

	struct tm t;
	t.tm_sec = st.wSecond;
	t.tm_min = st.wMinute;
	t.tm_hour = st.wHour;
	t.tm_mday = st.wDay;
	t.tm_mon = st.wMonth-1;
	t.tm_year = st.wYear-1900;
	t.tm_isdst = -1;
	// let the CRT determine whether this local time
	// falls under DST by the US rules.
	return mktime(&t);
}


// return nanoseconds since posix epoch as reported by system time
// only 10 or 15 ms resolution!
static i64 st_time_ns()
{
	FILETIME ft;
	GetSystemTimeAsFileTime(&ft);
	u64 hns = u64_from_FILETIME(&ft);
	return (hns - posix_epoch_hns) * 100;
}


// return nanoseconds since posix epoch as reported by HRT.
// we get system time at init and add HRT elapsed time.
static i64 time_ns()
{
	// we don't really need to get the HRT start time (it starts at 0,
	// and will be slightly higher when we get here; doesn't matter if the
	// time returned is a few ms off the real system time). do so anyway,
	// because we have to get the starting ST value anyway.
	static double hrt_start_time;
	static i64 st_start;

	// first call: latch both baselines.
	if(!st_start)
	{
		hrt_start_time = hrt_time();
		st_start = st_time_ns();
	}

	const double dt = hrt_time() - hrt_start_time;
	const i64 ns = st_start + i64_from_double(dt * _1e9);
	return ns;
}


static LibError wtime_init()
{
	hrt_init();

	// first call latches start times
	time_ns();

	return ERR_OK;
}

static LibError wtime_shutdown()
{
	return hrt_shutdown();
}

// public hook: re-evaluate HRT implementation choice once system info is in.
void wtime_reset_impl()
{
	hrt_override_impl(HRT_DEFAULT, HRT_NONE);
}


// sleep for ns nanoseconds: Sleep() for >= 1 ms, else busy-wait on the HRT.
static void sleep_ns(i64 ns)
{
	DWORD ms = DWORD(ns / _1e6);
	if(ms != 0)
		Sleep(ms);
	else
	{
		i64 t0 = hrt_ticks(), t1;
		do
			t1 = hrt_ticks();
		while(hrt_delta_s(t0, t1) * _1e9 < ns);
	}
}


int clock_gettime(clockid_t clock, struct timespec* t)
{
	// only the realtime clock is emulated.
	debug_assert(clock == CLOCK_REALTIME);

	const i64 ns = time_ns();
	t->tv_sec  = (time_t)((ns / _1e9) & 0xffffffff);
	t->tv_nsec = (long)  (ns % _1e9);
	return 0;
}


int clock_getres(clockid_t clock, struct timespec* ts)
{
	debug_assert(clock == CLOCK_REALTIME);

	HRTImpl impl;
	double nominal_freq, res;
	hrt_query_impl(impl, nominal_freq, res);

	ts->tv_sec  = 0;
	ts->tv_nsec = (long)(res * 1e9);
	return 0;
}


int nanosleep(const struct timespec* rqtp, struct timespec* /* rmtp */)
{
	i64 ns = rqtp->tv_sec;	// make sure we don't overflow
	ns *= _1e9;
	ns += rqtp->tv_nsec;
	sleep_ns(ns);
	return 0;
}


int gettimeofday(struct timeval* tv, void* UNUSED(tzp))
{
	const long us = (long)(time_ns() / 1000);
	tv->tv_sec  = (time_t)     (us / _1e6);
	tv->tv_usec = (suseconds_t)(us % _1e6);
	return 0;
}


uint sleep(uint sec)
{
	Sleep(sec * 1000);	// don't bother checking for overflow (user's fault)
	return sec;
}


int usleep(useconds_t us)
{
	debug_assert(us < _1e6);
	sleep_ns(us * 1000);	// can't overflow due to limit
	return 0;
}
Index: ps/trunk/source/lib/adts.cpp
===================================================================
--- ps/trunk/source/lib/adts.cpp	(revision 3910)
+++ ps/trunk/source/lib/adts.cpp	(revision 3911)
@@ -1,216 +1,25 @@
/**
 *
========================================================================= * File : adts.cpp * Project : 0 A.D. * Description : useful Abstract Data Types not provided by STL. * * @author Jan.Wassenberg@stud.uni-karlsruhe.de * ========================================================================= */ /* * Copyright (c) 2005 Jan Wassenberg * * Redistribution and/or modification are also permitted under the * terms of the GNU General Public License as published by the * Free Software Foundation (version 2 or later, at your option). * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. */ #include "precompiled.h" -#include - #include "adts.h" -#include "posix.h" -#include "lib/timer.h" - -//----------------------------------------------------------------------------- -// built-in self test -//----------------------------------------------------------------------------- - -#if SELF_TEST_ENABLED -namespace test { - -static void test_ringbuf() -{ - const size_t N = 49; // RingBuf capacity - const int S = 100; // number of test items - - // insert and remove immediately - { - RingBuf buf; - for(int i = 1; i < S; i++) - { - buf.push_back(i); - TEST(buf.front() == i); - buf.pop_front(); - } - TEST(buf.size() == 0 && buf.empty()); - } - - // fill buffer and overwrite old items - { - RingBuf buf; - for(int i = 1; i < S; i++) - buf.push_back(i); - TEST(buf.size() == N); - int first = buf.front(); - TEST(first == (int)(S-1 -N +1)); - for(size_t i = 0; i < N; i++) - { - TEST(buf.front() == first); - first++; - buf.pop_front(); - } - TEST(buf.size() == 0 && buf.empty()); - } - - // randomized insert/remove; must behave as does std::deque - { - srand(1); - RingBuf buf; - std::deque deq; - for(uint rep = 0; rep < 1000; rep++) - { - uint rnd_op = rand(0, 10); - // 70% - insert - if(rnd_op >= 3) - { - int item = rand(); - buf.push_back(item); - - 
deq.push_back(item); - int excess_items = (int)deq.size() - N; - if(excess_items > 0) - { - for(int i = 0; i < excess_items; i++) - { - deq.pop_front(); - } - } - } - // 30% - pop front (only if not empty) - else if(!deq.empty()) - { - buf.pop_front(); - deq.pop_front(); - } - } - TEST(buf.size() == deq.size()); - RingBuf::iterator begin = buf.begin(), end = buf.end(); - TEST(equal(begin, end, deq.begin())); - } -} - - - -// ensures all 3 variants of Landlord<> behave the same -static void test_cache_removal() -{ - Cache c1; - Cache c1r; - Cache c2; - Cache c2r; - Cache c3; - Cache c3r; - -#if defined(ENABLE_CACHE_POLICY_BENCHMARK) || 0 - // set max priority, to reduce interference while measuring. - int old_policy; static sched_param old_param; // (static => 0-init) - pthread_getschedparam(pthread_self(), &old_policy, &old_param); - static sched_param max_param; - max_param.sched_priority = sched_get_priority_max(SCHED_FIFO); - pthread_setschedparam(pthread_self(), SCHED_FIFO, &max_param); - -#define MEASURE(c, desc)\ -{\ -srand(1);\ -int cnt = 1;\ -TIMER_BEGIN(desc);\ -for(int i = 0; i < 30000; i++)\ -{\ - /* 70% add (random objects) */\ - bool add = rand(1,10) < 7;\ - if(add)\ - {\ - int key = cnt++;\ - int val = cnt++;\ - size_t size = (size_t)rand(1,100);\ - uint cost = (uint)rand(1,100);\ - c.add(key, val, size, cost);\ - }\ - else\ - {\ - size_t size;\ - int value;\ - c.remove_least_valuable(&value, &size);\ - }\ -}\ -TIMER_END(desc);\ -} - MEASURE(c1, "naive") - MEASURE(c1r, "naiverecip") - MEASURE(c2, "cached") - MEASURE(c2r, "cachedrecip") - MEASURE(c3, "lazy") - MEASURE(c3r, "lazyrecip") - - // restore previous policy and priority. 
- pthread_setschedparam(pthread_self(), old_policy, &old_param); -exit(1134); -#endif - - - srand(1); - int cnt = 1; - for(int i = 0; i < 1000; i++) - { - // 70% add (random objects) - bool add = rand(1,10) < 7; - if(add) - { - int key = cnt++; - int val = cnt++; - size_t size = (size_t)rand(1,100); - uint cost = (uint)rand(1,100); - c1.add(key, val, size, cost); - c2.add(key, val, size, cost); - c3.add(key, val, size, cost); - } - // 30% delete - make sure "least valuable" was same for all - else - { - size_t size1, size2, size3; - int value1, value2, value3; - bool removed1, removed2, removed3; - removed1 = c1.remove_least_valuable(&value1, &size1); - removed2 = c2.remove_least_valuable(&value2, &size2); - removed3 = c3.remove_least_valuable(&value3, &size3); - TEST(removed1 == removed2); - TEST(removed2 == removed3); - if (removed1) - { - TEST(size1 == size2); - TEST(value1 == value2); - TEST(size2 == size3); - TEST(value2 == value3); - } - } // else - } // for i -} - - -static void self_test() -{ - test_ringbuf(); - test_cache_removal(); -} - -SELF_TEST_REGISTER; - -} // namespace test -#endif // #if SELF_TEST_ENABLED