Index: ps/trunk/source/lib/fat_time.cpp =================================================================== --- ps/trunk/source/lib/fat_time.cpp (revision 6535) +++ ps/trunk/source/lib/fat_time.cpp (revision 6536) @@ -1,59 +1,59 @@ /** * ========================================================================= * File : fat_time.cpp * Project : 0 A.D. * Description : timestamp conversion: DOS FAT <-> Unix time_t * ========================================================================= */ // license: GPL; see lib/license.txt #include "precompiled.h" #include "fat_time.h" #include #include "lib/bits.h" time_t time_t_from_FAT(u32 fat_timedate) { const u32 fat_time = bits(fat_timedate, 0, 15); const u32 fat_date = bits(fat_timedate, 16, 31); struct tm t; // struct tm format: t.tm_sec = bits(fat_time, 0,4) * 2; // [0,59] t.tm_min = bits(fat_time, 5,10); // [0,59] t.tm_hour = bits(fat_time, 11,15); // [0,23] t.tm_mday = bits(fat_date, 0,4); // [1,31] t.tm_mon = bits(fat_date, 5,8) - 1; // [0,11] t.tm_year = bits(fat_date, 9,15) + 80; // since 1900 t.tm_isdst = -1; // unknown - let libc determine // otherwise: totally bogus, and at the limit of 32-bit time_t debug_assert(t.tm_year < 138); time_t ret = mktime(&t); debug_assert(ret != (time_t)-1); // mktime shouldn't fail return ret; } u32 FAT_from_time_t(time_t time) { // (values are adjusted for DST) struct tm* t = localtime(&time); u16 fat_time = 0; - fat_time |= (t->tm_sec/2); // 5 - fat_time |= (t->tm_min) << 5; // 6 - fat_time |= (t->tm_hour) << 11; // 5 + fat_time |= u16(t->tm_sec/2); // 5 + fat_time |= u16(t->tm_min) << 5; // 6 + fat_time |= u16(t->tm_hour) << 11; // 5 u16 fat_date = 0; - fat_date |= (t->tm_mday); // 5 - fat_date |= (t->tm_mon+1) << 5; // 4 - fat_date |= (t->tm_year-80) << 9; // 7 + fat_date |= u16(t->tm_mday); // 5 + fat_date |= u16(t->tm_mon+1) << 5; // 4 + fat_date |= u16(t->tm_year-80) << 9; // 7 u32 fat_timedate = u32_from_u16(fat_date, fat_time); return fat_timedate; } Index: 
ps/trunk/source/lib/path_util.cpp =================================================================== --- ps/trunk/source/lib/path_util.cpp (revision 6535) +++ ps/trunk/source/lib/path_util.cpp (revision 6536) @@ -1,289 +1,289 @@ /** * ========================================================================= * File : path_util.cpp * Project : 0 A.D. * Description : helper functions for path strings. * ========================================================================= */ // license: GPL; see lib/license.txt #include "precompiled.h" #include "path_util.h" #include #include ERROR_ASSOCIATE(ERR::PATH_LENGTH, "Path exceeds PATH_MAX characters", ENAMETOOLONG); ERROR_ASSOCIATE(ERR::PATH_EMPTY, "Path is an empty string", -1); ERROR_ASSOCIATE(ERR::PATH_NOT_RELATIVE, "Path is not relative", -1); ERROR_ASSOCIATE(ERR::PATH_NON_PORTABLE, "Path contains OS-specific dir separator", -1); ERROR_ASSOCIATE(ERR::PATH_NON_CANONICAL, "Path contains unsupported .. or ./", -1); ERROR_ASSOCIATE(ERR::PATH_COMPONENT_SEPARATOR, "Path component contains dir separator", -1); bool path_is_dir_sep(char c) { // note: ideally path strings would only contain '/' or even SYS_DIR_SEP. // however, windows-specific code (e.g. the sound driver detection) // uses these routines with '\\' strings. converting them all to // '/' and then back before passing to WinAPI would be annoying. // also, the self-tests verify correct operation of such strings. // it would be error-prone to only test the platform's separator // strings there. hence, we allow all separators here. if(c == '/' || c == '\\') return true; return false; } -bool path_is_dir_sepw(wchar_t c) +static bool path_is_dir_sepw(wchar_t c) { // note: ideally path strings would only contain '/' or even SYS_DIR_SEP. // however, windows-specific code (e.g. the sound driver detection) // uses these routines with '\\' strings. converting them all to // '/' and then back before passing to WinAPI would be annoying. 
// also, the self-tests verify correct operation of such strings. // it would be error-prone to only test the platform's separator // strings there. hence, we allow all separators here. if(c == L'/' || c == L'\\') return true; return false; } bool path_IsDirectory(const char* path) { if(path[0] == '\0') // root dir return true; const char lastChar = path[strlen(path)-1]; if(path_is_dir_sep(lastChar)) return true; return false; } // is s2 a subpath of s1, or vice versa? // (equal counts as subpath) bool path_is_subpath(const char* s1, const char* s2) { // make sure s1 is the shorter string if(strlen(s1) > strlen(s2)) std::swap(s1, s2); - int c1 = 0, last_c1, c2; + char c1 = 0, last_c1, c2; for(;;) { last_c1 = c1; c1 = *s1++, c2 = *s2++; // end of s1 reached: if(c1 == '\0') { // s1 matched s2 up until: if((c2 == '\0') || // its end (i.e. they're equal length) OR path_is_dir_sep(c2) || // start of next component OR path_is_dir_sep(last_c1)) // ", but both have a trailing slash // => is subpath return true; } // mismatch => is not subpath if(c1 != c2) return false; } } bool path_is_subpathw(const wchar_t* s1, const wchar_t* s2) { // make sure s1 is the shorter string if(wcslen(s1) > wcslen(s2)) std::swap(s1, s2); wchar_t c1 = 0, last_c1, c2; for(;;) { last_c1 = c1; c1 = *s1++, c2 = *s2++; // end of s1 reached: if(c1 == L'\0') { // s1 matched s2 up until: if((c2 == L'\0') || // its end (i.e. they're equal length) OR path_is_dir_sepw(c2) || // start of next component OR path_is_dir_sepw(last_c1)) // ", but both have a trailing slash // => is subpath return true; } // mismatch => is not subpath if(c1 != c2) return false; } } // if path is invalid, return a descriptive error code, otherwise INFO::OK. LibError path_validate(const char* path) { // disallow "/", because it would create a second 'root' (with name = ""). // root dir is "". if(path[0] == '/') WARN_RETURN(ERR::PATH_NOT_RELATIVE); // scan each char in path string; count length. 
int c = 0; // current char; used for .. detection size_t path_len = 0; for(;;) { const int last_c = c; c = path[path_len++]; // whole path is too long if(path_len >= PATH_MAX) WARN_RETURN(ERR::PATH_LENGTH); // disallow: // - ".." (prevent going above the VFS root dir) // - "./" (security hole when mounting and not supported on Windows). // allow "/.", because CVS backup files include it. if(last_c == '.' && (c == '.' || c == '/')) WARN_RETURN(ERR::PATH_NON_CANONICAL); // disallow OS-specific dir separators if(c == '\\' || c == ':') WARN_RETURN(ERR::PATH_NON_PORTABLE); // end of string, no errors encountered if(c == '\0') break; } return INFO::OK; } // if name is invalid, return a descriptive error code, otherwise INFO::OK. // (name is a path component, i.e. that between directory separators) LibError path_component_validate(const char* name) { // disallow empty strings if(*name == '\0') WARN_RETURN(ERR::PATH_EMPTY); for(;;) { const int c = *name++; // disallow *any* dir separators (regardless of which // platform we're on). if(c == '\\' || c == ':' || c == '/') WARN_RETURN(ERR::PATH_COMPONENT_SEPARATOR); // end of string, no errors encountered if(c == '\0') break; } return INFO::OK; } // copy path strings (provided for convenience). void path_copy(char* dst, const char* src) { strcpy_s(dst, PATH_MAX, src); } // combine and into one path, and write to . // if necessary, a directory separator is added between the paths. // each may be empty, filenames, or full paths. // total path length (including '\0') must not exceed PATH_MAX. 
void path_append(char* dst, const char* path1, const char* path2, size_t flags) { const size_t len1 = strlen(path1); const size_t len2 = strlen(path2); size_t total_len = len1 + len2 + 1; // includes '\0' const bool no_end_slash1 = (len1 == 0 || !path_is_dir_sep(path1[len1-1])); const bool no_end_slash2 = (len2 == 0 || !path_is_dir_sep(path2[len2-1])); // check if we need to add '/' between path1 and path2 // notes: // - the second can't start with '/' (not allowed by path_validate) // - must check len2 as well - if it's empty, we'd end up // inadvertently terminating the string with '/'. bool need_separator = false; if(len2 != 0 && len1 != 0 && no_end_slash1) { total_len++; // for '/' need_separator = true; } // check if trailing slash requested and not already present bool need_terminator = false; if(flags & PATH_APPEND_SLASH && no_end_slash2) { total_len++; // for '/' need_terminator = true; } if(total_len > PATH_MAX) { DEBUG_WARN_ERR(ERR::PATH_LENGTH); *dst = 0; return; } SAFE_STRCPY(dst, path1); dst += len1; if(need_separator) *dst++ = '/'; SAFE_STRCPY(dst, path2); if(need_terminator) SAFE_STRCPY(dst+len2, "/"); } //----------------------------------------------------------------------------- // return pointer to the name component within path (i.e. skips over all // characters up to the last dir separator, if any). const char* path_name_only(const char* path) { const char* slash1 = strrchr(path, '/'); const char* slash2 = strrchr(path, '\\'); // neither present, it's a filename only if(!slash1 && !slash2) return path; // return name, i.e. component after the last portable or platform slash const char* name = std::max(slash1, slash2)+1; if(name[0] != '\0') // else path_component_validate would complain path_component_validate(name); return name; } // if contains a name component, it is stripped away. 
void path_strip_fn(char* path) { char* name = (char*)path_name_only(path); *name = '\0'; // cut off string here debug_assert(path_IsDirectory(path)); } // return extension of , or "" if there is none. // NOTE: does not include the period; e.g. "a.bmp" yields "bmp". const char* path_extension(const char* fn) { const char* dot = strrchr(fn, '.'); if(!dot) return ""; const char* ext = dot+1; return ext; } Index: ps/trunk/source/lib/file/archive/archive_zip.cpp =================================================================== --- ps/trunk/source/lib/file/archive/archive_zip.cpp (revision 6535) +++ ps/trunk/source/lib/file/archive/archive_zip.cpp (revision 6536) @@ -1,653 +1,653 @@ /** * ========================================================================= * File : archive_zip.cpp * Project : 0 A.D. * Description : archive backend for Zip files. * ========================================================================= */ // license: GPL; see lib/license.txt #include "precompiled.h" #include "archive_zip.h" #include #include #include "lib/bits.h" #include "lib/byte_order.h" #include "lib/fat_time.h" #include "lib/path_util.h" #include "lib/allocators/pool.h" #include "lib/sysdep/cpu.h" // cpu_memcpy #include "archive.h" #include "codec_zlib.h" #include "stream.h" #include "lib/file/file.h" #include "lib/file/file_system_posix.h" #include "lib/file/io/io.h" #include "lib/file/io/io_align.h" // BLOCK_SIZE #include "lib/file/io/write_buffer.h" static FileSystem_Posix s_fileSystemPosix; //----------------------------------------------------------------------------- // Zip archive definitions //----------------------------------------------------------------------------- static const u32 cdfh_magic = FOURCC_LE('P','K','\1','\2'); static const u32 lfh_magic = FOURCC_LE('P','K','\3','\4'); static const u32 ecdr_magic = FOURCC_LE('P','K','\5','\6'); enum ZipMethod { ZIP_METHOD_NONE = 0, ZIP_METHOD_DEFLATE = 8 }; #pragma pack(push, 1) class LFH { public: void Init(const 
FileInfo& fileInfo, off_t csize, ZipMethod method, u32 checksum, const VfsPath& pathname_) { const std::string& pathnameString = pathname_.string(); m_magic = lfh_magic; m_x1 = to_le16(0); m_flags = to_le16(0); m_method = to_le16(u16_from_larger(method)); m_fat_mtime = to_le32(FAT_from_time_t(fileInfo.MTime())); m_crc = to_le32(checksum); m_csize = to_le32(u32_from_larger(csize)); m_usize = to_le32(u32_from_larger(fileInfo.Size())); m_fn_len = to_le16(u16_from_larger(pathnameString.length())); m_e_len = to_le16(0); cpu_memcpy((char*)this + sizeof(LFH), pathnameString.c_str(), pathnameString.length()); } size_t Size() const { debug_assert(m_magic == lfh_magic); size_t size = sizeof(LFH); size += read_le16(&m_fn_len); size += read_le16(&m_e_len); // note: LFH doesn't have a comment field! return size; } private: u32 m_magic; u16 m_x1; // version needed u16 m_flags; u16 m_method; u32 m_fat_mtime; // last modified time (DOS FAT format) u32 m_crc; u32 m_csize; u32 m_usize; u16 m_fn_len; u16 m_e_len; }; cassert(sizeof(LFH) == 30); class CDFH { public: void Init(const FileInfo& fileInfo, off_t ofs, off_t csize, ZipMethod method, u32 checksum, const VfsPath& pathname_, size_t slack) { const std::string& pathnameString = pathname_.string(); m_magic = cdfh_magic; m_x1 = to_le32(0); m_flags = to_le16(0); m_method = to_le16(u16_from_larger(method)); m_fat_mtime = to_le32(FAT_from_time_t(fileInfo.MTime())); m_crc = to_le32(checksum); m_csize = to_le32(u32_from_larger(csize)); m_usize = to_le32(u32_from_larger(fileInfo.Size())); m_fn_len = to_le16(u16_from_larger(pathnameString.length())); m_e_len = to_le16(0); m_c_len = to_le16(u16_from_larger((size_t)slack)); m_x2 = to_le32(0); m_x3 = to_le32(0); m_lfh_ofs = to_le32(ofs); cpu_memcpy((char*)this + sizeof(CDFH), pathnameString.c_str(), pathnameString.length()); } void GetPathname(std::string& pathname) const { const size_t length = (size_t)read_le16(&m_fn_len); const char* fn = (const char*)this + sizeof(CDFH); // not 
0-terminated! pathname = std::string(fn, length); } off_t HeaderOffset() const { return read_le32(&m_lfh_ofs); } off_t USize() const { return (off_t)read_le32(&m_usize); } off_t CSize() const { return (off_t)read_le32(&m_csize); } ZipMethod Method() const { return (ZipMethod)read_le16(&m_method); } u32 Checksum() const { return read_le32(&m_crc); } time_t MTime() const { const u32 fat_mtime = read_le32(&m_fat_mtime); return time_t_from_FAT(fat_mtime); } size_t Size() const { size_t size = sizeof(CDFH); size += read_le16(&m_fn_len); size += read_le16(&m_e_len); size += read_le16(&m_c_len); return size; } private: u32 m_magic; u32 m_x1; // versions u16 m_flags; u16 m_method; u32 m_fat_mtime; // last modified time (DOS FAT format) u32 m_crc; u32 m_csize; u32 m_usize; u16 m_fn_len; u16 m_e_len; u16 m_c_len; u32 m_x2; // spanning u32 m_x3; // attributes u32 m_lfh_ofs; }; cassert(sizeof(CDFH) == 46); class ECDR { public: void Init(size_t cd_numEntries, off_t cd_ofs, off_t cd_size) { m_magic = ecdr_magic; m_diskNum = to_le16(0); m_cd_diskNum = to_le16(0); m_cd_numEntriesOnDisk = to_le16(u16_from_larger(cd_numEntries)); m_cd_numEntries = m_cd_numEntriesOnDisk; m_cd_size = to_le32(u32_from_larger(cd_size)); m_cd_ofs = to_le32(u32_from_larger(cd_ofs)); m_comment_len = to_le16(0); } void Decompose(size_t& cd_numEntries, off_t& cd_ofs, off_t& cd_size) const { cd_numEntries = (size_t)read_le16(&m_cd_numEntries); cd_ofs = (off_t)read_le32(&m_cd_ofs); cd_size = (off_t)read_le32(&m_cd_size); } private: u32 m_magic; u16 m_diskNum; u16 m_cd_diskNum; u16 m_cd_numEntriesOnDisk; u16 m_cd_numEntries; u32 m_cd_size; u32 m_cd_ofs; u16 m_comment_len; }; cassert(sizeof(ECDR) == 22); #pragma pack(pop) //----------------------------------------------------------------------------- // ArchiveFile_Zip //----------------------------------------------------------------------------- class ArchiveFile_Zip : public IArchiveFile { public: ArchiveFile_Zip(const PIFile& file, off_t ofs, off_t csize, 
u32 checksum, ZipMethod method) : m_file(file), m_ofs(ofs) , m_csize(csize), m_checksum(checksum), m_method((u16)method) , m_flags(NeedsFixup) { } virtual size_t Precedence() const { return 2u; } virtual char LocationCode() const { return 'A'; } virtual LibError Load(const std::string& UNUSED(name), const shared_ptr& buf, size_t size) const { AdjustOffset(); PICodec codec; switch(m_method) { case ZIP_METHOD_NONE: codec = CreateCodec_ZLibNone(); break; case ZIP_METHOD_DEFLATE: codec = CreateDecompressor_ZLibDeflate(); break; default: WARN_RETURN(ERR::ARCHIVE_UNKNOWN_METHOD); } Stream stream(codec); stream.SetOutputBuffer(buf.get(), size); RETURN_ERR(io_Scan(m_file, m_ofs, m_csize, FeedStream, (uintptr_t)&stream)); RETURN_ERR(stream.Finish()); #if CODEC_COMPUTE_CHECKSUM debug_assert(m_checksum == stream.Checksum()); #endif return INFO::OK; } private: enum Flags { // indicates m_ofs points to a "local file header" instead of // the file data. a fixup routine is called when reading the file; // it skips past the LFH and clears this flag. // this is somewhat of a hack, but vital to archive open performance. // without it, we'd have to scan through the entire archive file, // which can take *seconds*. // (we cannot use the information in CDFH, because its 'extra' field // has been observed to differ from that of the LFH) // since we read the LFH right before the rest of the file, the block // cache will absorb the IO cost. NeedsFixup = 1 }; struct LFH_Copier { u8* lfh_dst; size_t lfh_bytes_remaining; }; // this code grabs an LFH struct from file block(s) that are // passed to the callback. usually, one call copies the whole thing, // but the LFH may straddle a block boundary. // // rationale: this allows using temp buffers for zip_fixup_lfh, // which avoids involving the file buffer manager and thus // avoids cluttering the trace and cache contents. 
static LibError lfh_copier_cb(uintptr_t cbData, const u8* block, size_t size) { LFH_Copier* p = (LFH_Copier*)cbData; debug_assert(size <= p->lfh_bytes_remaining); cpu_memcpy(p->lfh_dst, block, size); p->lfh_dst += size; p->lfh_bytes_remaining -= size; return INFO::CB_CONTINUE; } /** * fix up m_ofs (adjust it to point to cdata instead of the LFH). * * note: we cannot use CDFH filename and extra field lengths to skip * past LFH since that may not mirror CDFH (has happened). * * this is called at file-open time instead of while mounting to * reduce seeks: since reading the file will typically follow, the * block cache entirely absorbs the IO cost. **/ void AdjustOffset() const { if(!(m_flags & NeedsFixup)) return; m_flags &= ~NeedsFixup; // performance note: this ends up reading one file block, which is // only in the block cache if the file starts in the same block as a // previously read file (i.e. both are small). LFH lfh; LFH_Copier params = { (u8*)&lfh, sizeof(LFH) }; if(io_Scan(m_file, m_ofs, sizeof(LFH), lfh_copier_cb, (uintptr_t)¶ms) == INFO::OK) m_ofs += (off_t)lfh.Size(); } PIFile m_file; // all relevant LFH/CDFH fields not covered by FileInfo mutable off_t m_ofs; off_t m_csize; u32 m_checksum; u16 m_method; mutable u16 m_flags; }; //----------------------------------------------------------------------------- // ArchiveReader_Zip //----------------------------------------------------------------------------- class ArchiveReader_Zip : public IArchiveReader { public: ArchiveReader_Zip(const Path& pathname) : m_file(CreateFile_Posix()) { m_file->Open(pathname, 'r'); FileInfo fileInfo; s_fileSystemPosix.GetFileInfo(pathname, &fileInfo); m_fileSize = fileInfo.Size(); const size_t minFileSize = sizeof(LFH)+sizeof(CDFH)+sizeof(ECDR); debug_assert(m_fileSize >= off_t(minFileSize)); } virtual LibError ReadEntries(ArchiveEntryCallback cb, uintptr_t cbData) { // locate and read Central Directory off_t cd_ofs; size_t cd_numEntries; off_t cd_size; 
RETURN_ERR(LocateCentralDirectory(m_file, m_fileSize, cd_ofs, cd_numEntries, cd_size)); shared_ptr buf = io_Allocate(cd_size, cd_ofs); u8* cd; RETURN_ERR(io_Read(m_file, cd_ofs, buf.get(), cd_size, cd)); // iterate over Central Directory const u8* pos = cd; for(size_t i = 0; i < cd_numEntries; i++) { // scan for next CDFH CDFH* cdfh = (CDFH*)FindRecord(cd, cd_size, pos, cdfh_magic, sizeof(CDFH)); if(!cdfh) WARN_RETURN(ERR::CORRUPTED); std::string zipPathname; cdfh->GetPathname(zipPathname); const size_t lastSlashOfs = zipPathname.find_last_of('/'); const size_t nameOfs = (lastSlashOfs == std::string::npos)? 0 : lastSlashOfs+1; if(nameOfs != zipPathname.length()) // ignore paths ending in slash (i.e. representing a directory) { const std::string name = zipPathname.substr(nameOfs, zipPathname.length()-nameOfs); FileInfo fileInfo(name, cdfh->USize(), cdfh->MTime()); shared_ptr archiveFile(new ArchiveFile_Zip(m_file, cdfh->HeaderOffset(), cdfh->CSize(), cdfh->Checksum(), cdfh->Method())); cb(zipPathname, fileInfo, archiveFile, cbData); } pos += cdfh->Size(); } return INFO::OK; } private: /** * scan buffer for a Zip file record. * * @param start position within buffer * @param magic signature of record * @param recordSize size of record (including signature) * @return pointer to record within buffer or 0 if not found. **/ static const u8* FindRecord(const u8* buf, size_t size, const u8* start, u32 magic, size_t recordSize) { // (don't use as the counter - otherwise we can't tell if // scanning within the buffer was necessary.) for(const u8* p = start; p <= buf+size-recordSize; p++) { // found it if(*(u32*)p == magic) { debug_assert(p == start); // otherwise, the archive is a bit broken return p; } } // passed EOF, didn't find it. // note: do not warn - this happens in the initial ECDR search at // EOF if the archive contains a comment field. return 0; } // search for ECDR in the last bytes of the file. 
// if found, fill with a copy of the (little-endian) ECDR and // return INFO::OK, otherwise IO error or ERR::CORRUPTED. static LibError ScanForEcdr(const PIFile& file, off_t fileSize, u8* buf, off_t maxScanSize, size_t& cd_numEntries, off_t& cd_ofs, off_t& cd_size) { // don't scan more than the entire file const off_t scanSize = std::min(maxScanSize, fileSize); // read desired chunk of file into memory const off_t ofs = fileSize - scanSize; u8* data; RETURN_ERR(io_Read(file, ofs, buf, scanSize, data)); // look for ECDR in buffer const ECDR* ecdr = (const ECDR*)FindRecord(data, scanSize, data, ecdr_magic, sizeof(ECDR)); if(!ecdr) return INFO::CANNOT_HANDLE; ecdr->Decompose(cd_numEntries, cd_ofs, cd_size); return INFO::OK; } static LibError LocateCentralDirectory(const PIFile& file, off_t fileSize, off_t& cd_ofs, size_t& cd_numEntries, off_t& cd_size) { const off_t maxScanSize = 66000u; // see below shared_ptr buf = io_Allocate(maxScanSize, BLOCK_SIZE-1); // assume worst-case for alignment // expected case: ECDR at EOF; no file comment LibError ret = ScanForEcdr(file, fileSize, const_cast(buf.get()), sizeof(ECDR), cd_numEntries, cd_ofs, cd_size); if(ret == INFO::OK) return INFO::OK; // worst case: ECDR precedes 64 KiB of file comment ret = ScanForEcdr(file, fileSize, const_cast(buf.get()), maxScanSize, cd_numEntries, cd_ofs, cd_size); if(ret == INFO::OK) return INFO::OK; // both ECDR scans failed - this is not a valid Zip file. RETURN_ERR(io_ReadAligned(file, 0, const_cast(buf.get()), sizeof(LFH))); // the Zip file has an LFH but lacks an ECDR. this can happen if // the user hard-exits while an archive is being written. // notes: // - return ERR::CORRUPTED so VFS will not include this file. // - we could work around this by scanning all LFHs, but won't bother // because it'd be slow. // - do not warn - the corrupt archive will be deleted on next // successful archive builder run anyway. 
if(FindRecord(buf.get(), sizeof(LFH), buf.get(), lfh_magic, sizeof(LFH))) return ERR::CORRUPTED; // NOWARN // totally bogus else WARN_RETURN(ERR::ARCHIVE_UNKNOWN_FORMAT); } PIFile m_file; off_t m_fileSize; }; PIArchiveReader CreateArchiveReader_Zip(const Path& archivePathname) { return PIArchiveReader(new ArchiveReader_Zip(archivePathname)); } //----------------------------------------------------------------------------- // ArchiveWriter_Zip //----------------------------------------------------------------------------- class ArchiveWriter_Zip : public IArchiveWriter { public: ArchiveWriter_Zip(const Path& archivePathname) : m_file(CreateFile_Posix()), m_fileSize(0) , m_unalignedWriter(new UnalignedWriter(m_file, 0)) , m_numEntries(0) { THROW_ERR(m_file->Open(archivePathname, 'w')); THROW_ERR(pool_create(&m_cdfhPool, 10*MiB, 0)); } ~ArchiveWriter_Zip() { // append an ECDR to the CDFH list (this allows us to // write out both to the archive file in one burst) const off_t cd_size = (off_t)m_cdfhPool.da.pos; ECDR* ecdr = (ECDR*)pool_alloc(&m_cdfhPool, sizeof(ECDR)); if(!ecdr) throw std::bad_alloc(); const off_t cd_ofs = m_fileSize; ecdr->Init(m_numEntries, cd_ofs, cd_size); m_unalignedWriter->Append(m_cdfhPool.da.base, cd_size+sizeof(ECDR)); m_unalignedWriter->Flush(); m_unalignedWriter.reset(); (void)pool_destroy(&m_cdfhPool); const Path pathname = m_file->Pathname(); m_file.reset(); - m_fileSize += cd_size+sizeof(ECDR); + m_fileSize += off_t(cd_size+sizeof(ECDR)); truncate(pathname.external_directory_string().c_str(), m_fileSize); } LibError AddFile(const Path& pathname) { FileInfo fileInfo; RETURN_ERR(s_fileSystemPosix.GetFileInfo(pathname, &fileInfo)); const off_t usize = fileInfo.Size(); // skip 0-length files. // rationale: zip.cpp needs to determine whether a CDFH entry is // a file or directory (the latter are written by some programs but // not needed - they'd only pollute the file table). 
// it looks like checking for usize=csize=0 is the safest way - // relying on file attributes (which are system-dependent!) is // even less safe. // we thus skip 0-length files to avoid confusing them with directories. if(!usize) return INFO::SKIPPED; PIFile file = CreateFile_Posix(); RETURN_ERR(file->Open(pathname, 'r')); const size_t pathnameLength = pathname.string().length(); const VfsPath vfsPathname(pathname.string()); // choose method and the corresponding codec ZipMethod method; PICodec codec; if(IsFileTypeIncompressible(pathname)) { method = ZIP_METHOD_NONE; codec = CreateCodec_ZLibNone(); } else { method = ZIP_METHOD_DEFLATE; codec = CreateCompressor_ZLibDeflate(); } // allocate memory const size_t csizeMax = codec->MaxOutputSize(usize); shared_ptr buf = io_Allocate(sizeof(LFH) + pathnameLength + csizeMax); // read and compress file contents size_t csize; u32 checksum; { u8* cdata = (u8*)buf.get() + sizeof(LFH) + pathnameLength; Stream stream(codec); stream.SetOutputBuffer(cdata, csizeMax); RETURN_ERR(io_Scan(file, 0, usize, FeedStream, (uintptr_t)&stream)); RETURN_ERR(stream.Finish()); csize = stream.OutSize(); checksum = stream.Checksum(); } // build LFH { LFH* lfh = (LFH*)buf.get(); lfh->Init(fileInfo, (off_t)csize, method, checksum, vfsPathname); } // append a CDFH to the central directory (in memory) const off_t ofs = m_fileSize; const size_t prev_pos = m_cdfhPool.da.pos; // (required to determine padding size) const size_t cdfhSize = sizeof(CDFH) + pathnameLength; CDFH* cdfh = (CDFH*)pool_alloc(&m_cdfhPool, cdfhSize); if(!cdfh) WARN_RETURN(ERR::NO_MEM); const size_t slack = m_cdfhPool.da.pos - prev_pos - cdfhSize; cdfh->Init(fileInfo, ofs, (off_t)csize, method, checksum, vfsPathname, slack); m_numEntries++; // write LFH, pathname and cdata to file const size_t packageSize = sizeof(LFH) + pathnameLength + csize; RETURN_ERR(m_unalignedWriter->Append(buf.get(), (off_t)packageSize)); m_fileSize += (off_t)packageSize; return INFO::OK; } private: static 
bool IsFileTypeIncompressible(const Path& pathname) { const char* extension = path_extension(pathname.string().c_str()); // file extensions that we don't want to compress static const char* incompressibleExtensions[] = { "zip", "rar", "jpg", "jpeg", "png", "ogg", "mp3" }; for(size_t i = 0; i < ARRAY_SIZE(incompressibleExtensions); i++) { if(!strcasecmp(extension, incompressibleExtensions[i])) return true; } return false; } PIFile m_file; off_t m_fileSize; PUnalignedWriter m_unalignedWriter; Pool m_cdfhPool; size_t m_numEntries; }; PIArchiveWriter CreateArchiveWriter_Zip(const Path& archivePathname) { return PIArchiveWriter(new ArchiveWriter_Zip(archivePathname)); } Index: ps/trunk/source/lib/file/file_system_util.cpp =================================================================== --- ps/trunk/source/lib/file/file_system_util.cpp (revision 6535) +++ ps/trunk/source/lib/file/file_system_util.cpp (revision 6536) @@ -1,142 +1,142 @@ /** * ========================================================================= * File : file_system_util.cpp * Project : 0 A.D. 
* Description : helper functions for directory access * ========================================================================= */ // license: GPL; see lib/license.txt #include "precompiled.h" #include "file_system_util.h" #include #include #include "lib/path_util.h" #include "lib/regex.h" LibError fs_GetPathnames(const PIVFS& fs, const VfsPath& path, const char* filter, VfsPaths& pathnames) { std::vector files; RETURN_ERR(fs->GetDirectoryEntries(path, &files, 0)); pathnames.clear(); pathnames.reserve(files.size()); for(size_t i = 0; i < files.size(); i++) { if(match_wildcard(files[i].Name().c_str(), filter)) pathnames.push_back(path/files[i].Name()); } return INFO::OK; } struct FileInfoNameLess : public std::binary_function { bool operator()(const FileInfo& fileInfo1, const FileInfo& fileInfo2) const { return strcasecmp(fileInfo1.Name().c_str(), fileInfo2.Name().c_str()) < 0; } }; void fs_SortFiles(FileInfos& files) { std::sort(files.begin(), files.end(), FileInfoNameLess()); } struct NameLess : public std::binary_function { bool operator()(const std::string& name1, const std::string& name2) const { return strcasecmp(name1.c_str(), name2.c_str()) < 0; } }; void fs_SortDirectories(DirectoryNames& directories) { std::sort(directories.begin(), directories.end(), NameLess()); } -LibError fs_ForEachFile(const PIVFS& fs, const VfsPath& path, FileCallback cb, uintptr_t cbData, const char* pattern, size_t flags) +LibError fs_ForEachFile(const PIVFS& fs, const VfsPath& startPath, FileCallback cb, uintptr_t cbData, const char* pattern, size_t flags) { - debug_assert(vfs_path_IsDirectory(path)); + debug_assert(vfs_path_IsDirectory(startPath)); // (declare here to avoid reallocations) FileInfos files; DirectoryNames subdirectoryNames; // (a FIFO queue is more efficient than recursion because it uses less // stack space and avoids seeks due to breadth-first traversal.) 
std::queue pendingDirectories; - pendingDirectories.push(path); + pendingDirectories.push(startPath); while(!pendingDirectories.empty()) { const VfsPath& path = pendingDirectories.front(); RETURN_ERR(fs->GetDirectoryEntries(path/"/", &files, &subdirectoryNames)); for(size_t i = 0; i < files.size(); i++) { const FileInfo fileInfo = files[i]; if(!match_wildcard(fileInfo.Name().c_str(), pattern)) continue; const VfsPath pathname(path/fileInfo.Name()); // (FileInfo only stores the name) cb(pathname, fileInfo, cbData); } if(!(flags & DIR_RECURSIVE)) break; for(size_t i = 0; i < subdirectoryNames.size(); i++) pendingDirectories.push(path/subdirectoryNames[i]); pendingDirectories.pop(); } return INFO::OK; } void fs_NextNumberedFilename(const PIVFS& fs, const VfsPath& pathnameFormat, size_t& nextNumber, VfsPath& nextPathname) { // (first call only:) scan directory and set nextNumber according to // highest matching filename found. this avoids filling "holes" in // the number series due to deleted files, which could be confusing. // example: add 1st and 2nd; [exit] delete 1st; [restart] // add 3rd -> without this measure it would get number 1, not 3. if(nextNumber == 0) { const std::string nameFormat = pathnameFormat.leaf(); const VfsPath path = pathnameFormat.branch_path()/"/"; size_t maxNumber = 0; FileInfos files; fs->GetDirectoryEntries(path, &files, 0); for(size_t i = 0; i < files.size(); i++) { size_t number; if(sscanf(files[i].Name().c_str(), nameFormat.c_str(), &number) == 1) maxNumber = std::max(number, maxNumber); } nextNumber = maxNumber+1; } // now increment number until that file doesn't yet exist. // this is fairly slow, but typically only happens once due // to scan loop above. (we still need to provide for looping since // someone may have added files in the meantime) // we don't bother with binary search - this isn't a bottleneck. 
do { char pathnameBuf[PATH_MAX]; snprintf(pathnameBuf, PATH_MAX, pathnameFormat.string().c_str(), nextNumber++); nextPathname = VfsPath(pathnameBuf); } while(fs->GetFileInfo(nextPathname, 0) == INFO::OK); } Index: ps/trunk/source/lib/file/io/io.cpp =================================================================== --- ps/trunk/source/lib/file/io/io.cpp (revision 6535) +++ ps/trunk/source/lib/file/io/io.cpp (revision 6536) @@ -1,326 +1,327 @@ /** * ========================================================================= * File : io.cpp * Project : 0 A.D. * Description : * ========================================================================= */ // license: GPL; see lib/license.txt #include "precompiled.h" #include "io.h" #include "lib/allocators/allocators.h" // AllocatorChecker #include "lib/sysdep/cpu.h" // cpu_memcpy #include "lib/file/file.h" #include "lib/file/common/file_stats.h" #include "block_cache.h" #include "io_align.h" static const size_t ioDepth = 8; // the underlying aio implementation likes buffer and offset to be // sector-aligned; if not, the transfer goes through an align buffer, // and requires an extra cpu_memcpy. // // if the user specifies an unaligned buffer, there's not much we can // do - we can't assume the buffer contains padding. therefore, // callers should let us allocate the buffer if possible. // // if ofs misalign = buffer, only the first and last blocks will need // to be copied by aio, since we read up to the next block boundary. // otherwise, everything will have to be copied; at least we split // the read into blocks, so aio's buffer won't have to cover the // whole file. // we don't do any caching or alignment here - this is just a thin // AIO wrapper. rationale: // - aligning the transfer isn't possible here since we have no control // over the buffer, i.e. we cannot read more data than requested. // instead, this is done in io_manager. // - transfer sizes here are arbitrary (i.e. 
not block-aligned); // that means the cache would have to handle this or also split them up // into blocks, which would duplicate the abovementioned work. // - if caching here, we'd also have to handle "forwarding" (i.e. // desired block has been issued but isn't yet complete). again, it // is easier to let the synchronous io_manager handle this. // - finally, io_manager knows more about whether the block should be cached // (e.g. whether another block request will follow), but we don't // currently make use of this. // // disadvantages: // - streamed data will always be read from disk. that's not a problem, // because such data (e.g. music, long speech) is unlikely to be used // again soon. // - prefetching (issuing the next few blocks from archive/file during // idle time to satisfy potential future IOs) requires extra buffers; // this is a bit more complicated than just using the cache as storage. //----------------------------------------------------------------------------- // allocator //----------------------------------------------------------------------------- #ifndef NDEBUG static AllocatorChecker allocatorChecker; #endif class IoDeleter { public: IoDeleter(size_t paddedSize) : m_paddedSize(paddedSize) { } void operator()(u8* mem) { debug_assert(m_paddedSize != 0); #ifndef NDEBUG allocatorChecker.OnDeallocate(mem, m_paddedSize); #endif page_aligned_free(mem, m_paddedSize); m_paddedSize = 0; } private: size_t m_paddedSize; }; shared_ptr io_Allocate(size_t size, off_t ofs) { debug_assert(size != 0); const size_t paddedSize = PaddedSize((off_t)size, ofs); u8* mem = (u8*)page_aligned_alloc(paddedSize); if(!mem) throw std::bad_alloc(); #ifndef NDEBUG allocatorChecker.OnAllocate(mem, paddedSize); #endif return shared_ptr(mem, IoDeleter(paddedSize)); } //----------------------------------------------------------------------------- // BlockIo //----------------------------------------------------------------------------- class BlockIo { public: LibError 
Issue(const PIFile& file, off_t alignedOfs, u8* alignedBuf) { m_file = file; m_blockId = BlockId(file->Pathname(), alignedOfs); if(file->Mode() == 'r' && s_blockCache.Retrieve(m_blockId, m_cachedBlock)) { stats_block_cache(CR_HIT); // copy from cache into user buffer if(alignedBuf) { cpu_memcpy(alignedBuf, m_cachedBlock.get(), BLOCK_SIZE); m_alignedBuf = alignedBuf; } // return cached block else { m_alignedBuf = const_cast(m_cachedBlock.get()); } return INFO::OK; } else { stats_block_cache(CR_MISS); stats_io_check_seek(m_blockId); // transfer directly to/from user buffer if(alignedBuf) { m_alignedBuf = alignedBuf; } // transfer into newly allocated temporary block else { m_tempBlock = io_Allocate(BLOCK_SIZE); m_alignedBuf = const_cast(m_tempBlock.get()); } return file->Issue(m_req, alignedOfs, m_alignedBuf, BLOCK_SIZE); } } LibError WaitUntilComplete(const u8*& block, size_t& blockSize) { if(m_cachedBlock) { block = m_alignedBuf; blockSize = BLOCK_SIZE; return INFO::OK; } RETURN_ERR(m_file->WaitUntilComplete(m_req, const_cast(block), blockSize)); if(m_tempBlock) s_blockCache.Add(m_blockId, m_tempBlock); return INFO::OK; } private: static BlockCache s_blockCache; PIFile m_file; BlockId m_blockId; // the address that WaitUntilComplete will return // (cached or temporary block, or user buffer) u8* m_alignedBuf; shared_ptr m_cachedBlock; shared_ptr m_tempBlock; aiocb m_req; }; BlockCache BlockIo::s_blockCache; //----------------------------------------------------------------------------- // IoSplitter //----------------------------------------------------------------------------- -class IoSplitter : noncopyable +class IoSplitter { + NONCOPYABLE(IoSplitter); public: IoSplitter(off_t ofs, u8* alignedBuf, off_t size) : m_ofs(ofs), m_alignedBuf(alignedBuf), m_size(size) , m_totalIssued(0), m_totalTransferred(0) { m_alignedOfs = AlignedOffset(ofs); m_alignedSize = PaddedSize(size, ofs); m_misalignment = ofs - m_alignedOfs; } LibError Run(const PIFile& file, IoCallback cb = 
0, uintptr_t cbData = 0) { ScopedIoMonitor monitor; // (issue even if cache hit because blocks must be processed in order) std::deque pendingIos; for(;;) { while(pendingIos.size() < ioDepth && m_totalIssued < m_alignedSize) { pendingIos.push_back(BlockIo()); const off_t alignedOfs = m_alignedOfs + m_totalIssued; u8* const alignedBuf = m_alignedBuf? m_alignedBuf+m_totalIssued : 0; RETURN_ERR(pendingIos.back().Issue(file, alignedOfs, alignedBuf)); m_totalIssued += BLOCK_SIZE; } if(pendingIos.empty()) break; Process(pendingIos.front(), cb, cbData); pendingIos.pop_front(); } debug_assert(m_totalIssued >= m_totalTransferred && m_totalTransferred >= m_size); monitor.NotifyOfSuccess(FI_AIO, file->Mode(), m_totalTransferred); return INFO::OK; } off_t AlignedOfs() const { return m_alignedOfs; } private: LibError Process(BlockIo& blockIo, IoCallback cb, uintptr_t cbData) const { const u8* block; size_t blockSize; RETURN_ERR(blockIo.WaitUntilComplete(block, blockSize)); // first block: skip past alignment if(m_totalTransferred == 0) { block += m_misalignment; blockSize -= m_misalignment; } // last block: don't include trailing padding if(m_totalTransferred + (off_t)blockSize > m_size) blockSize = m_size - m_totalTransferred; m_totalTransferred += (off_t)blockSize; if(cb) { stats_cb_start(); LibError ret = cb(cbData, block, blockSize); stats_cb_finish(); CHECK_ERR(ret); } return INFO::OK; } off_t m_ofs; u8* m_alignedBuf; off_t m_size; size_t m_misalignment; off_t m_alignedOfs; off_t m_alignedSize; // (useful, raw data: possibly compressed, but doesn't count padding) mutable off_t m_totalIssued; mutable off_t m_totalTransferred; }; LibError io_Scan(const PIFile& file, off_t ofs, off_t size, IoCallback cb, uintptr_t cbData) { u8* alignedBuf = 0; // use temporary block buffers IoSplitter splitter(ofs, alignedBuf, size); return splitter.Run(file, cb, cbData); } LibError io_Read(const PIFile& file, off_t ofs, u8* alignedBuf, size_t size, u8*& data) { IoSplitter splitter(ofs, 
alignedBuf, (off_t)size); RETURN_ERR(splitter.Run(file)); data = alignedBuf + ofs - splitter.AlignedOfs(); return INFO::OK; } LibError io_WriteAligned(const PIFile& file, off_t alignedOfs, const u8* alignedData, size_t size) { debug_assert(IsAligned_Offset(alignedOfs)); debug_assert(IsAligned_Data(alignedData)); IoSplitter splitter(alignedOfs, const_cast(alignedData), (off_t)size); return splitter.Run(file); } LibError io_ReadAligned(const PIFile& file, off_t alignedOfs, u8* alignedBuf, size_t size) { debug_assert(IsAligned_Offset(alignedOfs)); debug_assert(IsAligned_Data(alignedBuf)); IoSplitter splitter(alignedOfs, alignedBuf, (off_t)size); return splitter.Run(file); } Index: ps/trunk/source/lib/file/io/write_buffer.h =================================================================== --- ps/trunk/source/lib/file/io/write_buffer.h (revision 6535) +++ ps/trunk/source/lib/file/io/write_buffer.h (revision 6536) @@ -1,60 +1,61 @@ #ifndef INCLUDED_WRITE_BUFFER #define INCLUDED_WRITE_BUFFER #include "lib/file/file.h" class WriteBuffer { public: WriteBuffer(); void Append(const void* data, size_t size); void Overwrite(const void* data, size_t size, size_t offset); shared_ptr Data() const { return m_data; } size_t Size() const { return m_size; } private: size_t m_capacity; // must come first (init order) shared_ptr m_data; size_t m_size; }; -class UnalignedWriter : public noncopyable +class UnalignedWriter { + NONCOPYABLE(UnalignedWriter); public: UnalignedWriter(const PIFile& file, off_t ofs); ~UnalignedWriter(); /** * add data to the align buffer, writing it out to disk if full. **/ LibError Append(const u8* data, size_t size) const; /** * zero-initialize any remaining space in the align buffer and write * it to the file. this is called by the destructor. 
**/ void Flush() const; private: LibError WriteBlock() const; PIFile m_file; shared_ptr m_alignedBuf; mutable off_t m_alignedOfs; mutable size_t m_bytesUsed; }; typedef shared_ptr PUnalignedWriter; #endif // #ifndef INCLUDED_WRITE_BUFFER Index: ps/trunk/source/lib/file/io/block_cache.cpp =================================================================== --- ps/trunk/source/lib/file/io/block_cache.cpp (revision 6535) +++ ps/trunk/source/lib/file/io/block_cache.cpp (revision 6536) @@ -1,145 +1,145 @@ /** * ========================================================================= * File : block_cache.cpp * Project : 0 A.D. * Description : cache for aligned I/O m_blocks. * ========================================================================= */ // license: GPL; see lib/license.txt #include "precompiled.h" #include "block_cache.h" #include "lib/config2.h" // CONFIG2_CACHE_READ_ONLY #include "lib/file/common/file_stats.h" #include "lib/lockfree.h" #include "lib/allocators/pool.h" #include "lib/fnv_hash.h" #include "io_align.h" //----------------------------------------------------------------------------- BlockId::BlockId() : m_id(0) { } BlockId::BlockId(const Path& pathname, off_t ofs) { m_id = fnv_hash64(pathname.string().c_str(), pathname.string().length()); const size_t indexBits = 16; m_id <<= indexBits; - const off_t blockIndex = ofs / BLOCK_SIZE; - debug_assert(blockIndex < off_t(1ul << indexBits)); + const off_t blockIndex = off_t(ofs / BLOCK_SIZE); + debug_assert(blockIndex < off_t(1) << indexBits); m_id |= blockIndex; } bool BlockId::operator==(const BlockId& rhs) const { return m_id == rhs.m_id; } bool BlockId::operator!=(const BlockId& rhs) const { return !operator==(rhs); } //----------------------------------------------------------------------------- struct Block { Block(BlockId id, const shared_ptr& buf) { this->id = id; this->buf = buf; } // block is "valid" and can satisfy Retrieve() requests if a // (non-default-constructed) ID has been assigned. 
BlockId id; // this block is "in use" if use_count != 1. shared_ptr buf; }; //----------------------------------------------------------------------------- class BlockCache::Impl { public: Impl(size_t numBlocks) : m_maxBlocks(numBlocks) { } void Add(BlockId id, const shared_ptr& buf) { if(m_blocks.size() > m_maxBlocks) { #if CONFIG2_CACHE_READ_ONLY mprotect((void*)m_blocks.front().buf.get(), BLOCK_SIZE, PROT_READ); #endif m_blocks.pop_front(); // evict oldest block } #if CONFIG2_CACHE_READ_ONLY mprotect((void*)buf.get(), BLOCK_SIZE, PROT_WRITE|PROT_READ); #endif m_blocks.push_back(Block(id, buf)); } bool Retrieve(BlockId id, shared_ptr& buf) { // (linear search is ok since we only expect to manage a few blocks) for(size_t i = 0; i < m_blocks.size(); i++) { Block& block = m_blocks[i]; if(block.id == id) { buf = block.buf; return true; } } return false; } void InvalidateAll() { // note: don't check whether any references are held etc. because // this should only be called at the end of the (test) program. m_blocks.clear(); } private: size_t m_maxBlocks; typedef std::deque Blocks; Blocks m_blocks; }; //----------------------------------------------------------------------------- BlockCache::BlockCache(size_t numBlocks) : impl(new Impl(numBlocks)) { } void BlockCache::Add(BlockId id, const shared_ptr& buf) { impl->Add(id, buf); } bool BlockCache::Retrieve(BlockId id, shared_ptr& buf) { return impl->Retrieve(id, buf); } void BlockCache::InvalidateAll() { return impl->InvalidateAll(); } Index: ps/trunk/source/lib/file/common/trace.cpp =================================================================== --- ps/trunk/source/lib/file/common/trace.cpp (revision 6535) +++ ps/trunk/source/lib/file/common/trace.cpp (revision 6536) @@ -1,194 +1,194 @@ /** * ========================================================================= * File : trace.cpp * Project : 0 A.D. 
* Description : IO event recording * ========================================================================= */ // license: GPL; see lib/license.txt #include "precompiled.h" #include "trace.h" #include "lib/allocators/pool.h" #include "lib/timer.h" // timer_Time /*virtual*/ ITrace::~ITrace() { } //----------------------------------------------------------------------------- TraceEntry::TraceEntry(EAction action, const char* pathname, size_t size) -: m_timestamp(timer_Time()) +: m_timestamp((float)timer_Time()) , m_action(action) , m_pathname(strdup(pathname)) , m_size(size) { } TraceEntry::TraceEntry(const char* text) { const char* fmt = "%f: %c \"" STRINGIZE(PATH_MAX) "[^\"]\" %d\n"; char pathname[PATH_MAX]; char action; const int fieldsRead = sscanf_s(text, fmt, &m_timestamp, &action, pathname, &m_size); debug_assert(fieldsRead == 4); debug_assert(action == 'L' || action == 'S'); m_action = (EAction)action; m_pathname = strdup(pathname); } TraceEntry::~TraceEntry() { SAFE_FREE(m_pathname); } void TraceEntry::EncodeAsText(char* text, size_t maxTextChars) const { - const char action = m_action; + const char action = (char)m_action; sprintf_s(text, maxTextChars, "%#010f: %c \"%s\" %d\n", m_timestamp, action, m_pathname, m_size); } //----------------------------------------------------------------------------- class Trace_Dummy : public ITrace { public: Trace_Dummy(size_t UNUSED(maxSize)) { } virtual void NotifyLoad(const char* UNUSED(pathname), size_t UNUSED(size)) { } virtual void NotifyStore(const char* UNUSED(pathname), size_t UNUSED(size)) { } virtual LibError Load(const char* UNUSED(pathname)) { return INFO::OK; } virtual LibError Store(const char* UNUSED(pathname)) const { return INFO::OK; } virtual const TraceEntry* Entries() const { return 0; } virtual size_t NumEntries() const { return 0; } }; //----------------------------------------------------------------------------- class Trace : public ITrace { public: Trace(size_t maxSize) { 
(void)pool_create(&m_pool, maxSize, sizeof(TraceEntry)); } virtual ~Trace() { TraceEntry* entries = (TraceEntry*)m_pool.da.base; for(TraceEntry* entry = entries; entry < entries+NumEntries(); entry++) entry->~TraceEntry(); (void)pool_destroy(&m_pool); } virtual void NotifyLoad(const char* pathname, size_t size) { new(Allocate()) TraceEntry(TraceEntry::Load, pathname, size); } virtual void NotifyStore(const char* pathname, size_t size) { new(Allocate()) TraceEntry(TraceEntry::Store, pathname, size); } virtual LibError Load(const char* osPathname) { pool_free_all(&m_pool); errno = 0; FILE* file = fopen(osPathname, "rt"); if(!file) return LibError_from_errno(); for(;;) { char text[500]; if(!fgets(text, ARRAY_SIZE(text)-1, file)) break; new(Allocate()) TraceEntry(text); } fclose(file); return INFO::OK; } virtual LibError Store(const char* osPathname) const { errno = 0; FILE* file = fopen(osPathname, "at"); if(!file) return LibError_from_errno(); for(size_t i = 0; i < NumEntries(); i++) { char text[500]; Entries()[i].EncodeAsText(text, ARRAY_SIZE(text)); fputs(text, file); } (void)fclose(file); return INFO::OK; } virtual const TraceEntry* Entries() const { return (const TraceEntry*)m_pool.da.base; } virtual size_t NumEntries() const { return m_pool.da.pos / sizeof(TraceEntry); } private: void* Allocate() { void* p = pool_alloc(&m_pool, 0); debug_assert(p); return p; } Pool m_pool; }; PITrace CreateDummyTrace(size_t maxSize) { return PITrace(new Trace_Dummy(maxSize)); } PITrace CreateTrace(size_t maxSize) { return PITrace(new Trace(maxSize)); } Index: ps/trunk/source/lib/file/common/trace.h =================================================================== --- ps/trunk/source/lib/file/common/trace.h (revision 6535) +++ ps/trunk/source/lib/file/common/trace.h (revision 6536) @@ -1,111 +1,111 @@ /** * ========================================================================= * File : trace.h * Project : 0 A.D. 
// stores information about an IO event.
//
// NOTE(review): the class declares a destructor but no copy constructor
// or assignment operator. since the implementation strdup()s the pathname
// and frees it on destruction, copying an entry would presumably release
// the same string twice - confirm that entries are never copied.
class TraceEntry
{
public:
	// kind of IO operation. the enumerator values are the characters
	// that identify the action in the textual representation.
	enum EAction
	{
		Load = 'L',
		Store = 'S'
	};

	// create an entry for an operation on the given file;
	// size is the file size in bytes (see m_size).
	TraceEntry(EAction action, const char* pathname, size_t size);

	// create an entry from a line of text as produced by EncodeAsText.
	TraceEntry(const char* textualRepresentation);

	~TraceEntry();

	EAction Action() const
	{
		return m_action;
	}

	// (the returned string is owned by this entry)
	const char* Pathname() const
	{
		return m_pathname;
	}

	size_t Size() const
	{
		return m_size;
	}

	// write a one-line textual representation into text, which has room
	// for maxTextChars characters.
	void EncodeAsText(char* text, size_t maxTextChars) const;

private:
	// note: keep an eye on the class size because all instances are kept
	// in memory (see ITrace)

	// time (as returned by timer_Time) after the operation completes.
	// rationale: when loading, the VFS doesn't know file size until
	// querying the cache or retrieving file information.
	float m_timestamp;

	EAction m_action;

	const char* m_pathname;

	// size of file.
	// rationale: other applications using this trace format might not
	// have access to the VFS and its file information.
	size_t m_size;
};
* * @param osPathname native (absolute) pathname * * note: the file format is text-based to allow human inspection and * because storing filename strings in a binary format would be a * bit awkward. **/ virtual LibError Store(const char* osPathname) const = 0; /** * load entries from file. * * @param osPathname native (absolute) pathname * * replaces any existing entries. **/ virtual LibError Load(const char* osPathname) = 0; virtual const TraceEntry* Entries() const = 0; virtual size_t NumEntries() const = 0; }; typedef shared_ptr PITrace; extern PITrace CreateDummyTrace(size_t maxSize); extern PITrace CreateTrace(size_t maxSize); #endif // #ifndef INCLUDED_TRACE Index: ps/trunk/source/lib/file/vfs/vfs_populate.cpp =================================================================== --- ps/trunk/source/lib/file/vfs/vfs_populate.cpp (revision 6535) +++ ps/trunk/source/lib/file/vfs/vfs_populate.cpp (revision 6536) @@ -1,138 +1,139 @@ /** * ========================================================================= * File : vfs_populate.cpp * Project : 0 A.D. * Description : populate VFS directories with files * ========================================================================= */ // license: GPL; see lib/license.txt #include "precompiled.h" #include "vfs_populate.h" #include "lib/path_util.h" #include "lib/file/file_system_posix.h" #include "lib/file/archive/archive_zip.h" #include "vfs_tree.h" #include "vfs_lookup.h" #include "vfs.h" // error codes static FileSystem_Posix s_fileSystemPosix; static std::vector s_looseFiles; static size_t s_numArchivedFiles; // helper class that allows breaking up the logic into sub-functions without // always having to pass directory/realDirectory as parameters. 
-class PopulateHelper : noncopyable +class PopulateHelper { + NONCOPYABLE(PopulateHelper); public: PopulateHelper(VfsDirectory* directory, const PRealDirectory& realDirectory) : m_directory(directory), m_realDirectory(realDirectory) { } LibError AddEntries() const { FileInfos files; files.reserve(100); DirectoryNames subdirectoryNames; subdirectoryNames.reserve(20); RETURN_ERR(s_fileSystemPosix.GetDirectoryEntries(m_realDirectory->GetPath(), &files, &subdirectoryNames)); RETURN_ERR(AddFiles(files)); AddSubdirectories(subdirectoryNames); return INFO::OK; } private: void AddFile(const FileInfo& fileInfo) const { const VfsFile file(fileInfo.Name(), fileInfo.Size(), fileInfo.MTime(), m_realDirectory->Priority(), m_realDirectory); const VfsFile* pfile = m_directory->AddFile(file); // notify archive builder that this file could be archived but // currently isn't; if there are too many of these, archive will // be rebuilt. // note: check if archivable to exclude stuff like screenshots // from counting towards the threshold. 
if(m_realDirectory->Flags() & VFS_MOUNT_ARCHIVABLE) s_looseFiles.push_back(pfile); } static void AddArchiveFile(const VfsPath& pathname, const FileInfo& fileInfo, PIArchiveFile archiveFile, uintptr_t cbData) { PopulateHelper* this_ = (PopulateHelper*)cbData; // (we have to create missing subdirectoryNames because archivers // don't always place directory entries before their files) const size_t flags = VFS_LOOKUP_ADD; VfsDirectory* directory; WARN_ERR(vfs_Lookup(pathname, this_->m_directory, directory, 0, flags)); const VfsFile file(fileInfo.Name(), fileInfo.Size(), fileInfo.MTime(), this_->m_realDirectory->Priority(), archiveFile); directory->AddFile(file); s_numArchivedFiles++; } LibError AddFiles(const FileInfos& files) const { const Path path(m_realDirectory->GetPath()); for(size_t i = 0; i < files.size(); i++) { const std::string& name = files[i].Name(); const char* extension = path_extension(name.c_str()); if(strcasecmp(extension, "zip") == 0) { PIArchiveReader archiveReader = CreateArchiveReader_Zip(path/name); RETURN_ERR(archiveReader->ReadEntries(AddArchiveFile, (uintptr_t)this)); } else // regular (non-archive) file AddFile(files[i]); } return INFO::OK; } void AddSubdirectories(const DirectoryNames& subdirectoryNames) const { for(size_t i = 0; i < subdirectoryNames.size(); i++) { // skip version control directories - this avoids cluttering the // VFS with hundreds of irrelevant files. 
if(strcasecmp(subdirectoryNames[i].c_str(), ".svn") == 0) continue; VfsDirectory* subdirectory = m_directory->AddSubdirectory(subdirectoryNames[i]); PRealDirectory realDirectory = CreateRealSubdirectory(m_realDirectory, subdirectoryNames[i]); vfs_Attach(subdirectory, realDirectory); } } VfsDirectory* const m_directory; PRealDirectory m_realDirectory; }; LibError vfs_Populate(VfsDirectory* directory) { if(!directory->ShouldPopulate()) return INFO::OK; const PRealDirectory& realDirectory = directory->AssociatedDirectory(); if(realDirectory->Flags() & VFS_MOUNT_WATCH) realDirectory->Watch(); PopulateHelper helper(directory, realDirectory); RETURN_ERR(helper.AddEntries()); return INFO::OK; } LibError vfs_Attach(VfsDirectory* directory, const PRealDirectory& realDirectory) { RETURN_ERR(vfs_Populate(directory)); directory->SetAssociatedDirectory(realDirectory); return INFO::OK; } Index: ps/trunk/source/lib/lib_errors.h =================================================================== --- ps/trunk/source/lib/lib_errors.h (revision 6535) +++ ps/trunk/source/lib/lib_errors.h (revision 6536) @@ -1,441 +1,441 @@ /** * ========================================================================= * File : lib_errors.h * Project : 0 A.D. * Description : error handling system: defines error codes, associates * : them with descriptive text, simplifies error notification. * ========================================================================= */ // license: GPL; see lib/license.txt /** Error handling system Introduction ------------ This module defines error codes, translates them to/from other systems (e.g. errno), provides several macros that simplify returning errors / checking if a function failed, and associates codes with descriptive text. Why Error Codes? ---------------- To convey information about what failed, the alternatives are unique integral codes and direct pointers to descriptive text. Both occupy the same amount of space, but codes are easier to internationalize. 
Method of Propagating Errors ---------------------------- When a low-level function has failed, this must be conveyed to the higher-level application logic across several functions on the call stack. There are two alternatives: 1) check at each call site whether a function failed; if so, return to the caller. 2) throw an exception. We will discuss the advantages and disadvantages of exceptions, which mirror those of call site checking. - performance: they shouldn't be used in time-critical code. - predictability: exceptions can come up almost anywhere, so it is hard to say what execution path will be taken. - interoperability: not compatible with other languages. + readability: cleans up code by separating application logic and error handling. however, this is also a disadvantage because it may be difficult to see at a glance if a piece of code does error checking at all. + visibility: errors are more likely to be seen than relying on callers to check return codes; less reliant on discipline. Both have their place. Our recommendation is to throw error code exceptions when checking call sites and propagating errors becomes tedious. However, inter-module boundaries should always return error codes for interoperability with other languages. Simplifying Call-Site Checking ------------------------------ As mentioned above, this approach requires discipline. We provide macros to simplify this task: function calls can be wrapped in an "enforcer" that checks whether they succeeded and can take action (e.g. returning to caller or warning user) as appropriate. Consider the following example: LibError ret = doWork(); if(ret != INFO::OK) { warnUser(ret); return ret; } This can be replaced by: CHECK_ERR(doWork()); This provides a visible sign that the code handles errors, automatically propagates errors back to the caller, and most importantly, allows warning the user whenever an error occurs. 
Thus, no errors can be swept under the carpet by failing to check return value or catch(...) all exceptions. When to warn the user? ---------------------- When a function fails, there are 2 places we can raise a warning: as soon as the error condition is known, or in the higher-level caller. The former is the WARN_RETURN(ERR::FAIL) approach, while the latter corresponds to the example above. We prefer the former because it is easier to ensure that all possible return paths have been covered: search for all "return ERR::*" that are not followed by a "// NOWARN" comment. Also, the latter approach raises the question of where exactly to issue the warning. Clearly API-level routines must raise the warning, but sometimes they will want to call each other. Multiple warnings along the call stack ensuing from the same root cause are not nice. Note the special case of "validator" functions that e.g. verify the state of an object: we now discuss pros/cons of just returning errors without warning, and having their callers take care of that. + they typically have many return paths (-> increased code size) - this is balanced by validators that have many call sites. - we want all return statements wrapped for consistency and easily checking if any were forgotten - adding // NOWARN to each validator return statement would be tedious. - there is no advantage to checking at the call site; call stack indicates which caller of the validator failed anyway. Validator functions should therefore also use WARN_RETURN. Numbering Scheme ---------------- Each module header defines its own error codes to avoid a full rebuild whenever a new obscure code is added. Error codes start at -100000 (warnings are positive, but reserves a negative value; absolute values are unique). This avoids collisions with all known error code schemes. Each header gets 100 possible values; the tens value may be used to denote groups within that header. 
The subsystem is denoted by the ten-thousands digit: 1 for file, 2 for other resources (e.g. textures), 3 for sysdep, .. To summarize: +/-1SHHCC (S=subsystem, HH=header, CC=code number) Notes: - file is called lib_errors.h because 0ad has another errors.cpp and the MS linker isn't smart enough to deal with object files of the same name but in different paths. **/ #ifndef INCLUDED_LIB_ERRORS #define INCLUDED_LIB_ERRORS // note: this loses compiler type safety (being able to prevent // return 1 when a LibError is the return value), but allows splitting // up the error namespace into separate headers. // Lint's 'strong type' checking can be used to find errors. typedef long LibError; // opaque - do not access its fields! // note: must be defined here because clients instantiate them; // fields cannot be made private due to POD requirement. struct LibErrorAssociation { LibError err; // must remain valid until end of program. const char* description; // POSIX errno, or -1 int errno_equivalent; LibErrorAssociation* next; }; /** * associate a LibError with a description and errno equivalent. * @return dummy integer to allow calling via static initializer. **/ extern int error_AddAssociation(LibErrorAssociation*); // associate a LibError with a description and errno equivalent. // Invoke this at file or function scope. #define ERROR_ASSOCIATE(err, description, errno_equivalent)\ static LibErrorAssociation UID__ = { err, description, errno_equivalent };\ - static int UID2__ = error_AddAssociation(&UID__); + static int UID2__ = error_AddAssociation(&UID__) /** * generate textual description of an error code. * * @param err LibError to be translated. if despite type checking we * get an invalid enum value, the string will be something like * "Unknown error (65536, 0x10000)". 
* @param buf destination buffer * @param max_chars size of buffer [characters] * @return buf (allows using this function in expressions) **/ extern char* error_description_r(LibError err, char* buf, size_t max_chars); //----------------------------------------------------------------------------- // conversion to/from other error code definitions. // note: other conversion routines (e.g. to/from Win32) are implemented in // the corresponding modules to keep this header portable. /** * translate errno to LibError. * * should only be called directly after a POSIX function indicates failure; * errno may otherwise still be set from another error cause. * * @param warn_if_failed if set, raise a warning when returning an error * (i.e. ERR::*, but not INFO::OK). this avoids having to wrap all * call sites in WARN_ERR etc. * @return LibError equivalent of errno, or ERR::FAIL if there's no equal. **/ extern LibError LibError_from_errno(bool warn_if_failed = true); /** * translate a POSIX function's return/error indication to LibError. * * you should set errno to 0 before calling the POSIX function to * make sure we do not return any stale errors. typical usage: * errno = 0; * int ret = posix_func(..); * return LibError_from_posix(ret); * * @param ret return value of a POSIX function: 0 indicates success, * -1 is error. * @param warn_if_failed if set, raise a warning when returning an error * (i.e. ERR::*, but not INFO::OK). this avoids having to wrap all * call sites in WARN_ERR etc. * @return INFO::OK if the POSIX function succeeded, else the LibError * equivalent of errno, or ERR::FAIL if there's no equal. **/ extern LibError LibError_from_posix(int ret, bool warn_if_failed = true); /** * set errno to the equivalent of a LibError. * * used in wposix - underlying functions return LibError but must be * translated to errno at e.g. the mmap interface level. higher-level code * that calls mmap will in turn convert back to LibError. 
//-----------------------------------------------------------------------------

// be careful here. the given expression (e.g. variable or
// function return value) may be a Handle (=i64), so it needs to be
// stored and compared as such. (very large but legitimate Handle values
// cast to int can end up negative)
// all functions using this return LibError (instead of i64) for
// efficiency and simplicity. if the input was negative, it is an
// error code and is therefore known to fit; we still mask with
// ULONG_MAX to avoid VC cast-to-smaller-type warnings.

// if expression evaluates to a negative error code, warn user and
// return the number.
// (the expression is evaluated exactly once)
#define CHECK_ERR(expression)\
STMT(\
	i64 err64__ = (i64)(expression);\
	if(err64__ < 0)\
	{\
		LibError err__ = (LibError)(err64__ & ULONG_MAX);\
		DEBUG_WARN_ERR(err__);\
		return err__;\
	}\
)

// just pass on errors without any kind of annoying warning
// (useful for functions that can legitimately fail).
// (the expression is evaluated exactly once)
#define RETURN_ERR(expression)\
STMT(\
	i64 err64__ = (i64)(expression);\
	if(err64__ < 0)\
	{\
		LibError err__ = (LibError)(err64__ & ULONG_MAX);\
		return err__;\
	}\
)

// return an error and warn about it (replaces debug_warn+return).
// the argument is stored first so that it is evaluated only once, even
// if it is a function call or has other side effects.
#define WARN_RETURN(err)\
STMT(\
	const LibError err__ = (err);\
	DEBUG_WARN_ERR(err__);\
	return err__;\
)

// if expression evaluates to a negative error code, warn user and
// throw that number.
#define THROW_ERR(expression)\ STMT(\ i64 err64__ = (i64)(expression);\ if(err64__ < 0)\ {\ LibError err__ = (LibError)(err64__ & ULONG_MAX);\ DEBUG_WARN_ERR(err__);\ throw err__;\ }\ ) // if expression evaluates to a negative error code, warn user and just return // (useful for void functions that must bail and complain) #define WARN_ERR_RETURN(expression)\ STMT(\ i64 err64__ = (i64)(expression);\ if(err64__ < 0)\ {\ LibError err__ = (LibError)(err64__ & ULONG_MAX);\ DEBUG_WARN_ERR(err__);\ return;\ }\ ) // if expression evaluates to a negative error code, warn user // (this is similar to debug_assert but also works in release mode) #define WARN_ERR(expression)\ STMT(\ i64 err64__ = (i64)(expression);\ if(err64__ < 0)\ {\ LibError err__ = (LibError)(err64__ & ULONG_MAX);\ DEBUG_WARN_ERR(err__);\ }\ ) // if expression evaluates to a negative error code, return 0. #define RETURN0_IF_ERR(expression)\ STMT(\ i64 err64__ = (i64)(expression);\ if(err64__ < 0)\ return 0;\ ) // if ok evaluates to false or FALSE, warn user and return -1. #define WARN_RETURN_IF_FALSE(ok)\ STMT(\ if(!(ok))\ {\ debug_warn("FYI: WARN_RETURN_IF_FALSE reports that a function failed."\ "feel free to ignore or suppress this warning.");\ return ERR::FAIL;\ }\ ) // if ok evaluates to false or FALSE, return -1. #define RETURN_IF_FALSE(ok)\ STMT(\ if(!(ok))\ return ERR::FAIL;\ ) // if ok evaluates to false or FALSE, warn user. #define WARN_IF_FALSE(ok)\ STMT(\ if(!(ok))\ debug_warn("FYI: WARN_IF_FALSE reports that a function failed."\ "feel free to ignore or suppress this warning.");\ ) //----------------------------------------------------------------------------- namespace INFO { const LibError OK = 0; // note: these values are > 100 to allow multiplexing them with // coroutine return values, which return completion percentage. // function is a callback and indicates that it can (but need not // necessarily) be called again. 
const LibError CB_CONTINUE = +100000; // notify caller that nothing was done. const LibError SKIPPED = +100001; // function is incapable of doing the requested task with the given inputs. // this implies SKIPPED, but also conveys a bit more information. const LibError CANNOT_HANDLE = +100002; // function is meant to be called repeatedly, and now indicates that // all jobs are complete. const LibError ALL_COMPLETE = +100003; // (returned e.g. when inserting into container) const LibError ALREADY_EXISTS = +100004; } namespace ERR { const LibError FAIL = -1; // general const LibError LOGIC = -100010; const LibError TIMED_OUT = -100011; const LibError REENTERED = -100012; const LibError CORRUPTED = -100013; const LibError ASSERTION_FAILED = -100014; // function arguments const LibError INVALID_PARAM = -100020; const LibError INVALID_HANDLE = -100021; const LibError BUF_SIZE = -100022; // system limitations const LibError AGAIN = -100030; const LibError LIMIT = -100031; const LibError NO_SYS = -100032; const LibError NOT_IMPLEMENTED = -100033; const LibError NOT_SUPPORTED = -100034; const LibError NO_MEM = -100035; // these are for cases where we just want a distinct value to display and // a symbolic name + string would be overkill (e.g. the various // test cases in a validate() call). they are shared between multiple // functions; when something fails, the stack trace will show in which // one it was => these errors are unambiguous. 
// there are 3 tiers - 1..9 are used in most functions, 11..19 are // used in a function that calls another validator and 21..29 are // for functions that call 2 other validators (this avoids // ambiguity as to which error actually happened where) const LibError _1 = -100101; const LibError _2 = -100102; const LibError _3 = -100103; const LibError _4 = -100104; const LibError _5 = -100105; const LibError _6 = -100106; const LibError _7 = -100107; const LibError _8 = -100108; const LibError _9 = -100109; const LibError _11 = -100111; const LibError _12 = -100112; const LibError _13 = -100113; const LibError _14 = -100114; const LibError _15 = -100115; const LibError _16 = -100116; const LibError _17 = -100117; const LibError _18 = -100118; const LibError _19 = -100119; const LibError _21 = -100121; const LibError _22 = -100122; const LibError _23 = -100123; const LibError _24 = -100124; const LibError _25 = -100125; const LibError _26 = -100126; const LibError _27 = -100127; const LibError _28 = -100128; const LibError _29 = -100129; } // namespace ERR #endif // #ifndef INCLUDED_LIB_ERRORS Index: ps/trunk/source/lib/precompiled.h =================================================================== --- ps/trunk/source/lib/precompiled.h (revision 6535) +++ ps/trunk/source/lib/precompiled.h (revision 6536) @@ -1,176 +1,179 @@ /** * ========================================================================= * File : precompiled.h * Project : 0 A.D. * Description : precompiled header. must be the first non-comment part * : of every source file (VC6..8 requirement). * ========================================================================= */ // license: GPL; see lib/license.txt #define _SECURE_SCL 0 #include "lib/sysdep/compiler.h" // MSC_VERSION, HAVE_PCH // disable some common and annoying warnings // (done as soon as possible so that headers below are covered) #if MSC_VERSION // ..
temporarily disabled W4 (unimportant but ought to be fixed) # pragma warning(disable:4201) // nameless struct (Matrix3D) # pragma warning(disable:4244) // conversion from uintN to uint8 // .. permanently disabled W4 # pragma warning(disable:4127) // conditional expression is constant; rationale: see STMT in lib.h. # pragma warning(disable:4996) // function is deprecated # pragma warning(disable:4786) // identifier truncated to 255 chars # pragma warning(disable:4351) // yes, default init of array entries is desired // .. disabled only for the precompiled headers # pragma warning(disable:4702) // unreachable code (frequent in STL) // .. VS2005 code analysis (very frequent ones) # if MSC_VERSION >= 1400 # pragma warning(disable:6011) // dereferencing NULL pointer # pragma warning(disable:6246) // local declaration hides declaration of the same name in outer scope # endif # if ICC_VERSION +# pragma warning(disable:383) // value copied to temporary, reference to temporary used +# pragma warning(disable:981) // operands are evaluated in unspecified order +# pragma warning(disable:1418) // external function definition with no prior declaration (raised for all non-static function templates) +# pragma warning(disable:1572) // floating-point equality and inequality comparisons are unreliable # pragma warning(disable:1786) // function is deprecated (disabling 4996 isn't sufficient) # pragma warning(disable:1684) // conversion from pointer to same-sized integral type # endif #endif // // headers made available everywhere for convenience // #include "lib/sysdep/os.h" #include "lib/sysdep/stl.h" #include "lib/sysdep/arch.h" #include "lib/lib_api.h" #include "lib/types.h" #include "lib/lib.h" #include "lib/lib_errors.h" #include "lib/secure_crt.h" #include "lib/debug.h" #include "lib/code_annotation.h" // Boost // .. if this package isn't going to be statically linked, we're better off // using Boost via DLL.
(otherwise, we would have to ensure the exact same // compiler is used, which is a pain because MSC8, MSC9 and ICC 10 are in use) #ifndef LIB_STATIC_LINK # define BOOST_ALL_DYN_LINK #endif -#include // noncopyable // the following boost libraries have been included in TR1 and are // thus deemed usable: #include using boost::shared_ptr; #include using boost::array; #include using boost::mem_fn; #include using boost::function; #include using boost::bind; #include "lib/external_libraries/boost_filesystem.h" // (this must come after boost and common lib headers) #include "lib/posix/posix.h" // // precompiled headers // // if PCHs are supported and enabled, we make an effort to include all // system headers. otherwise, only a few central headers (e.g. types) // are pulled in and source files must include all the system headers // they use. this policy ensures good compile performance whether or not // PCHs are being used. #include "lib/config.h" // CONFIG_ENABLE_PCH #include "lib/sysdep/compiler.h" // HAVE_PCH #if CONFIG_ENABLE_PCH && HAVE_PCH // anything placed here won't need to be compiled in each translation unit, // but will cause a complete rebuild if they change. // all new-form C library headers #include #include #include #include //#include // defines e.g. "and" to "&". unnecessary and causes trouble with asm. 
#include #include #include //#include // incompatible with libpng on Debian/Ubuntu #include #include #include #include #include #include #include #include #include // all C++98 STL headers #include #include #include #include #include #include #include #include #include #include #include #include #include // all other C++98 headers #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include // STL extensions #if GCC_VERSION >= 402 // (see comment in stl.h about GCC versions) # include # include #elif GCC_VERSION # include # include #else # include # include #endif #endif // #if CONFIG_PCH // restore temporarily-disabled warnings #if MSC_VERSION # pragma warning(default:4702) #endif Index: ps/trunk/source/lib/sysdep/os/win/winit.h =================================================================== --- ps/trunk/source/lib/sysdep/os/win/winit.h (revision 6535) +++ ps/trunk/source/lib/sysdep/os/win/winit.h (revision 6536) @@ -1,156 +1,156 @@ /** * ========================================================================= * File : winit.h * Project : 0 A.D. * Description : windows-specific module init and shutdown mechanism * ========================================================================= */ // license: GPL; see lib/license.txt #ifndef INCLUDED_WINIT #define INCLUDED_WINIT /* Overview -------- This facility allows registering init and shutdown functions with only one line of code and zero runtime overhead. It provides for dependencies between modules, allowing groups of functions to run before others. Details ------- Participating modules store function pointer(s) to their init and/or shutdown function in a specific COFF section. The sections are grouped according to the desired notification and the order in which functions are to be called (useful if one module depends on another). 
They are then gathered by the linker and arranged in alphabetical order. Placeholder variables in the sections indicate where the series of functions begins and ends for a given notification time. At runtime, all of the function pointers between the markers are invoked. Example ------- (at file scope:) WINIT_REGISTER_MAIN_INIT(InitCallback); Rationale --------- Several methods of module init are possible: (see Large Scale C++ Design) - on-demand initialization: each exported function would have to check if init already happened. that would be brittle and hard to verify. - singleton: variant of the above, but not applicable to a procedural interface (and quite ugly to boot). - registration: static constructors call a central notification function. module dependencies would be quite difficult to express - this would require a graph or separate lists for each priority (clunky). worse, a fatal flaw is that other C++ constructors may depend on the modules we are initializing and already have run. there is no way to influence ctor call order between separate source files, so this is out of the question. - linker-based registration: same as above, but the linker takes care of assembling various functions into one sorted table. the list of init functions is available before C++ ctors have run. incidentally, zero runtime overhead is incurred. unfortunately, this approach is MSVC-specific. however, the MS CRT uses a similar method for its init, so this is expected to remain supported. */ //----------------------------------------------------------------------------- // section declarations // section names are of the format ".WINIT${type}{group}". // {type} is I for initialization- or S for shutdown functions. // {group} is [0, 9] - see below. // note: __declspec(allocate) requires declaring segments in advance via // #pragma section. 
#pragma section(".WINIT$I$", read) #pragma section(".WINIT$I0", read) #pragma section(".WINIT$I1", read) #pragma section(".WINIT$I2", read) #pragma section(".WINIT$I6", read) #pragma section(".WINIT$I7", read) #pragma section(".WINIT$IZ", read) #pragma section(".WINIT$S$", read) #pragma section(".WINIT$S0", read) #pragma section(".WINIT$S1", read) #pragma section(".WINIT$S6", read) #pragma section(".WINIT$S7", read) #pragma section(".WINIT$S8", read) #pragma section(".WINIT$SZ", read) #pragma comment(linker, "/merge:.WINIT=.rdata") //----------------------------------------------------------------------------- // Function groups // to allow correct ordering of module init in the face of dependencies, // we introduce 'groups'. all functions in one are called before those in // the next higher group, but order within the group is undefined. // (this is because the linker sorts sections alphabetically but doesn't // specify the order in which object files are processed.) // these macros register a function to be called at the given time. // usage: invoke at file scope, passing a function identifier/symbol. // rationale: // - __declspec(allocate) requires section declarations, but allows users to // write only one line (instead of needing an additional #pragma data_seg) // - fixed groups instead of passing a group number are more clear and // encourage thinking about init order. (__declspec(allocate) requires // a single string literal anyway and doesn't support string merging) // - why EXTERN_C and __pragma? VC8's link-stage optimizer believes // the static function pointers defined by WINIT_REGISTER_* to be unused; // unless action is taken, they would be removed. to prevent this, we // forcibly include the function pointer symbols. this means the variable // must be extern, not static. the linker needs to know the decorated // symbol name, so we disable mangling via EXTERN_C. 
// very early init; must not fail, since error handling code *crashes* // if called before these have completed. -#define WINIT_REGISTER_CRITICAL_INIT(func) static LibError func(void); EXTERN_C __declspec(allocate(".WINIT$I0")) LibError (*p##func)(void) = func; __pragma(comment(linker, "/include:" STRINGIZE(DECORATED_NAME(p##func)))) +#define WINIT_REGISTER_CRITICAL_INIT(func) __pragma(comment(linker, "/include:" STRINGIZE(DECORATED_NAME(p##func)))) static LibError func(void); EXTERN_C __declspec(allocate(".WINIT$I0")) LibError (*p##func)(void) = func // meant for modules with dependents but whose init is complicated and may // raise error/warning messages (=> can't go in WINIT_REGISTER_CRITICAL_INIT) -#define WINIT_REGISTER_EARLY_INIT(func) static LibError func(void); EXTERN_C __declspec(allocate(".WINIT$I1")) LibError (*p##func)(void) = func; __pragma(comment(linker, "/include:" STRINGIZE(DECORATED_NAME(p##func)))) +#define WINIT_REGISTER_EARLY_INIT(func) __pragma(comment(linker, "/include:" STRINGIZE(DECORATED_NAME(p##func)))) static LibError func(void); EXTERN_C __declspec(allocate(".WINIT$I1")) LibError (*p##func)(void) = func // available for dependents of WINIT_REGISTER_EARLY_INIT-modules that // must still come before WINIT_REGISTER_MAIN_INIT. -#define WINIT_REGISTER_EARLY_INIT2(func) static LibError func(void); EXTERN_C __declspec(allocate(".WINIT$I2")) LibError (*p##func)(void) = func; __pragma(comment(linker, "/include:" STRINGIZE(DECORATED_NAME(p##func)))) +#define WINIT_REGISTER_EARLY_INIT2(func) __pragma(comment(linker, "/include:" STRINGIZE(DECORATED_NAME(p##func)))) static LibError func(void); EXTERN_C __declspec(allocate(".WINIT$I2")) LibError (*p##func)(void) = func // most modules will go here unless they are often used or // have many dependents. 
-#define WINIT_REGISTER_MAIN_INIT(func) static LibError func(void); EXTERN_C __declspec(allocate(".WINIT$I6")) LibError (*p##func)(void) = func; __pragma(comment(linker, "/include:" STRINGIZE(DECORATED_NAME(p##func)))) +#define WINIT_REGISTER_MAIN_INIT(func) __pragma(comment(linker, "/include:" STRINGIZE(DECORATED_NAME(p##func)))) static LibError func(void); EXTERN_C __declspec(allocate(".WINIT$I6")) LibError (*p##func)(void) = func // available for any modules that may need to come after // WINIT_REGISTER_MAIN_INIT (unlikely) -#define WINIT_REGISTER_LATE_INIT(func) static LibError func(void); EXTERN_C __declspec(allocate(".WINIT$I7")) LibError (*p##func)(void) = func; __pragma(comment(linker, "/include:" STRINGIZE(DECORATED_NAME(p##func)))) +#define WINIT_REGISTER_LATE_INIT(func) __pragma(comment(linker, "/include:" STRINGIZE(DECORATED_NAME(p##func)))) static LibError func(void); EXTERN_C __declspec(allocate(".WINIT$I7")) LibError (*p##func)(void) = func -#define WINIT_REGISTER_EARLY_SHUTDOWN(func) static LibError func(void); EXTERN_C __declspec(allocate(".WINIT$S0")) LibError (*p##func)(void) = func; __pragma(comment(linker, "/include:" STRINGIZE(DECORATED_NAME(p##func)))) -#define WINIT_REGISTER_EARLY_SHUTDOWN2(func) static LibError func(void); EXTERN_C __declspec(allocate(".WINIT$S1")) LibError (*p##func)(void) = func; __pragma(comment(linker, "/include:" STRINGIZE(DECORATED_NAME(p##func)))) -#define WINIT_REGISTER_MAIN_SHUTDOWN(func) static LibError func(void); EXTERN_C __declspec(allocate(".WINIT$S6")) LibError (*p##func)(void) = func; __pragma(comment(linker, "/include:" STRINGIZE(DECORATED_NAME(p##func)))) -#define WINIT_REGISTER_LATE_SHUTDOWN(func) static LibError func(void); EXTERN_C __declspec(allocate(".WINIT$S7")) LibError (*p##func)(void) = func; __pragma(comment(linker, "/include:" STRINGIZE(DECORATED_NAME(p##func)))) -#define WINIT_REGISTER_LATE_SHUTDOWN2(func) static LibError func(void); EXTERN_C __declspec(allocate(".WINIT$S8")) LibError 
(*p##func)(void) = func; __pragma(comment(linker, "/include:" STRINGIZE(DECORATED_NAME(p##func)))) +#define WINIT_REGISTER_EARLY_SHUTDOWN(func) __pragma(comment(linker, "/include:" STRINGIZE(DECORATED_NAME(p##func)))) static LibError func(void); EXTERN_C __declspec(allocate(".WINIT$S0")) LibError (*p##func)(void) = func +#define WINIT_REGISTER_EARLY_SHUTDOWN2(func) __pragma(comment(linker, "/include:" STRINGIZE(DECORATED_NAME(p##func)))) static LibError func(void); EXTERN_C __declspec(allocate(".WINIT$S1")) LibError (*p##func)(void) = func +#define WINIT_REGISTER_MAIN_SHUTDOWN(func) __pragma(comment(linker, "/include:" STRINGIZE(DECORATED_NAME(p##func)))) static LibError func(void); EXTERN_C __declspec(allocate(".WINIT$S6")) LibError (*p##func)(void) = func +#define WINIT_REGISTER_LATE_SHUTDOWN(func) __pragma(comment(linker, "/include:" STRINGIZE(DECORATED_NAME(p##func)))) static LibError func(void); EXTERN_C __declspec(allocate(".WINIT$S7")) LibError (*p##func)(void) = func +#define WINIT_REGISTER_LATE_SHUTDOWN2(func) __pragma(comment(linker, "/include:" STRINGIZE(DECORATED_NAME(p##func)))) static LibError func(void); EXTERN_C __declspec(allocate(".WINIT$S8")) LibError (*p##func)(void) = func //----------------------------------------------------------------------------- /** * call each registered function. * * if this is called before CRT initialization, callbacks must not use any * non-stateless CRT functions such as atexit. see wstartup.h for the * current status on this issue. 
**/ extern void winit_CallInitFunctions(); extern void winit_CallShutdownFunctions(); #endif // #ifndef INCLUDED_WINIT Index: ps/trunk/source/lib/sysdep/os/win/wposix/wfilesystem.cpp =================================================================== --- ps/trunk/source/lib/sysdep/os/win/wposix/wfilesystem.cpp (revision 6535) +++ ps/trunk/source/lib/sysdep/os/win/wposix/wfilesystem.cpp (revision 6536) @@ -1,368 +1,368 @@ #include "precompiled.h" #include "wfilesystem.h" #include "lib/allocators/allocators.h" // single_calloc #include "wposix_internal.h" #include "wtime_internal.h" // wtime_utc_filetime_to_time_t #include "crt_posix.h" // _rmdir, _access // // determine file system type on the current drive - // needed to work around incorrect FAT time translation. // static enum Filesystem { FS_INVALID, // detect_filesystem() not yet called FS_FAT, // FAT12, FAT16, or FAT32 FS_NTFS, // (most common) FS_UNKNOWN // newer FS we don't know about } filesystem; // rationale: the previous method of checking every path was way too slow // (taking ~800ms total during init). instead, we only determine the FS once. // this is quite a bit easier than intercepting chdir() calls and/or // caching FS type per drive letter, but not foolproof. // // if some data files are on a different volume that is set up as FAT, // the workaround below won't be triggered (=> timestamps may be off by // 1 hour when DST is in effect). oh well, that is not a supported configuration. // // the common case (everything is on a single NTFS volume) is more important // and must run without penalty. // called from the first filetime_to_time_t() call, not win.cpp init; // this means we can rely on the current directory having been set to // the app's directory (and therefore its appendant volume - see above).
static void detect_filesystem() { char root_path[MAX_PATH] = "c:\\"; // default in case GCD fails DWORD gcd_ret = GetCurrentDirectory(sizeof(root_path), root_path); debug_assert(gcd_ret != 0); // if this fails, no problem - we have the default from above. root_path[3] = '\0'; // cut off after "c:\" char fs_name[32] = {0}; BOOL ret = GetVolumeInformation(root_path, 0,0,0,0,0, fs_name, sizeof(fs_name)); fs_name[ARRAY_SIZE(fs_name)-1] = '\0'; debug_assert(ret != 0); // if this fails, no problem - we really only care if fs is FAT, // and will assume that's not the case (since fs_name != "FAT"). filesystem = FS_UNKNOWN; if(!strncmp(fs_name, "FAT", 3)) // e.g. FAT32 filesystem = FS_FAT; else if(!strcmp(fs_name, "NTFS")) filesystem = FS_NTFS; } // convert local FILETIME (includes timezone bias and possibly DST bias) // to seconds-since-1970 UTC. // // note: splitting into month, year etc. is inefficient, // but much easier than determining whether ft lies in DST, // and ourselves adding the appropriate bias. // // called for FAT file times; see wposix filetime_to_time_t. time_t time_t_from_local_filetime(FILETIME* ft) { SYSTEMTIME st; FileTimeToSystemTime(ft, &st); struct tm t; t.tm_sec = st.wSecond; t.tm_min = st.wMinute; t.tm_hour = st.wHour; t.tm_mday = st.wDay; t.tm_mon = st.wMonth-1; t.tm_year = st.wYear-1900; t.tm_isdst = -1; // let the CRT determine whether this local time // falls under DST by the US rules. return mktime(&t); } // convert Windows FILETIME to POSIX time_t (seconds-since-1970 UTC); // used by stat and readdir_stat_np for st_mtime. // // works around a documented Windows bug in converting FAT file times // (correct results are desired since VFS mount logic considers // files 'equal' if their mtime and size are the same). static time_t filetime_to_time_t(FILETIME* ft) { ONCE(detect_filesystem()); // the FAT file system stores local file times, while // NTFS records UTC. Windows does convert automatically, // but uses the current DST settings. (boo!) 
// we go back to local time, and convert properly. if(filesystem == FS_FAT) { FILETIME local_ft; FileTimeToLocalFileTime(ft, &local_ft); return time_t_from_local_filetime(&local_ft); } return wtime_utc_filetime_to_time_t(ft); } /* // currently only sets st_mode (file or dir) and st_size. int stat(const char* fn, struct stat* s) { memset(s, 0, sizeof(struct stat)); WIN32_FILE_ATTRIBUTE_DATA fad; if(!GetFileAttributesEx(fn, GetFileExInfoStandard, &fad)) return -1; s->st_mtime = filetime_to_time_t(fad.ftLastAccessTime) // dir if(fad.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) s->st_mode = S_IFDIR; else { s->st_mode = S_IFREG; s->st_size = (off_t)((((u64)fad.nFileSizeHigh) << 32) | fad.nFileSizeLow); } return 0; } */ int access(const char* path, int mode) { return _access(path, mode); } -#if !HAVE_MKDIR +#ifndef HAVE_MKDIR int mkdir(const char* path, mode_t UNUSED(mode)) { if(!CreateDirectory(path, (LPSECURITY_ATTRIBUTES)NULL)) { return -1; } return 0; } -#endif // #if !HAVE_MKDIR +#endif int rmdir(const char* path) { return _rmdir(path); } //----------------------------------------------------------------------------- // readdir //----------------------------------------------------------------------------- // note: we avoid opening directories or returning entries that have // hidden or system attributes set. this is to prevent returning something // like "\System Volume Information", which raises an error upon opening. // 0-initialized by wdir_alloc for safety; this is required for // num_entries_scanned. struct WDIR { HANDLE hFind; // the dirent returned by readdir. // note: having only one global instance is not possible because // multiple independent opendir/readdir sequences must be supported. struct dirent ent; WIN32_FIND_DATA fd; // since opendir calls FindFirstFile, we need a means of telling the // first call to readdir that we already have a file. 
// that's the case iff this is == 0; we use a counter rather than a // flag because that allows keeping statistics. int num_entries_scanned; }; // suballocator - satisfies most requests with a reusable static instance, // thus speeding up allocation and avoiding heap fragmentation. // thread-safe. static WDIR global_wdir; static uintptr_t global_wdir_is_in_use; // zero-initializes the WDIR (code below relies on this) static inline WDIR* wdir_alloc() { return (WDIR*)single_calloc(&global_wdir, &global_wdir_is_in_use, sizeof(WDIR)); } static inline void wdir_free(WDIR* d) { single_free(&global_wdir, &global_wdir_is_in_use, d); } static const DWORD hs = FILE_ATTRIBUTE_HIDDEN | FILE_ATTRIBUTE_SYSTEM; // make sure path exists and is a normal (according to attributes) directory. static bool is_normal_dir(const char* path) { const DWORD fa = GetFileAttributes(path); // path not found if(fa == INVALID_FILE_ATTRIBUTES) return false; // not a directory if((fa & FILE_ATTRIBUTE_DIRECTORY) == 0) return false; // hidden or system attribute(s) set // this check is now disabled because wsnd's add_oal_dlls_in_dir // needs to open the Windows system directory, which sometimes has // these attributes set. //if((fa & hs) != 0) // return false; return true; } DIR* opendir(const char* path) { if(!is_normal_dir(path)) { errno = ENOENT; return 0; } WDIR* d = wdir_alloc(); if(!d) { errno = ENOMEM; return 0; } // build search path for FindFirstFile. note: "path\\dir" only returns // information about that directory; trailing slashes aren't allowed. // for dir entries to be returned, we have to append "\\*". char search_path[PATH_MAX]; snprintf(search_path, ARRAY_SIZE(search_path), "%s\\*", path); // note: we could store search_path and defer FindFirstFile until // readdir. this way is a bit more complex but required for // correctness (we must return a valid DIR iff path is valid).
d->hFind = FindFirstFileA(search_path, &d->fd); if(d->hFind == INVALID_HANDLE_VALUE) { // not an error - the directory is just empty. if(GetLastError() == ERROR_NO_MORE_FILES) return d; // translate Win32 error to errno. LibError err = LibError_from_win32(FALSE); LibError_set_errno(err); // release the WDIR allocated above. // unfortunately there's no way around this; we need to allocate // d before FindFirstFile because it uses d->fd. copying from a // temporary isn't nice either (this free doesn't happen often) wdir_free(d); return 0; } return d; } struct dirent* readdir(DIR* d_) { WDIR* const d = (WDIR*)d_; // avoid polluting the last error. DWORD prev_err = GetLastError(); // first call - skip FindNextFile (see opendir). if(d->num_entries_scanned == 0) { // this directory is empty. if(d->hFind == INVALID_HANDLE_VALUE) return 0; goto already_have_file; } // until end of directory or a valid entry was found: for(;;) { if(!FindNextFileA(d->hFind, &d->fd)) goto fail; already_have_file: d->num_entries_scanned++; // not a hidden or system entry -> it's valid. if((d->fd.dwFileAttributes & hs) == 0) break; } // this entry has passed all checks; return information about it. // (note: d_name is a pointer; see struct dirent definition) d->ent.d_name = d->fd.cFileName; return &d->ent; fail: // FindNextFile failed; determine why and bail. // .. legit, end of dir reached. don't pollute last error code. if(GetLastError() == ERROR_NO_MORE_FILES) SetLastError(prev_err); else debug_assert(0); // readdir: FindNextFile failed return 0; } // return status for the dirent returned by the last successful // readdir call from the given directory stream. // currently sets st_size, st_mode, and st_mtime; the rest are zeroed. // non-portable, but considerably faster than stat(). used by dir_ForEachSortedEntry. 
int readdir_stat_np(DIR* d_, struct stat* s) { WDIR* d = (WDIR*)d_; memset(s, 0, sizeof(*s)); s->st_size = (off_t)u64_from_u32(d->fd.nFileSizeHigh, d->fd.nFileSizeLow); - s->st_mode = (d->fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)? S_IFDIR : S_IFREG; + s->st_mode = (unsigned short)((d->fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)? S_IFDIR : S_IFREG); s->st_mtime = filetime_to_time_t(&d->fd.ftLastWriteTime); return 0; } int closedir(DIR* d_) { WDIR* const d = (WDIR*)d_; FindClose(d->hFind); wdir_free(d); return 0; } //----------------------------------------------------------------------------- char* realpath(const char* fn, char* path) { if(!GetFullPathName(fn, PATH_MAX, path, 0)) return 0; return path; } Index: ps/trunk/source/lib/sysdep/os/win/wsdl.cpp =================================================================== --- ps/trunk/source/lib/sysdep/os/win/wsdl.cpp (revision 6535) +++ ps/trunk/source/lib/sysdep/os/win/wsdl.cpp (revision 6536) @@ -1,1263 +1,1265 @@ /** * ========================================================================= * File : wsdl.cpp * Project : 0 A.D. * Description : emulate SDL on Windows. * ========================================================================= */ // license: GPL; see lib/license.txt #include "precompiled.h" #include "lib/external_libraries/sdl.h" #if CONFIG2_WSDL #include #include #include #include #include "win.h" #include // _beginthreadex #include // message crackers #include "lib/posix/posix_pthread.h" #include "lib/module_init.h" #include "wutil.h" #include "winit.h" #include "lib/sysdep/os/win/wmi.h" // for SDL_GetVideoInfo #if MSC_VERSION #pragma comment(lib, "user32.lib") #pragma comment(lib, "gdi32.lib") #endif #include "lib/ogl.h" // needed to pull in the delay-loaded opengl32.dll WINIT_REGISTER_LATE_INIT(wsdl_Init); WINIT_REGISTER_EARLY_SHUTDOWN(wsdl_Shutdown); // in fullscreen mode, i.e. not windowed. // video mode will be restored when app is deactivated. 
static bool fullscreen; // the app is shutting down. // if set, ignore further Windows messages for clean shutdown. static bool is_quitting; static HWND g_hWnd = (HWND)INVALID_HANDLE_VALUE; static HDC g_hDC = (HDC)INVALID_HANDLE_VALUE; // needed by gamma code //---------------------------------------------------------------------------- // gamma class GammaRamp { public: GammaRamp() : m_hasChanged(false) { } bool Change(float gamma_r, float gamma_g, float gamma_b) { // get current ramp (once) so we can later restore it. if(!m_hasChanged) { debug_assert(wutil_IsValidHandle(g_hDC)); if(!GetDeviceGammaRamp(g_hDC, m_original)) return false; } Compute(gamma_r, m_changed+0*256); Compute(gamma_g, m_changed+1*256); Compute(gamma_b, m_changed+2*256); if(!Upload(m_changed)) return false; m_hasChanged = true; return true; } void Latch() { if(m_hasChanged) Upload(m_changed); } void RestoreOriginal() { if(m_hasChanged) Upload(m_original); } private: static void Compute(float gamma, u16* ramp) { // assume identity if invalid if(gamma <= 0.0f) gamma = 1.0f; // identity: special-case to make sure we get exact values if(gamma == 1.0f) { for(u16 i = 0; i < 256; i++) - ramp[i] = (i << 8); + ramp[i] = u16(i << 8); return; } for(int i = 0; i < 256; i++) { const double val = pow(i/255.0, (double)gamma); const double clamped = std::max(0.0, std::min(val, 1.0-DBL_EPSILON)); ramp[i] = u16_from_double(clamped); } debug_assert(ramp[0] == 0); debug_assert(ramp[255] == 0xFFFF); } bool Upload(u16* ramps) { WinScopedPreserveLastError s; SetLastError(0); debug_assert(wutil_IsValidHandle(g_hDC)); const BOOL ok = SetDeviceGammaRamp(g_hDC, ramps); debug_assert(ok); return !!ok; } bool m_hasChanged; // values are 8.8 fixed point u16 m_original[3*256]; u16 m_changed[3*256]; }; static GammaRamp gammaRamp; // note: any component gamma = 0 is assumed to be identity. int SDL_SetGamma(float r, float g, float b) { return gammaRamp.Change(r, g, b)? 
0 : -1; } //---------------------------------------------------------------------------- // video //---------------------------------------------------------------------------- static DEVMODE dm; // current video mode static HGLRC hGLRC = (HGLRC)INVALID_HANDLE_VALUE; static int depth_bits = 24; // depth buffer size; set via SDL_GL_SetAttribute // check if resolution needs to be changed static bool video_need_change(int w, int h, int cur_w, int cur_h, bool fullscreen) { // invalid: keep current settings if(w <= 0 || h <= 0) return false; // higher resolution mode needed if(w > cur_w || h > cur_h) return true; // fullscreen requested and not exact same mode set if(fullscreen && (w != cur_w || h != cur_h)) return true; return false; } static inline void video_enter_game_mode() { ShowWindow(g_hWnd, SW_RESTORE); ChangeDisplaySettings(&dm, CDS_FULLSCREEN); } static inline void video_leave_game_mode() { ChangeDisplaySettings(0, 0); ShowWindow(g_hWnd, SW_MINIMIZE); } int SDL_GL_SetAttribute(SDL_GLattr attr, int value) { if(attr == SDL_GL_DEPTH_SIZE) depth_bits = value; return 0; } static LRESULT CALLBACK wndproc(HWND hWnd, UINT uMsg, WPARAM wParam, LPARAM lParam); static HWND wsdl_CreateWindow(int w, int h) { // (create new window every time (instead of once at startup), because // pixel format isn't supposed to be changed more than once) // app instance. // returned by GetModuleHandle and used in kbd hook and window creation. const HINSTANCE hInst = GetModuleHandle(0); // register window class WNDCLASS wc; memset(&wc, 0, sizeof(wc)); wc.style = 0; wc.lpfnWndProc = wndproc; wc.lpszClassName = "WSDL"; wc.hInstance = hInst; ATOM class_atom = RegisterClass(&wc); if(!class_atom) { debug_assert(0); // SDL_SetVideoMode: RegisterClass failed return 0; } DWORD windowStyle = fullscreen? 
WS_POPUP : WS_POPUPWINDOW|WS_CAPTION|WS_MINIMIZEBOX; windowStyle |= WS_VISIBLE; windowStyle |= WS_CLIPCHILDREN|WS_CLIPSIBLINGS; // MSDN SetPixelFormat says this is required // Calculate the size of the outer window, so that the client area has // the desired dimensions. RECT r; r.left = r.top = 0; r.right = w; r.bottom = h; if (AdjustWindowRectEx(&r, windowStyle, FALSE, 0)) { w = r.right - r.left; h = r.bottom - r.top; } // note: you can override the hardcoded window name via SDL_WM_SetCaption. return CreateWindowEx(WS_EX_APPWINDOW, (LPCSTR)(uintptr_t)class_atom, "wsdl", windowStyle, 0, 0, w, h, 0, 0, hInst, 0); } static void SetPixelFormat(HDC g_hDC, int bpp) { const DWORD dwFlags = PFD_SUPPORT_OPENGL|PFD_DRAW_TO_WINDOW|PFD_DOUBLEBUFFER; BYTE cColourBits = (BYTE)bpp; BYTE cAlphaBits = 0; if(bpp == 32) { cColourBits = 24; cAlphaBits = 8; } const BYTE cAccumBits = 0; const BYTE cDepthBits = (BYTE)depth_bits; const BYTE cStencilBits = 0; const BYTE cAuxBuffers = 0; PIXELFORMATDESCRIPTOR pfd = { sizeof(PIXELFORMATDESCRIPTOR), 1, // version dwFlags, PFD_TYPE_RGBA, cColourBits, 0, 0, 0, 0, 0, 0, // c*Bits, c*Shift are unused cAlphaBits, 0, // cAlphaShift is unused cAccumBits, 0, 0, 0, 0, // cAccum*Bits are unused cDepthBits, cStencilBits, cAuxBuffers, PFD_MAIN_PLANE, 0, 0, 0, 0 // bReserved, dw*Mask are unused }; // note: the GDI pixel format functions require opengl32.dll to be loaded. // a deadlock on the next line is probably due to VLD's LdrLoadDll hook. const int pf = ChoosePixelFormat(g_hDC, &pfd); debug_assert(pf >= 1); WARN_IF_FALSE(SetPixelFormat(g_hDC, pf, &pfd)); } // set video mode wxh:bpp if necessary. // w = h = bpp = 0 => no change. 
int SDL_SetVideoMode(int w, int h, int bpp, unsigned long flags) { WinScopedPreserveLastError s; // OpenGL and GDI fullscreen = (flags & SDL_FULLSCREEN) != 0; // get current mode settings memset(&dm, 0, sizeof(dm)); dm.dmSize = sizeof(dm); EnumDisplaySettings(0, ENUM_CURRENT_SETTINGS, &dm); const int cur_w = (int)dm.dmPelsWidth, cur_h = (int)dm.dmPelsHeight; // independent of resolution; app must always get bpp it wants dm.dmBitsPerPel = bpp; dm.dmFields = DM_BITSPERPEL; if(video_need_change(w,h, cur_w,cur_h, fullscreen)) { dm.dmPelsWidth = (DWORD)w; dm.dmPelsHeight = (DWORD)h; dm.dmFields |= DM_PELSWIDTH|DM_PELSHEIGHT; } // the (possibly changed) mode will be (re)set at next WM_ACTIVATE g_hWnd = wsdl_CreateWindow(w, h); if(!wutil_IsValidHandle(g_hWnd)) return 0; g_hDC = GetDC(g_hWnd); SetPixelFormat(g_hDC, bpp); hGLRC = wglCreateContext(g_hDC); if(!hGLRC) return 0; if(!wglMakeCurrent(g_hDC, hGLRC)) return 0; return 1; } static void video_shutdown() { if(fullscreen) { LONG status = ChangeDisplaySettings(0, 0); debug_assert(status == DISP_CHANGE_SUCCESSFUL); } if(hGLRC != INVALID_HANDLE_VALUE) { WARN_IF_FALSE(wglMakeCurrent(0, 0)); WARN_IF_FALSE(wglDeleteContext(hGLRC)); hGLRC = (HGLRC)INVALID_HANDLE_VALUE; } } void SDL_GL_SwapBuffers() { SwapBuffers(g_hDC); } SDL_VideoInfo* SDL_GetVideoInfo() { static SDL_VideoInfo video_info; if(video_info.video_mem == 0) { WmiMap videoAdapter; wmi_GetClass("Win32_VideoController", videoAdapter); VARIANT vTotalMemory = videoAdapter[L"AdapterRAM"]; video_info.video_mem = vTotalMemory.lVal; } return &video_info; } SDL_Surface* SDL_GetVideoSurface() { return 0; } //---------------------------------------------------------------------------- // event queue // note: we only use winit to redirect stdout; this queue won't be used // before _cinit. 
typedef std::queue Queue; static Queue queue; static void queue_event(const SDL_Event& ev) { debug_assert(!is_quitting); queue.push(ev); } static bool dequeue_event(SDL_Event* ev) { debug_assert(!is_quitting); if(queue.empty()) return false; *ev = queue.front(); queue.pop(); return true; } //---------------------------------------------------------------------------- // app activation enum SdlActivationType { LOSE = 0, GAIN = 1 }; static inline void queue_active_event(SdlActivationType type, size_t changed_app_state) { // SDL says this event is not generated when the window is created, // but skipping the first event may confuse things. SDL_Event ev; ev.type = SDL_ACTIVEEVENT; ev.active.state = (u8)changed_app_state; - ev.active.gain = (type == GAIN)? 1 : 0; + ev.active.gain = (u8)((type == GAIN)? 1 : 0); queue_event(ev); } // SDL_APP* bitflags indicating whether we are active. // note: responsibility for yielding lies with SDL apps - // they control the main loop. static Uint8 app_state; -static void active_change_state(SdlActivationType type, size_t changed_app_state) +static void active_change_state(SdlActivationType type, Uint8 changed_app_state) { - size_t old_app_state = app_state; + Uint8 old_app_state = app_state; if(type == GAIN) app_state |= changed_app_state; else app_state &= ~changed_app_state; // generate an event - but only if the given state flags actually changed. if((old_app_state & changed_app_state) != (app_state & changed_app_state)) queue_active_event(type, changed_app_state); } static void reset_all_keys(); static LRESULT OnActivate(HWND hWnd, UINT state, HWND UNUSED(hWndActDeact), BOOL fMinimized) { SdlActivationType type; - size_t changed_app_state; + Uint8 changed_app_state; // went active and not minimized if(state != WA_INACTIVE && !fMinimized) { type = GAIN; changed_app_state = SDL_APPINPUTFOCUS|SDL_APPACTIVE; // grab keyboard focus (we previously had DefWindowProc do this). 
SetFocus(hWnd); gammaRamp.Latch(); if(fullscreen) video_enter_game_mode(); } // deactivated (Alt+Tab out) or minimized else { type = LOSE; changed_app_state = SDL_APPINPUTFOCUS; if(fMinimized) changed_app_state |= SDL_APPACTIVE; reset_all_keys(); gammaRamp.RestoreOriginal(); if(fullscreen) video_leave_game_mode(); } active_change_state(type, changed_app_state); return 0; } Uint8 SDL_GetAppState() { return app_state; } static void queue_quit_event() { SDL_Event ev; ev.type = SDL_QUIT; queue_event(ev); } //---------------------------------------------------------------------------- // keyboard // note: keysym.unicode is only returned for SDL_KEYDOWN, and is otherwise 0. static void queue_key_event(Uint8 type, SDLKey sdlk, WCHAR unicode_char) { SDL_Event ev; ev.type = type; ev.key.keysym.sym = sdlk; ev.key.keysym.unicode = (Uint16)unicode_char; queue_event(ev); } static Uint8 keys[SDLK_LAST]; // winuser.h promises VK_0..9 and VK_A..Z etc. match ASCII value. #define VK_0 '0' #define VK_A 'A' static void init_vkmap(SDLKey (&VK_keymap)[256]) { int i; // Map the VK keysyms for ( i=0; i 0) { for(int i = 0; i < output_count; i++) queue_key_event(SDL_KEYDOWN, sdlk, wchars[i]); } // dead-char; do nothing else if(output_count == -1) { } // translation failed; just generate a regular (non-unicode) event else if(output_count == 0) queue_key_event(SDL_KEYDOWN, sdlk, 0); else UNREACHABLE; } return 0; } Uint8* SDL_GetKeyState(int* num_keys) { if(num_keys) *num_keys = SDLK_LAST; return keys; } // always on (we don't care about the extra overhead) int SDL_EnableUNICODE(int UNUSED(enable)) { return 1; } //---------------------------------------------------------------------------- // mouse // background: there are several types of coordinates. // - screen coords are relative to the primary desktop and may therefore be // negative on multi-monitor systems (e.g. if secondary monitor is left of // primary). they are prefixed with screen_*. 
// - "client" coords are simply relative to the parent window's origin and // can also be negative (e.g. in the window's NC area). // these are prefixed with client_*. // - "idealized" coords are what the app sees. these range from 0 to // windowDimensions-1. they are returned by GetCoords and have no prefix. static void queue_mouse_event(int x, int y) { SDL_Event ev; ev.type = SDL_MOUSEMOTION; debug_assert(unsigned(x|y) <= USHRT_MAX); ev.motion.x = (Uint16)x; ev.motion.y = (Uint16)y; queue_event(ev); } static void queue_button_event(int button, int state, int x, int y) { SDL_Event ev; ev.type = (state == SDL_PRESSED)? SDL_MOUSEBUTTONDOWN : SDL_MOUSEBUTTONUP; ev.button.button = (u8)button; ev.button.state = (u8)state; debug_assert(unsigned(x|y) <= USHRT_MAX); ev.button.x = (Uint16)x; ev.button.y = (Uint16)y; queue_event(ev); } static int mouse_x, mouse_y; // generate a mouse move message and update our notion of the mouse position. // x, y are client pixel coordinates. // notes: // - does not actually move the OS cursor; // - called from mouse_update and SDL_WarpMouse. static void mouse_moved(int x, int y) { // nothing to do if it hasn't changed since last time if(mouse_x == x && mouse_y == y) return; mouse_x = x; mouse_y = y; queue_mouse_event(x, y); } static POINT ScreenFromClient(int client_x, int client_y) { POINT screen_pt; screen_pt.x = (LONG)client_x; screen_pt.y = (LONG)client_y; WARN_IF_FALSE(ClientToScreen(g_hWnd, &screen_pt)); return screen_pt; } // get idealized client coordinates or return false if outside our window. static bool GetCoords(int screen_x, int screen_y, int& x, int& y) { debug_assert(wutil_IsValidHandle(g_hWnd)); POINT screen_pt; screen_pt.x = (LONG)screen_x; screen_pt.y = (LONG)screen_y; POINT client_pt; { // note: MapWindowPoints has a really stupid interface, returning 0 // on failure or if no shift was needed (i.e. window is fullscreen). // we must use GetLastError to detect error conditions. 
WinScopedPreserveLastError s; SetLastError(0); client_pt = screen_pt; // translated below const int ret = MapWindowPoints(HWND_DESKTOP, g_hWnd, &client_pt, 1); debug_assert(ret != 0 || GetLastError() == 0); } { RECT client_rect; WARN_IF_FALSE(GetClientRect(g_hWnd, &client_rect)); if(!PtInRect(&client_rect, client_pt)) return false; } if(WindowFromPoint(screen_pt) != g_hWnd) return false; x = client_pt.x; y = client_pt.y; debug_assert(x >= 0 && y >= 0); return true; } static void mouse_update() { // window not created yet or already shut down. no sense reporting // mouse position, and bail now to avoid ScreenToClient failing. if(!wutil_IsValidHandle(g_hWnd)) return; // don't use DirectInput, because we want to respect the user's mouse // sensitivity settings. Windows messages are laggy, so query current // position directly. POINT screen_pt; WARN_IF_FALSE(GetCursorPos(&screen_pt)); int x, y; if(GetCoords(screen_pt.x, screen_pt.y, x, y)) { active_change_state(GAIN, SDL_APPMOUSEFOCUS); mouse_moved(x, y); } // moved outside of window else active_change_state(LOSE, SDL_APPMOUSEFOCUS); } static size_t mouse_buttons; // (we define a new function signature since the windowsx.h message crackers // don't provide for passing uMsg) static LRESULT OnMouseButton(HWND UNUSED(hWnd), UINT uMsg, int client_x, int client_y, UINT UNUSED(flags)) { int button; int state; switch(uMsg) { case WM_LBUTTONDOWN: button = SDL_BUTTON_LEFT; state = SDL_PRESSED; break; case WM_LBUTTONUP: button = SDL_BUTTON_LEFT; state = SDL_RELEASED; break; case WM_RBUTTONDOWN: button = SDL_BUTTON_RIGHT; state = SDL_PRESSED; break; case WM_RBUTTONUP: button = SDL_BUTTON_RIGHT; state = SDL_RELEASED; break; case WM_MBUTTONDOWN: button = SDL_BUTTON_MIDDLE; state = SDL_PRESSED; break; case WM_MBUTTONUP: button = SDL_BUTTON_MIDDLE; state = SDL_RELEASED; break; NODEFAULT; } // mouse capture static int outstanding_press_events; if(state == SDL_PRESSED) { // grab mouse to ensure we get up events 
if(++outstanding_press_events > 0) SetCapture(g_hWnd); } else { // release after all up events received if(--outstanding_press_events <= 0) { ReleaseCapture(); outstanding_press_events = 0; } } // update button bitfield if(state == SDL_PRESSED) mouse_buttons |= SDL_BUTTON(button); else mouse_buttons &= ~SDL_BUTTON(button); const POINT screen_pt = ScreenFromClient(client_x, client_y); int x, y; if(GetCoords(screen_pt.x, screen_pt.y, x, y)) queue_button_event(button, state, x, y); return 0; } // (note: this message is sent even if the cursor is outside our window) static LRESULT OnMouseWheel(HWND UNUSED(hWnd), int screen_x, int screen_y, int zDelta, UINT UNUSED(fwKeys)) { int x, y; if(GetCoords(screen_x, screen_y, x, y)) { int button = (zDelta < 0)? SDL_BUTTON_WHEELDOWN : SDL_BUTTON_WHEELUP; // SDL says this sends a down message followed by up. queue_button_event(button, SDL_PRESSED, x, y); queue_button_event(button, SDL_RELEASED, x, y); } return 0; // handled } Uint8 SDL_GetMouseState(int* x, int* y) { if(x) *x = (int)mouse_x; if(y) *y = (int)mouse_y; return (Uint8)mouse_buttons; } inline void SDL_WarpMouse(int x, int y) { // SDL interface provides for int, but the values should be // idealized client coords (>= 0) debug_assert(x >= 0 && y >= 0); mouse_moved(x, y); const int client_x = x, client_y = y; const POINT screen_pt = ScreenFromClient(client_x, client_y); WARN_IF_FALSE(SetCursorPos(screen_pt.x, screen_pt.y)); } int SDL_ShowCursor(int toggle) { static int cursor_visible = SDL_ENABLE; if(toggle != SDL_QUERY) { // only call Windows ShowCursor if changing the visibility - // it maintains a counter. 
if(cursor_visible != toggle) { ShowCursor(toggle); cursor_visible = toggle; } } return cursor_visible; } //---------------------------------------------------------------------------- static LRESULT OnDestroy(HWND hWnd) { debug_assert(hWnd == g_hWnd); WARN_IF_FALSE(ReleaseDC(g_hWnd, g_hDC)); g_hDC = (HDC)INVALID_HANDLE_VALUE; g_hWnd = (HWND)INVALID_HANDLE_VALUE; queue_quit_event(); PostQuitMessage(0); +#ifdef _DEBUG // see http://www.adrianmccarthy.com/blog/?p=51 // with WM_QUIT in the message queue, MessageBox will immediately // return IDABORT. to ensure any subsequent CRT error reports are // at least somewhat visible, we redirect them to debug output. _CrtSetReportMode(_CRT_ASSERT, _CRTDBG_MODE_DEBUG); +#endif return 0; } static LRESULT CALLBACK wndproc(HWND hWnd, UINT uMsg, WPARAM wParam, LPARAM lParam) { if(is_quitting) return DefWindowProc(hWnd, uMsg, wParam, lParam); switch(uMsg) { case WM_PAINT: PAINTSTRUCT ps; BeginPaint(hWnd, &ps); EndPaint(hWnd, &ps); return 0; case WM_ERASEBKGND: // this indicates we allegedly erased the background; // PAINTSTRUCT.fErase is then FALSE. 
return 1; // prevent selecting menu in fullscreen mode case WM_NCHITTEST: if(fullscreen) return HTCLIENT; break; HANDLE_MSG(hWnd, WM_ACTIVATE, OnActivate); HANDLE_MSG(hWnd, WM_DESTROY, OnDestroy); case WM_SYSCOMMAND: switch(wParam) { // prevent moving, sizing, screensaver, and power-off in fullscreen mode case SC_MOVE: case SC_SIZE: case SC_MAXIMIZE: case SC_MONITORPOWER: if(fullscreen) return 1; break; // Alt+F4 or system menu doubleclick/exit case SC_CLOSE: queue_quit_event(); break; } break; HANDLE_MSG(hWnd, WM_SYSKEYUP , OnKey); HANDLE_MSG(hWnd, WM_KEYUP , OnKey); HANDLE_MSG(hWnd, WM_SYSKEYDOWN, OnKey); HANDLE_MSG(hWnd, WM_KEYDOWN , OnKey); HANDLE_MSG(hWnd, WM_MOUSEWHEEL, OnMouseWheel); // (can't use message crackers: they do not provide for passing uMsg) case WM_LBUTTONDOWN: case WM_LBUTTONUP: case WM_RBUTTONDOWN: case WM_RBUTTONUP: case WM_MBUTTONDOWN: case WM_MBUTTONUP: return OnMouseButton(hWnd, uMsg, GET_X_LPARAM(lParam), GET_Y_LPARAM(lParam), (UINT)wParam); default: // can't call DefWindowProc here: some messages // are only conditionally 'grabbed' (e.g. NCHITTEST) break; } return DefWindowProc(hWnd, uMsg, wParam, lParam); } void SDL_PumpEvents(void) { // rationale: we would like to reduce CPU usage automatically if // possible. blocking here until a message arrives would accomplish // that, but might potentially freeze the app too long. // instead, they should check active state and call SDL_Delay etc. // if our window is minimized. 
mouse_update(); // polled MSG msg; while(PeekMessageW(&msg, 0, 0, 0, PM_REMOVE)) { DispatchMessageW(&msg); } } int SDL_PollEvent(SDL_Event* ev) { SDL_PumpEvents(); if(dequeue_event(ev)) return 1; return 0; } int SDL_PushEvent(SDL_Event* ev) { queue_event(*ev); return 0; } //----------------------------------------------------------------------------- // byte swapping // implement only if the header hasn't mapped SDL_Swap* to intrinsics #ifndef SDL_Swap16 u16 SDL_Swap16(const u16 x) { return (u16)(((x & 0xff) << 8) | (x >> 8)); } #endif #ifndef SDL_Swap32 u32 SDL_Swap32(const u32 x) { return (x << 24) | (x >> 24) | ((x << 8) & 0x00ff0000) | ((x >> 8) & 0x0000ff00); } #endif #ifndef SDL_Swap64 u64 SDL_Swap64(const u64 x) { const u32 lo = (u32)(x & 0xFFFFFFFF); const u32 hi = (u32)(x >> 32); u64 ret = SDL_Swap32(lo); ret <<= 32; // careful: must shift var of type u64, not u32 ret |= SDL_Swap32(hi); return ret; } #endif //----------------------------------------------------------------------------- // multithread support // semaphores // note: implementing these in terms of pthread sem_t doesn't help; // this wrapper is very close to the Win32 routines. static HANDLE HANDLE_from_sem(SDL_sem* s) { return (HANDLE)s; } static SDL_sem* sem_from_HANDLE(HANDLE h) { return (SDL_sem*)h; } SDL_sem* SDL_CreateSemaphore(int cnt) { HANDLE h = CreateSemaphore(0, cnt, 0x7fffffff, 0); return sem_from_HANDLE(h); } inline void SDL_DestroySemaphore(SDL_sem* sem) { HANDLE h = HANDLE_from_sem(sem); CloseHandle(h); } int SDL_SemPost(SDL_sem* sem) { HANDLE h = HANDLE_from_sem(sem); return ReleaseSemaphore(h, 1, 0); } int SDL_SemWait(SDL_sem* sem) { HANDLE h = HANDLE_from_sem(sem); return WaitForSingleObject(h, INFINITE); } // threads // users don't need to allocate SDL_Thread variables, so type = void // API returns SDL_Thread*, which is the HANDLE value itself. 
// // we go through hoops to avoid type cast warnings; // a simple union { pthread_t; SDL_Thread* } yields "uninitialized" // warnings in VC2005, so we coerce values directly. cassert(sizeof(pthread_t) == sizeof(SDL_Thread*)); SDL_Thread* SDL_CreateThread(int (*func)(void*), void* param) { pthread_t thread = 0; if(pthread_create(&thread, 0, (void* (*)(void*))func, param) < 0) return 0; return *(SDL_Thread**)&thread; } int SDL_KillThread(SDL_Thread* thread) { pthread_cancel(*(pthread_t*)&thread); return 0; } //----------------------------------------------------------------------------- // misc API void SDL_WM_SetCaption(const char* title, const char* icon) { WARN_IF_FALSE(SetWindowText(g_hWnd, title)); // real SDL ignores this parameter, so we will follow suit. UNUSED2(icon); } inline u32 SDL_GetTicks() { return GetTickCount(); } inline void SDL_Delay(Uint32 ms) { Sleep(ms); } inline void* SDL_GL_GetProcAddress(const char* name) { return wglGetProcAddress(name); } //----------------------------------------------------------------------------- // init/shutdown // defend against calling SDL_Quit twice (GameSetup does this to work // around ATI driver breakage) static ModuleInitState initState; int SDL_Init(Uint32 UNUSED(flags)) { if(!ModuleShouldInitialize(&initState)) return 0; return 0; } void SDL_Quit() { if(!ModuleShouldShutdown(&initState)) return; is_quitting = true; if(g_hDC != INVALID_HANDLE_VALUE) gammaRamp.RestoreOriginal(); if(g_hWnd != INVALID_HANDLE_VALUE) WARN_IF_FALSE(DestroyWindow(g_hWnd)); video_shutdown(); } // note: we go to the trouble of hooking stdout via winit because SDL_Init // is called fairly late (or even not at all in the case of Atlas) and // we would otherwise lose some printfs. // this is possible because wstartup calls winit after _cinit. static LibError wsdl_Init() { // redirect stdout to file (otherwise it's simply ignored on Win32). 
// notes: // - use full path for safety (works even if someone does chdir) // - the real SDL does this in its WinMain hook char path[MAX_PATH]; snprintf(path, ARRAY_SIZE(path), "%s\\stdout.txt", win_exe_dir); // ignore BoundsChecker warnings here. subsystem is set to "Windows" // to avoid the OS opening a console on startup (ugly). that means // stdout isn't associated with a lowio handle; _close ends up // getting called with fd = -1. oh well, nothing we can do. FILE* f = freopen(path, "wt", stdout); debug_assert(f); #if CONFIG_PARANOIA // disable buffering, so that no writes are lost even if the program // crashes. only enabled in full debug mode because this is really slow! setvbuf(stdout, 0, _IONBF, 0); #endif return INFO::OK; } static LibError wsdl_Shutdown() { // was redirected to stdout.txt; closing avoids a BoundsChecker warning. fclose(stdout); return INFO::OK; } #endif // #if CONFIG2_WSDL Index: ps/trunk/source/lib/sysdep/os/win/wdbg_heap.cpp =================================================================== --- ps/trunk/source/lib/sysdep/os/win/wdbg_heap.cpp (revision 6535) +++ ps/trunk/source/lib/sysdep/os/win/wdbg_heap.cpp (revision 6536) @@ -1,944 +1,948 @@ #include "precompiled.h" #include "wdbg_heap.h" #include "lib/sysdep/os/win/win.h" #include #include #include #include "lib/sysdep/cpu.h" // cpu_AtomicAdd #include "winit.h" #include "wdbg.h" // wdbg_printf #include "wdbg_sym.h" // wdbg_sym_WalkStack WINIT_REGISTER_EARLY_INIT2(wdbg_heap_Init); // wutil -> wdbg_heap WINIT_REGISTER_LATE_SHUTDOWN2(wdbg_heap_Shutdown); // last - no leaks are detected after this void wdbg_heap_Enable(bool enable) { +#ifdef _DEBUG // (avoid "expression has no effect" warning in release builds) int flags = 0; if(enable) { flags |= _CRTDBG_ALLOC_MEM_DF; // enable checks at deallocation time flags |= _CRTDBG_LEAK_CHECK_DF; // report leaks at exit #if CONFIG_PARANOIA flags |= _CRTDBG_CHECK_ALWAYS_DF; // check during every heap operation (slow!) 
flags |= _CRTDBG_DELAY_FREE_MEM_DF; // blocks cannot be reused #endif } _CrtSetDbgFlag(flags); // Send output to stdout as well as the debug window, so it works during // the normal build process as well as when debugging the test .exe _CrtSetReportMode(_CRT_WARN, _CRTDBG_MODE_FILE | _CRTDBG_MODE_DEBUG); _CrtSetReportFile(_CRT_WARN, _CRTDBG_FILE_STDOUT); +#else + UNUSED2(enable); +#endif } void wdbg_heap_Validate() { int ok = TRUE; __try { // NB: this is a no-op if !_CRTDBG_ALLOC_MEM_DF. // we could call _heapchk but that would catch fewer errors. ok = _CrtCheckMemory(); } __except(EXCEPTION_EXECUTE_HANDLER) { ok = FALSE; } wdbg_assert(ok == TRUE); // else: heap is corrupt! } //----------------------------------------------------------------------------- // improved leak detection //----------------------------------------------------------------------------- // (this relies on the debug CRT; not compiling it at all in release builds // avoids unreferenced local function warnings) // (this has only been tested on IA32 and seems to have trouble with larger // pointers and is horribly expensive, so it's disabled for now.) #if !defined(NDEBUG) && ARCH_IA32 && 0 # define ENABLE_LEAK_INSTRUMENTATION 1 #else # define ENABLE_LEAK_INSTRUMENTATION 0 #endif #if ENABLE_LEAK_INSTRUMENTATION // leak detectors often rely on macro redirection to determine the file and // line of allocation owners (see _CRTDBG_MAP_ALLOC). unfortunately this // breaks code that uses placement new or functions called free() etc. // // we avoid this problem by using stack traces. this implementation differs // from other approaches, e.g. Visual Leak Detector (the safer variant // before DLL hooking was used) in that no auxiliary storage is needed. // instead, the trace is stashed within the memory block header. // // to avoid duplication of effort, the CRT's leak detection code is not // modified; we only need an allocation and report hook. 
the latter // mixes the improved file/line information into the normal report. //----------------------------------------------------------------------------- // memory block header // the one disadvantage of our approach is that it requires knowledge of // the internal memory block header structure. it is hoped that IsValid will // uncover any changes. the following definition was adapted from dbgint.h: struct _CrtMemBlockHeader { struct _CrtMemBlockHeader* next; struct _CrtMemBlockHeader* prev; char* file; int line; // fields reversed on Win64 to ensure size % 16 == 0 #if OS_WIN64 int blockType; size_t userDataSize; #else size_t userDataSize; int blockType; #endif long allocationNumber; unsigned char gap[4]; bool IsValid() const { __try { if(prev && prev->next != this) return false; if(next && next->prev != this) return false; if((unsigned)blockType > 4) return false; if(userDataSize > 1*GiB) return false; if(allocationNumber == 0) return false; for(int i = 0; i < 4; i++) { if(gap[i] != 0xFD) return false; } // this is a false alarm if there is exactly one extant allocation, // but also a valuable indication of a block that has been removed // from the list (i.e. freed). if(prev == next) return false; } __except(EXCEPTION_EXECUTE_HANDLER) { return false; } return true; } }; static _CrtMemBlockHeader* HeaderFromData(void* userData) { _CrtMemBlockHeader* const header = ((_CrtMemBlockHeader*)userData)-1; wdbg_assert(header->IsValid()); return header; } /** * update our idea of the head of the linked list of heap blocks. * called from the allocation hook (see explanation there) * * @return the current head (most recent allocation). * @param operation the current heap operation * @param userData allocation address (if reallocating or deallocating) * @param hasChanged a convenient indication of whether the return value is * different than that of the last call. 
**/ static _CrtMemBlockHeader* GetHeapListHead(int operation, void* userData, bool& hasChanged) { static _CrtMemBlockHeader* s_heapListHead; // first call: get the heap block list head // notes: // - there is no O(1) accessor for this, so we maintain a copy. // - must be done here instead of in an initializer to guarantee // consistency, since we are now under the _HEAP_LOCK. if(!s_heapListHead) { _CrtMemState state = {0}; _CrtMemCheckpoint(&state); // O(N) s_heapListHead = state.pBlockHeader; wdbg_assert(s_heapListHead->IsValid()); } // the last operation was an allocation or expanding reallocation; // exactly one block has been prepended to the list. if(s_heapListHead->prev) { s_heapListHead = s_heapListHead->prev; // set to new head of list wdbg_assert(s_heapListHead->IsValid()); wdbg_assert(s_heapListHead->prev == 0); hasChanged = true; } // the list head remained unchanged, so the last operation was a // non-expanding reallocation or free. else hasChanged = false; // special case: handle invalidation of the list head // note: even shrinking reallocations cause deallocation. if(operation != _HOOK_ALLOC && userData == s_heapListHead+1) { s_heapListHead = s_heapListHead->next; wdbg_assert(s_heapListHead->IsValid()); hasChanged = false; // (head is now the same as last time) } return s_heapListHead; } //----------------------------------------------------------------------------- // call stack filter // we need to make the most out of the limited amount of frames. to that end, // only user functions are stored; we skip known library and helper functions. // these are determined by recording frames encountered in a backtrace. /** * extents of a module in memory; used to ignore callers that lie within * the C runtime library. 
**/ class ModuleExtents { public: ModuleExtents() : m_address(0), m_length(0) { } ModuleExtents(const char* dllName) { HMODULE hModule = GetModuleHandle(dllName); PIMAGE_NT_HEADERS ntHeaders = (PIMAGE_NT_HEADERS)((u8*)hModule + ((PIMAGE_DOS_HEADER)hModule)->e_lfanew); m_address = (uintptr_t)hModule + ntHeaders->OptionalHeader.BaseOfCode; MEMORY_BASIC_INFORMATION mbi = {0}; VirtualQuery((void*)m_address, &mbi, sizeof(mbi)); m_length = mbi.RegionSize; } uintptr_t Address() const { return m_address; } uintptr_t Length() const { return m_length; } bool Contains(uintptr_t address) const { return (address - m_address) < m_length; } private: uintptr_t m_address; size_t m_length; }; /** * set data structure that avoids dynamic allocations because they would * cause the allocation hook to be reentered (bad). **/ template class ArraySet { public: ArraySet() { m_arrayEnd = m_array; } void Add(T t) { if(m_arrayEnd == m_array+maxItems) { RemoveDuplicates(); wdbg_assert(m_arrayEnd < m_array+maxItems); } *m_arrayEnd++ = t; } bool Find(T t) const { return std::find(m_array, const_cast(m_arrayEnd), t) != m_arrayEnd; } void RemoveDuplicates() { std::sort(m_array, m_arrayEnd); m_arrayEnd = std::unique(m_array, m_arrayEnd); } private: T m_array[maxItems]; T* m_arrayEnd; }; class CallerFilter { public: CallerFilter() { AddRuntimeLibraryToIgnoreList(); m_isRecordingKnownCallers = true; CallHeapFunctions(); m_isRecordingKnownCallers = false; m_knownCallers.RemoveDuplicates(); } LibError NotifyOfCaller(uintptr_t pc) { if(!m_isRecordingKnownCallers) return INFO::SKIPPED; // do not affect the stack walk // last 'known' function has been reached if(pc == (uintptr_t)&CallerFilter::CallHeapFunctions) return INFO::OK; // stop stack walk // pc is a 'known' function on the allocation hook's back-trace // (e.g. 
_malloc_dbg and other helper functions) m_knownCallers.Add(pc); return INFO::CB_CONTINUE; } bool IsKnownCaller(uintptr_t pc) const { for(size_t i = 0; i < numModules; i++) { if(m_moduleIgnoreList[i].Contains(pc)) return true; } return m_knownCallers.Find(pc); } private: static const size_t numModules = 2; void AddRuntimeLibraryToIgnoreList() { #if MSC_VERSION && _DLL // DLL runtime library #ifdef NDEBUG static const char* dllNameFormat = "msvc%c%d" ".dll"; #else static const char* dllNameFormat = "msvc%c%d" "d" ".dll"; #endif const int dllVersion = (MSC_VERSION-600)/10; // VC2005: 1400 => 80 wdbg_assert(0 < dllVersion && dllVersion <= 999); for(int i = 0; i < numModules; i++) { static const char modules[numModules] = { 'r', 'p' }; // C and C++ runtime libraries char dllName[20]; sprintf_s(dllName, ARRAY_SIZE(dllName), dllNameFormat, modules[i], dllVersion); m_moduleIgnoreList[i] = ModuleExtents(dllName); } #endif } static void CallHeapFunctions() { { void* p1 = malloc(1); void* p2 = realloc(p1, 111); if(p2) free(p2); else free(p1); } { char* p = new char; delete p; } { char* p = new char[2]; delete[] p; } } ModuleExtents m_moduleIgnoreList[numModules]; // note: this mechanism cannot hope to exclude every single STL helper // function, which is why we need the module ignore list. // however, it is still useful when compiling against the static CRT. bool m_isRecordingKnownCallers; ArraySet m_knownCallers; }; //----------------------------------------------------------------------------- // stash (part of) a stack trace within _CrtMemBlockHeader // this avoids the need for a mapping between allocation number and the // caller information, which is slow, requires locking and consumes memory. // // callers := array of addresses inside functions that constitute the // stack back-trace. 
static const size_t numQuantizedPcBits = sizeof(uintptr_t)*CHAR_BIT - 2; static uintptr_t Quantize(uintptr_t pc) { // postcondition: the return value lies within the same function as // pc but can be stored in fewer bits. this is possible because: // - linkers typically align functions to at least four bytes // - pc is a return address and thus preceded by a call instruction and // function prolog, which requires at least four bytes. return pc/4; } static uintptr_t Expand(uintptr_t pc) { return pc*4; } static const size_t numEncodedLengthBits = 2; static const size_t maxCallers = (sizeof(char*)+sizeof(int))*CHAR_BIT / (2+14); static size_t NumBitsForEncodedLength(size_t encodedLength) { static const size_t numBitsForEncodedLength[1u << numEncodedLengthBits] = { 8, // 1K 14, // 64K 20, // 4M numQuantizedPcBits // a full pointer }; return numBitsForEncodedLength[encodedLength]; } static size_t EncodedLength(uintptr_t quantizedOffset) { for(size_t encodedLength = 0; encodedLength < 1u << numEncodedLengthBits; encodedLength++) { const size_t numBits = NumBitsForEncodedLength(encodedLength); const uintptr_t maxValue = (1u << numBits)-1; if(quantizedOffset <= maxValue) return encodedLength; } wdbg_assert(0); // unreachable return 0; } static uintptr_t codeSegmentAddress; static uintptr_t quantizedCodeSegmentAddress; static uintptr_t quantizedCodeSegmentLength; static void FindCodeSegment() { const char* dllName = 0; // current module ModuleExtents extents(dllName); codeSegmentAddress = extents.Address(); quantizedCodeSegmentAddress = Quantize(codeSegmentAddress); quantizedCodeSegmentLength = Quantize(extents.Length()); } class BitStream { public: BitStream(u8* storage, size_t storageSize) : m_remainderBits(0), m_numRemainderBits(0) , m_pos(storage), m_bitsLeft((size_t)storageSize*8) { } size_t BitsLeft() const { return m_bitsLeft; } void Write(const size_t numOutputBits, uintptr_t outputValue) { wdbg_assert(numOutputBits <= m_bitsLeft); wdbg_assert(outputValue < 
((uintptr_t)1u << numOutputBits)); size_t outputBitsLeft = numOutputBits; while(outputBitsLeft > 0) { const size_t numBits = std::min(outputBitsLeft, size_t(8)); m_bitsLeft -= numBits; // (NB: there is no need to extract exactly numBits because // outputValue's MSBs were verified to be zero) const uintptr_t outputByte = outputValue & 0xFF; outputValue >>= 8; outputBitsLeft -= numBits; m_remainderBits |= outputByte << m_numRemainderBits; m_numRemainderBits += numBits; if(m_numRemainderBits >= 8) { const u8 remainderByte = (m_remainderBits & 0xFF); m_remainderBits >>= 8; m_numRemainderBits -= 8; *m_pos++ = remainderByte; } } } void Finish() { const size_t partialBits = m_numRemainderBits % 8; if(partialBits) { m_bitsLeft -= 8-partialBits; m_numRemainderBits += 8-partialBits; } while(m_numRemainderBits) { const u8 remainderByte = (m_remainderBits & 0xFF); *m_pos++ = remainderByte; m_remainderBits >>= 8; m_numRemainderBits -= 8; } wdbg_assert(m_bitsLeft % 8 == 0); while(m_bitsLeft) { *m_pos++ = 0; m_bitsLeft -= 8; } } uintptr_t Read(const size_t numInputBits) { wdbg_assert(numInputBits <= m_bitsLeft); uintptr_t inputValue = 0; size_t inputBitsLeft = numInputBits; while(inputBitsLeft > 0) { const size_t numBits = std::min(inputBitsLeft, size_t(8)); m_bitsLeft -= numBits; if(m_numRemainderBits < numBits) { const size_t inputByte = *m_pos++; m_remainderBits |= inputByte << m_numRemainderBits; m_numRemainderBits += 8; } const uintptr_t remainderByte = (m_remainderBits & ((1u << numBits)-1)); m_remainderBits >>= numBits; m_numRemainderBits -= numBits; inputValue |= remainderByte << (numInputBits-inputBitsLeft); inputBitsLeft -= numBits; } return inputValue; } private: uintptr_t m_remainderBits; size_t m_numRemainderBits; u8* m_pos; size_t m_bitsLeft; }; static void StashCallers(_CrtMemBlockHeader* header, const uintptr_t* callers, size_t numCallers) { // transform an array of callers into a (sorted and unique) set. 
uintptr_t quantizedPcSet[maxCallers]; std::transform(callers, callers+numCallers, quantizedPcSet, Quantize); std::sort(quantizedPcSet, quantizedPcSet+numCallers); uintptr_t* const end = std::unique(quantizedPcSet, quantizedPcSet+numCallers); const size_t quantizedPcSetSize = end-quantizedPcSet; // transform the set into a sequence of quantized offsets. uintptr_t quantizedOffsets[maxCallers]; if(quantizedPcSet[0] >= quantizedCodeSegmentAddress) quantizedOffsets[0] = quantizedPcSet[0] - quantizedCodeSegmentAddress; else { quantizedOffsets[0] = quantizedPcSet[0]; // make sure RetrieveCallers can differentiate between pointers and code-segment-offsets wdbg_assert(quantizedOffsets[0] >= quantizedCodeSegmentLength); } for(size_t i = 1; i < numCallers; i++) quantizedOffsets[i] = quantizedPcSet[i] - quantizedPcSet[i-1]; // write quantized offsets to stream BitStream bitStream((u8*)&header->file, sizeof(header->file)+sizeof(header->line)); for(size_t i = 0; i < quantizedPcSetSize; i++) { const uintptr_t quantizedOffset = quantizedOffsets[i]; const size_t encodedLength = EncodedLength(quantizedOffset); const size_t numBits = NumBitsForEncodedLength(encodedLength); if(bitStream.BitsLeft() < numEncodedLengthBits+numBits) break; bitStream.Write(numEncodedLengthBits, encodedLength); bitStream.Write(numBits, quantizedOffset); } bitStream.Finish(); } static void RetrieveCallers(_CrtMemBlockHeader* header, uintptr_t* callers, size_t& numCallers) { // read quantized offsets from stream uintptr_t quantizedOffsets[maxCallers]; numCallers = 0; BitStream bitStream((u8*)&header->file, sizeof(header->file)+sizeof(header->line)); for(;;) { if(bitStream.BitsLeft() < numEncodedLengthBits) break; const size_t encodedLength = bitStream.Read(numEncodedLengthBits); const size_t numBits = NumBitsForEncodedLength(encodedLength); if(bitStream.BitsLeft() < numBits) break; const uintptr_t quantizedOffset = bitStream.Read(numBits); if(!quantizedOffset) break; quantizedOffsets[numCallers++] = 
quantizedOffset; } if(!numCallers) return; // expand offsets into a set of callers if(quantizedOffsets[0] <= quantizedCodeSegmentLength) callers[0] = Expand(quantizedOffsets[0] + quantizedCodeSegmentAddress); else callers[0] = Expand(quantizedOffsets[0]); for(size_t i = 1; i < numCallers; i++) callers[i] = callers[i-1] + Expand(quantizedOffsets[i]); } //----------------------------------------------------------------------------- // find out who called an allocation function /** * gather and store a (filtered) list of callers. **/ class CallStack { public: void Gather() { m_numCallers = 0; (void)wdbg_sym_WalkStack(OnFrame_Trampoline, (uintptr_t)this); std::fill(m_callers+m_numCallers, m_callers+maxCallers, 0); } const uintptr_t* Callers() const { return m_callers; } size_t NumCallers() const { return m_numCallers; } private: LibError OnFrame(const STACKFRAME64* frame) { const uintptr_t pc = frame->AddrPC.Offset; // skip invalid frames if(pc == 0) return INFO::CB_CONTINUE; LibError ret = m_filter.NotifyOfCaller(pc); // (CallerFilter provokes stack traces of heap functions; if that is // what happened, then we must not continue) if(ret != INFO::SKIPPED) return ret; // stop the stack walk if frame storage is full if(m_numCallers >= maxCallers) return INFO::OK; if(!m_filter.IsKnownCaller(pc)) m_callers[m_numCallers++] = pc; return INFO::CB_CONTINUE; } static LibError OnFrame_Trampoline(const STACKFRAME64* frame, uintptr_t cbData) { CallStack* this_ = (CallStack*)cbData; return this_->OnFrame(frame); } CallerFilter m_filter; uintptr_t m_callers[maxCallers]; size_t m_numCallers; }; //----------------------------------------------------------------------------- // RAII wrapper for installing a CRT allocation hook class AllocationHook { public: AllocationHook() { wdbg_assert(s_instance == 0 && s_previousHook == 0); s_instance = this; s_previousHook = _CrtSetAllocHook(Hook); } ~AllocationHook() { _CRT_ALLOC_HOOK removedHook = _CrtSetAllocHook(s_previousHook); 
wdbg_assert(removedHook == Hook); // warn if we removed someone else's hook s_instance = 0; s_previousHook = 0; } /** * @param operation either _HOOK_ALLOC, _HOOK_REALLOC or _HOOK_FREE * @param userData is only valid (nonzero) for realloc and free because * we are called BEFORE the actual heap operation. **/ virtual void OnHeapOperation(int operation, void* userData, size_t size, long allocationNumber) = 0; private: static int __cdecl Hook(int operation, void* userData, size_t size, int blockType, long allocationNumber, const unsigned char* file, int line) { static bool busy = false; wdbg_assert(!busy); busy = true; s_instance->OnHeapOperation(operation, userData, size, allocationNumber); busy = false; if(s_previousHook) return s_previousHook(operation, userData, size, blockType, allocationNumber, file, line); return 1; // continue as if the hook had never been called } // unfortunately static because we can't pass our `this' pointer through // the allocation hook. static AllocationHook* s_instance; static _CRT_ALLOC_HOOK s_previousHook; }; AllocationHook* AllocationHook::s_instance; _CRT_ALLOC_HOOK AllocationHook::s_previousHook; //----------------------------------------------------------------------------- // our allocation hook // ideally we would just stash the callers in the newly created header. // unfortunately we are called BEFORE it (and the allocation) are actually // created, so we need to keep the information around until the next call to // AllocHook; only then can it be stored. // // unfortunately the CRT does not provide an O(1) means of getting at the // most recent block header. instead, we do so once and then keep it // up-to-date in the allocation hook. this is safe because we run under // the _HEAP_LOCK and ensure the allocation numbers match. 
static intptr_t s_numAllocations; intptr_t wdbg_heap_NumberOfAllocations() { return s_numAllocations; } class AllocationTracker : public AllocationHook { public: AllocationTracker() : m_pendingAllocationNumber(0) { } virtual void OnHeapOperation(int operation, void* userData, size_t size, long allocationNumber) { UNUSED2(size); if(operation == _HOOK_ALLOC || operation == _HOOK_REALLOC) cpu_AtomicAdd(&s_numAllocations, 1); bool hasChanged; _CrtMemBlockHeader* head = GetHeapListHead(operation, userData, hasChanged); // if the head changed, the last operation was a (re)allocation and // we now have its header; stash the pending call stack there. if(hasChanged) { wdbg_assert(head->allocationNumber == m_pendingAllocationNumber); // note: overwrite existing file/line info (even if valid) to avoid // special cases in the report hook. StashCallers(head, m_pendingCallStack.Callers(), m_pendingCallStack.NumCallers()); } // remember the current caller for next time m_pendingCallStack.Gather(); // NB: called for each operation, as required by the filter recording step m_pendingAllocationNumber = allocationNumber; } private: long m_pendingAllocationNumber; CallStack m_pendingCallStack; }; //----------------------------------------------------------------------------- static void PrintCallStack(const uintptr_t* callers, size_t numCallers) { if(!numCallers || callers[0] == 0) { wdbg_printf("\n call stack not available.\n"); return; } wdbg_printf("\n partial call stack:\n"); for(size_t i = 0; i < numCallers; i++) { char name[DBG_SYMBOL_LEN] = {'\0'}; char file[DBG_FILE_LEN] = {'\0'}; int line = -1; LibError err = debug_ResolveSymbol((void*)callers[i], name, file, &line); wdbg_printf(" "); if(err != INFO::OK) wdbg_printf("(error %d resolving PC=%p) ", err, callers[i]); if(file[0] != '\0') wdbg_printf("%s(%d) : ", file, line); wdbg_printf("%s\n", name); } } static int __cdecl ReportHook(int reportType, char* message, int* out) { UNUSED2(reportType); // set up return values to reduce 
the chance of mistakes below *out = 0; // alternatives are failure (-1) and breakIntoDebugger (1) const int ret = 0; // not "handled", continue calling other hooks // note: this hook is transparent in that it never affects the CRT. // we can't suppress parts of a leak report because that causes the // rest of it to be skipped. static enum { WaitingForDump, WaitingForBlock, IsBlock } state = WaitingForDump; switch(state) { case WaitingForDump: if(!strcmp(message, "Dumping objects ->\n")) state = WaitingForBlock; return ret; case IsBlock: { // common case: "normal block at 0xPPPPPPPP, N bytes long". const char* addressString = strstr(message, "0x"); if(addressString) { const uintptr_t address = strtoul(addressString, 0, 0); _CrtMemBlockHeader* header = HeaderFromData((void*)address); uintptr_t callers[maxCallers]; size_t numCallers; RetrieveCallers(header, callers, numCallers); PrintCallStack(callers, numCallers); state = WaitingForBlock; return ret; } // else: for reasons unknown, there's apparently no information // about the block; fall through to the previous state. } case WaitingForBlock: if(message[0] == '{') state = IsBlock; // suppress messages containing "file" and "line" since the normal // interpretation of those header fields is invalid. 
else if(strchr(message, '(')) message[0] = '\0'; return ret; default: wdbg_assert(0); // unreachable } wdbg_assert(0); // unreachable return 0; } #else intptr_t wdbg_heap_NumberOfAllocations() { return 0; } #endif //----------------------------------------------------------------------------- #if ENABLE_LEAK_INSTRUMENTATION static AllocationTracker* s_tracker; #endif static LibError wdbg_heap_Init() { #if ENABLE_LEAK_INSTRUMENTATION FindCodeSegment(); // load symbol information now (fails if it happens during shutdown) char name[DBG_SYMBOL_LEN]; char file[DBG_FILE_LEN]; int line; (void)debug_ResolveSymbol(wdbg_heap_Init, name, file, &line); int ret = _CrtSetReportHook2(_CRT_RPTHOOK_INSTALL, ReportHook); if(ret == -1) abort(); s_tracker = new AllocationTracker; #endif wdbg_heap_Enable(true); return INFO::OK; } static LibError wdbg_heap_Shutdown() { #if ENABLE_LEAK_INSTRUMENTATION SAFE_DELETE(s_tracker); #endif return INFO::OK; } Index: ps/trunk/source/lib/sysdep/os/win/wsysdep.cpp =================================================================== --- ps/trunk/source/lib/sysdep/os/win/wsysdep.cpp (revision 6535) +++ ps/trunk/source/lib/sysdep/os/win/wsysdep.cpp (revision 6536) @@ -1,360 +1,357 @@ /** * ========================================================================= * File : wsysdep.cpp * Project : 0 A.D. 
* Description : Windows backend of the sysdep interface * ========================================================================= */ // license: GPL; see lib/license.txt #include "precompiled.h" #include "lib/sysdep/sysdep.h" #include "win.h" // includes windows.h; must come before shlobj #include // pick_dir #include "lib/sysdep/clipboard.h" #include "error_dialog.h" #include "wutil.h" #if MSC_VERSION #pragma comment(lib, "shell32.lib") // for sys_pick_directory SH* calls #endif void sys_display_msg(const wchar_t* caption, const wchar_t* msg) { MessageBoxW(0, msg, caption, MB_ICONEXCLAMATION|MB_TASKMODAL|MB_SETFOREGROUND); } //----------------------------------------------------------------------------- // "program error" dialog (triggered by debug_assert and exception) //----------------------------------------------------------------------------- // support for resizing the dialog / its controls // (have to do this manually - grr) static POINTS dlg_client_origin; static POINTS dlg_prev_client_size; static const size_t ANCHOR_LEFT = 0x01; static const size_t ANCHOR_RIGHT = 0x02; static const size_t ANCHOR_TOP = 0x04; static const size_t ANCHOR_BOTTOM = 0x08; static const size_t ANCHOR_ALL = 0x0f; static void dlg_resize_control(HWND hDlg, int dlg_item, int dx,int dy, size_t anchors) { HWND hControl = GetDlgItem(hDlg, dlg_item); RECT r; GetWindowRect(hControl, &r); int w = r.right - r.left, h = r.bottom - r.top; int x = r.left - dlg_client_origin.x, y = r.top - dlg_client_origin.y; if(anchors & ANCHOR_RIGHT) { // right only if(!(anchors & ANCHOR_LEFT)) x += dx; // horizontal (stretch width) else w += dx; } if(anchors & ANCHOR_BOTTOM) { // bottom only if(!(anchors & ANCHOR_TOP)) y += dy; // vertical (stretch height) else h += dy; } SetWindowPos(hControl, 0, x,y, w,h, SWP_NOZORDER); } static void dlg_resize(HWND hDlg, WPARAM wParam, LPARAM lParam) { // 'minimize' was clicked. 
we need to ignore this, otherwise // dx/dy would reduce some control positions to less than 0. // since Windows clips them, we wouldn't later be able to // reconstruct the previous values when 'restoring'. if(wParam == SIZE_MINIMIZED) return; // first call for this dialog instance. WM_MOVE hasn't been sent yet, // so dlg_client_origin are invalid => must not call resize_control(). // we need to set dlg_prev_client_size for the next call before exiting. bool first_call = (dlg_prev_client_size.y == 0); POINTS dlg_client_size = MAKEPOINTS(lParam); int dx = dlg_client_size.x - dlg_prev_client_size.x; int dy = dlg_client_size.y - dlg_prev_client_size.y; dlg_prev_client_size = dlg_client_size; if(first_call) return; dlg_resize_control(hDlg, IDC_CONTINUE, dx,dy, ANCHOR_LEFT|ANCHOR_BOTTOM); dlg_resize_control(hDlg, IDC_SUPPRESS, dx,dy, ANCHOR_LEFT|ANCHOR_BOTTOM); dlg_resize_control(hDlg, IDC_BREAK , dx,dy, ANCHOR_LEFT|ANCHOR_BOTTOM); dlg_resize_control(hDlg, IDC_EXIT , dx,dy, ANCHOR_LEFT|ANCHOR_BOTTOM); dlg_resize_control(hDlg, IDC_COPY , dx,dy, ANCHOR_RIGHT|ANCHOR_BOTTOM); dlg_resize_control(hDlg, IDC_EDIT1 , dx,dy, ANCHOR_ALL); } struct DialogParams { const wchar_t* text; size_t flags; }; static INT_PTR CALLBACK error_dialog_proc(HWND hDlg, unsigned int msg, WPARAM wParam, LPARAM lParam) { switch(msg) { case WM_INITDIALOG: { const DialogParams* params = (const DialogParams*)lParam; HWND hWnd; // need to reset for new instance of dialog dlg_client_origin.x = dlg_client_origin.y = 0; dlg_prev_client_size.x = dlg_prev_client_size.y = 0; if(!(params->flags & DE_ALLOW_SUPPRESS)) { hWnd = GetDlgItem(hDlg, IDC_SUPPRESS); EnableWindow(hWnd, FALSE); } // set fixed font for readability hWnd = GetDlgItem(hDlg, IDC_EDIT1); HGDIOBJ hObj = (HGDIOBJ)GetStockObject(SYSTEM_FIXED_FONT); LPARAM redraw = FALSE; SendMessage(hWnd, WM_SETFONT, (WPARAM)hObj, redraw); SetDlgItemTextW(hDlg, IDC_EDIT1, params->text); return TRUE; // set default keyboard focus } case WM_SYSCOMMAND: // close dialog 
if [X] is clicked (doesn't happen automatically) // note: lower 4 bits are reserved if((wParam & 0xFFF0) == SC_CLOSE) { EndDialog(hDlg, 0); return 0; // processed } break; // return 0 if processed, otherwise break case WM_COMMAND: switch(wParam) { case IDC_COPY: { // note: allocating on the stack would be easier+safer, // but this is too big. const size_t max_chars = 128*KiB; wchar_t* buf = new wchar_t[max_chars]; GetDlgItemTextW(hDlg, IDC_EDIT1, buf, max_chars); sys_clipboard_set(buf); delete[] buf; return 0; } case IDC_CONTINUE: EndDialog(hDlg, ER_CONTINUE); return 0; case IDC_SUPPRESS: EndDialog(hDlg, ER_SUPPRESS); return 0; case IDC_BREAK: EndDialog(hDlg, ER_BREAK); return 0; case IDC_EXIT: EndDialog(hDlg, ER_EXIT); return 0; default: break; } break; case WM_MOVE: dlg_client_origin = MAKEPOINTS(lParam); break; case WM_GETMINMAXINFO: { // we must make sure resize_control will never set negative coords - // Windows would clip them, and its real position would be lost. // restrict to a reasonable and good looking minimum size [pixels]. MINMAXINFO* mmi = (MINMAXINFO*)lParam; mmi->ptMinTrackSize.x = 407; mmi->ptMinTrackSize.y = 159; // determined experimentally return 0; } case WM_SIZE: dlg_resize(hDlg, wParam, lParam); break; default: break; } // we didn't process the message; caller will perform default action. return FALSE; } ErrorReaction sys_display_error(const wchar_t* text, size_t flags) { // note: other threads might still be running, crash and take down the // process before we have a chance to display this error message. // ideally we would suspend them all and resume when finished; however, // they may be holding system-wide locks (e.g. heap or loader) that // are potentially needed by DialogBoxParam. in that case, deadlock // would result; this is much worse than a crash because no error // at all is displayed to the end-user. therefore, do nothing here. 
// temporarily remove any pending quit message from the queue because // it would prevent the dialog from being displayed (DialogBoxParam // returns IDOK without doing anything). will be restored below. // notes: // - this isn't only relevant at exit - Windows also posts one if // window init fails. therefore, it is important that errors can be // displayed regardless. // - by passing hWnd=0, we check all windows belonging to the current // thread. there is no reason to use hWndParent below. MSG msg; BOOL quit_pending = PeekMessage(&msg, 0, WM_QUIT, WM_QUIT, PM_REMOVE); const HINSTANCE hInstance = wutil_LibModuleHandle; LPCSTR lpTemplateName = MAKEINTRESOURCE(IDD_DIALOG1); const DialogParams params = { text, flags }; // get the enclosing app's window handle. we can't just pass 0 or // the desktop window because the dialog must be modal (if the app // continues running, it may crash and take down the process before // we've managed to show the dialog). const HWND hWndParent = wutil_AppWindow(); INT_PTR ret = DialogBoxParam(hInstance, lpTemplateName, hWndParent, error_dialog_proc, (LPARAM)¶ms); if(quit_pending) PostQuitMessage((int)msg.wParam); // failed; warn user and make sure we return an ErrorReaction. if(ret == 0 || ret == -1) { debug_DisplayMessage(L"Error", L"Unable to display detailed error dialog."); return ER_CONTINUE; } return (ErrorReaction)ret; } //----------------------------------------------------------------------------- // misc //----------------------------------------------------------------------------- LibError sys_error_description_r(int user_err, char* buf, size_t max_chars) { // validate user_err - Win32 doesn't have negative error numbers if(user_err < 0) return ERR::FAIL; // NOWARN const DWORD err = user_err? (DWORD)user_err : GetLastError(); // no one likes to see "The operation completed successfully" in // error messages, so return more descriptive text instead. 
if(err == 0) { strcpy_s(buf, max_chars, "0 (no error code was set)"); return INFO::OK; } const LPCVOID source = 0; // ignored (we're not using FROM_HMODULE etc.) const DWORD lang_id = 0; // look for neutral, then current locale va_list* args = 0; // we don't care about "inserts" const DWORD chars_output = FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM, source, err, lang_id, buf, (DWORD)max_chars, args); if(!chars_output) WARN_RETURN(ERR::FAIL); debug_assert(chars_output < max_chars); return INFO::OK; } void sys_get_module_filename(void* addr, wchar_t* path, size_t max_chars) { path[0] = '\0'; // in case either API call below fails - wchar_t* module_filename = path; MEMORY_BASIC_INFORMATION mbi; if(VirtualQuery(addr, &mbi, sizeof(mbi))) { HMODULE hModule = (HMODULE)mbi.AllocationBase; - if(GetModuleFileNameW(hModule, path, (DWORD)max_chars)) - module_filename = wcsrchr(path, '\\')+1; - // note: GetModuleFileName returns full path => a '\\' exists + GetModuleFileNameW(hModule, path, (DWORD)max_chars); } } LibError sys_get_executable_name(char* n_path, size_t max_chars) { const DWORD num_chars = GetModuleFileName(0, n_path, (DWORD)max_chars); return num_chars? INFO::OK : ERR::FAIL; } // callback for shell directory picker: used to set // starting directory to the current directory (for user convenience). static int CALLBACK browse_cb(HWND hWnd, unsigned int msg, LPARAM UNUSED(lParam), LPARAM ldata) { if(msg == BFFM_INITIALIZED) { const char* cur_dir = (const char*)ldata; SendMessage(hWnd, BFFM_SETSELECTIONA, 1, (LPARAM)cur_dir); return 1; } return 0; } LibError sys_pick_directory(char* path, size_t max_chars) { // bring up dialog; set starting directory to current working dir. WARN_IF_FALSE(GetCurrentDirectory((DWORD)max_chars, path)); BROWSEINFOA bi; memset(&bi, 0, sizeof(bi)); bi.ulFlags = BIF_RETURNONLYFSDIRS; bi.lpfn = (BFFCALLBACK)browse_cb; bi.lParam = (LPARAM)path; LPITEMIDLIST pidl = SHBrowseForFolderA(&bi); // translate ITEMIDLIST to string. 
note: SHGetPathFromIDList doesn't // support a user-specified char limit *sigh* debug_assert(max_chars >= MAX_PATH); BOOL ok = SHGetPathFromIDList(pidl, path); // free the ITEMIDLIST IMalloc* p_malloc; SHGetMalloc(&p_malloc); p_malloc->Free(pidl); p_malloc->Release(); return LibError_from_win32(ok); } Index: ps/trunk/source/lib/sysdep/os/win/whrt/qpc.cpp =================================================================== --- ps/trunk/source/lib/sysdep/os/win/whrt/qpc.cpp (revision 6535) +++ ps/trunk/source/lib/sysdep/os/win/whrt/qpc.cpp (revision 6536) @@ -1,140 +1,140 @@ /** * ========================================================================= * File : qpc.cpp * Project : 0 A.D. * Description : Timer implementation using QueryPerformanceCounter * ========================================================================= */ // license: GPL; see lib/license.txt #include "precompiled.h" #include "qpc.h" #include "counter.h" #include "lib/sysdep/os_cpu.h" #include "lib/sysdep/os/win/win.h" #include "lib/sysdep/os/win/wutil.h" // wutil_argv #include "pit.h" // PIT_FREQ #include "pmt.h" // PMT_FREQ class CounterQPC : public ICounter { public: CounterQPC() : m_frequency(-1) { } virtual const char* Name() const { return "QPC"; } LibError Activate() { // note: QPC is observed to be universally supported, but the API // provides for failure, so play it safe. LARGE_INTEGER qpcFreq, qpcValue; const BOOL ok1 = QueryPerformanceFrequency(&qpcFreq); const BOOL ok2 = QueryPerformanceCounter(&qpcValue); WARN_RETURN_IF_FALSE(ok1 && ok2); if(!qpcFreq.QuadPart || !qpcValue.QuadPart) WARN_RETURN(ERR::FAIL); m_frequency = (i64)qpcFreq.QuadPart; return INFO::OK; } void Shutdown() { } bool IsSafe() const { // note: we have separate modules that directly access some of the // counters potentially used by QPC. disabling the redundant counters // would be ugly (increased coupling). 
instead, we'll make sure our // implementations could (if necessary) coexist with QPC, but it // shouldn't come to that since only one counter is needed/used. // the PIT is entirely safe (even if annoyingly slow to read) if(m_frequency == PIT_FREQ) return true; // the PMT is generally safe (see discussion in CounterPmt::IsSafe), // but older QPC implementations had problems with 24-bit rollover. // "System clock problem can inflate benchmark scores" // (http://www.lionbridge.com/bi/cont2000/200012/perfcnt.asp ; no longer // online, nor findable in Google Cache / archive.org) tells of // incorrect values every 4.6 seconds (i.e. 24 bits @ 3.57 MHz) unless // the timer is polled in the meantime. fortunately, this is guaranteed // by our periodic updates (which come at least that often). if(m_frequency == PMT_FREQ) return true; // the TSC has been known to be buggy (even mentioned in MSDN). it is // used on MP HAL systems and can be detected by comparing QPF with the // CPU clock. we consider it unsafe unless the user promises (via // command line) that it's patched and thus reliable on their system. - bool usesTsc = IsSimilarMagnitude(m_frequency, os_cpu_ClockFrequency()); + bool usesTsc = IsSimilarMagnitude((double)m_frequency, os_cpu_ClockFrequency()); // unconfirmed reports indicate QPC sometimes uses 1/3 of the // CPU clock frequency, so check that as well. - usesTsc |= IsSimilarMagnitude(m_frequency, os_cpu_ClockFrequency()/3); + usesTsc |= IsSimilarMagnitude((double)m_frequency, os_cpu_ClockFrequency()/3); if(usesTsc) { const bool isTscSafe = wutil_HasCommandLineArgument("-wQpcTscSafe"); return isTscSafe; } // the HPET is reliable and used on Vista. it can't easily be recognized // since its frequency is variable (the spec says > 10 MHz; the master // 14.318 MHz oscillator is often used). considering frequencies in // [10, 100 MHz) to be a HPET would be dangerous because it may actually // be faster or RDTSC slower. 
we have to exclude all other cases and // assume it's a HPET - and thus safe - if we get here. return true; } u64 Counter() const { // fairly time-critical here, don't check the return value // (IsSupported made sure it succeeded initially) LARGE_INTEGER qpc_value; (void)QueryPerformanceCounter(&qpc_value); return qpc_value.QuadPart; } size_t CounterBits() const { // there are reports of incorrect rollover handling in the PMT // implementation of QPC (see CounterPMT::IsSafe). however, other // counters would be used on those systems, so it's irrelevant. // we'll report the full 64 bits. return 64; } double NominalFrequency() const { return (double)m_frequency; } double Resolution() const { return 1.0 / m_frequency; } private: // used in several places and QPF is a bit slow+cumbersome. // (i64 allows easier conversion to double) i64 m_frequency; }; ICounter* CreateCounterQPC(void* address, size_t size) { debug_assert(sizeof(CounterQPC) <= size); return new(address) CounterQPC(); } Index: ps/trunk/source/lib/sysdep/os/win/wutil.cpp =================================================================== --- ps/trunk/source/lib/sysdep/os/win/wutil.cpp (revision 6535) +++ ps/trunk/source/lib/sysdep/os/win/wutil.cpp (revision 6536) @@ -1,508 +1,508 @@ /** * ========================================================================= * File : wutil.cpp * Project : 0 A.D. 
* Description : various Windows-specific utilities * ========================================================================= */ // license: GPL; see lib/license.txt #include "precompiled.h" #include "wutil.h" #include #include // __argc #include "lib/path_util.h" #include "lib/posix/posix.h" #include "win.h" #include "winit.h" WINIT_REGISTER_EARLY_INIT(wutil_Init); WINIT_REGISTER_LATE_SHUTDOWN(wutil_Shutdown); //----------------------------------------------------------------------------- // safe allocator // // safe allocator that may be used independently of libc malloc // (in particular, before _cinit and while calling static dtors). // used by wpthread critical section code. // void* win_alloc(size_t size) { const DWORD flags = HEAP_ZERO_MEMORY; return HeapAlloc(GetProcessHeap(), flags, size); } void win_free(void* p) { const DWORD flags = 0; HeapFree(GetProcessHeap(), flags, p); } //----------------------------------------------------------------------------- // locks // several init functions are before called before _cinit. // POSIX static mutex init may not have been done by then, // so we need our own lightweight functions. 
static CRITICAL_SECTION cs[NUM_CS]; static bool cs_valid; void win_lock(WinLockId id) { if(!cs_valid) return; EnterCriticalSection(&cs[id]); } void win_unlock(WinLockId id) { if(!cs_valid) return; LeaveCriticalSection(&cs[id]); } bool win_is_locked(WinLockId id) { if(!cs_valid) return false; const BOOL successfullyEntered = TryEnterCriticalSection(&cs[id]); if(!successfullyEntered) return true; // still locked LeaveCriticalSection(&cs[id]); return false; // probably not locked } static void InitLocks() { for(int i = 0; i < NUM_CS; i++) InitializeCriticalSection(&cs[i]); cs_valid = true; } static void ShutdownLocks() { cs_valid = false; for(int i = 0; i < NUM_CS; i++) DeleteCriticalSection(&cs[i]); memset(cs, 0, sizeof(cs)); } //----------------------------------------------------------------------------- // error codes // only call after a Win32 function indicates failure. LibError LibError_from_GLE(bool warn_if_failed) { LibError err = ERR::FAIL; switch(GetLastError()) { case ERROR_OUTOFMEMORY: err = ERR::NO_MEM; break; case ERROR_INVALID_PARAMETER: err = ERR::INVALID_PARAM; break; case ERROR_INSUFFICIENT_BUFFER: err = ERR::BUF_SIZE; break; /* case ERROR_ACCESS_DENIED: err = ERR::FILE_ACCESS; break; case ERROR_FILE_NOT_FOUND: case ERROR_PATH_NOT_FOUND: err = ERR::TNODE_NOT_FOUND; break; */ } if(warn_if_failed) DEBUG_WARN_ERR(err); return err; } // return the LibError equivalent of GetLastError(), or ERR::FAIL if // there's no equal. // you should SetLastError(0) before calling whatever will set ret // to make sure we do not return any stale errors. LibError LibError_from_win32(DWORD ret, bool warn_if_failed) { if(ret != FALSE) return INFO::OK; return LibError_from_GLE(warn_if_failed); } //----------------------------------------------------------------------------- // command line // copy of GetCommandLine string. will be tokenized and then referenced by // the argv pointers. 
static char* argvContents; int wutil_argc = 0; char** wutil_argv = 0; static void ReadCommandLine() { const char* commandLine = GetCommandLine(); // (this changes as quotation marks are removed) size_t numChars = strlen(commandLine); argvContents = (char*)HeapAlloc(GetProcessHeap(), HEAP_GENERATE_EXCEPTIONS, numChars+1); strcpy_s(argvContents, numChars+1, commandLine); // first pass: tokenize string and count number of arguments bool ignoreSpace = false; for(size_t i = 0; i < numChars; i++) { switch(argvContents[i]) { case '"': ignoreSpace = !ignoreSpace; // strip the " character memmove(argvContents+i, argvContents+i+1, numChars-i); numChars--; i--; break; case ' ': if(!ignoreSpace) { argvContents[i] = '\0'; wutil_argc++; } break; } } wutil_argc++; // have argv entries point into the tokenized string wutil_argv = (char**)HeapAlloc(GetProcessHeap(), HEAP_GENERATE_EXCEPTIONS, wutil_argc*sizeof(char*)); char* nextArg = argvContents; for(int i = 0; i < wutil_argc; i++) { wutil_argv[i] = nextArg; nextArg += strlen(nextArg)+1; } } static void FreeCommandLine() { HeapFree(GetProcessHeap(), 0, wutil_argv); HeapFree(GetProcessHeap(), 0, argvContents); } bool wutil_HasCommandLineArgument(const char* arg) { for(int i = 0; i < wutil_argc; i++) { if(!strcmp(wutil_argv[i], arg)) return true; } return false; } //----------------------------------------------------------------------------- // directories char win_sys_dir[MAX_PATH+1]; char win_exe_dir[MAX_PATH+1]; static void GetDirectories() { GetSystemDirectory(win_sys_dir, sizeof(win_sys_dir)); const DWORD len = GetModuleFileName(GetModuleHandle(0), win_exe_dir, MAX_PATH); debug_assert(len != 0); // strip EXE filename and trailing slash char* slash = strrchr(win_exe_dir, '\\'); if(slash) *slash = '\0'; else debug_assert(0); // directory name invalid?! 
} //----------------------------------------------------------------------------- // user32 fix // HACK: make sure a reference to user32 is held, even if someone // decides to delay-load it. this fixes bug #66, which was the // Win32 mouse cursor (set via user32!SetCursor) appearing as a // black 32x32(?) rectangle. underlying cause was as follows: // powrprof.dll was the first client of user32, causing it to be // loaded. after we were finished with powrprof, we freed it, in turn // causing user32 to unload. later code would then reload user32, // which apparently terminally confused the cursor implementation. // // since we hold a reference here, user32 will never unload. // of course, the benefits of delay-loading are lost for this DLL, // but that is unavoidable. it is safer to force loading it, rather // than documenting the problem and asking it not be delay-loaded. static HMODULE hUser32Dll; static void ForciblyLoadUser32Dll() { hUser32Dll = LoadLibrary("user32.dll"); } // avoids Boundschecker warning static void FreeUser32Dll() { FreeLibrary(hUser32Dll); } //----------------------------------------------------------------------------- // memory static void EnableLowFragmentationHeap() { #if WINVER >= 0x0501 const HMODULE hKernel32Dll = GetModuleHandle("kernel32.dll"); typedef BOOL (WINAPI* PHeapSetInformation)(HANDLE, HEAP_INFORMATION_CLASS, void*, size_t); PHeapSetInformation pHeapSetInformation = (PHeapSetInformation)GetProcAddress(hKernel32Dll, "HeapSetInformation"); if(!pHeapSetInformation) return; ULONG flags = 2; // enable LFH pHeapSetInformation(GetProcessHeap(), HeapCompatibilityInformation, &flags, sizeof(flags)); #endif // #if WINVER >= 0x0501 } //----------------------------------------------------------------------------- // version static char windowsVersionString[20]; static size_t windowsVersion; // see WUTIL_VERSION_* static void DetectWindowsVersion() { // note: don't use GetVersion[Ex] because it gives the version of the // emulated OS 
when running an app with compatibility shims enabled. HKEY hKey; if(RegOpenKeyEx(HKEY_LOCAL_MACHINE, "SOFTWARE\\Microsoft\\Windows NT\\CurrentVersion", 0, KEY_QUERY_VALUE, &hKey) == ERROR_SUCCESS) { DWORD size = ARRAY_SIZE(windowsVersionString); (void)RegQueryValueEx(hKey, "CurrentVersion", 0, 0, (LPBYTE)windowsVersionString, &size); int major = 0, minor = 0; int ret = sscanf(windowsVersionString, "%d.%d", &major, &minor); debug_assert(ret == 2); debug_assert(major <= 0xFF && minor <= 0xFF); windowsVersion = (major << 8) | minor; RegCloseKey(hKey); } else debug_assert(0); } const char* wutil_WindowsFamily() { debug_assert(windowsVersion != 0); switch(windowsVersion) { case WUTIL_VERSION_2K: return "Win2k"; case WUTIL_VERSION_XP: return "WinXP"; case WUTIL_VERSION_XP64: return "WinXP64"; case WUTIL_VERSION_VISTA: return "Vista"; default: return "Windows"; } } const char* wutil_WindowsVersionString() { debug_assert(windowsVersionString[0] != '\0'); return windowsVersionString; } size_t wutil_WindowsVersion() { debug_assert(windowsVersion != 0); return windowsVersion; } //----------------------------------------------------------------------------- // Wow64 // Wow64 'helpfully' redirects all 32-bit apps' accesses of // %windir%\\system32\\drivers to %windir%\\system32\\drivers\\SysWOW64. // that's bad, because the actual drivers are not in the subdirectory. to // work around this, provide for temporarily disabling redirection. 
typedef BOOL (WINAPI *PIsWow64Process)(HANDLE, PBOOL); typedef BOOL (WINAPI *PWow64DisableWow64FsRedirection)(PVOID*); typedef BOOL (WINAPI *PWow64RevertWow64FsRedirection)(PVOID); static PIsWow64Process pIsWow64Process; static PWow64DisableWow64FsRedirection pWow64DisableWow64FsRedirection; static PWow64RevertWow64FsRedirection pWow64RevertWow64FsRedirection; static bool isWow64; static void ImportWow64Functions() { const HMODULE hKernel32Dll = GetModuleHandle("kernel32.dll"); pIsWow64Process = (PIsWow64Process)GetProcAddress(hKernel32Dll, "IsWow64Process"); pWow64DisableWow64FsRedirection = (PWow64DisableWow64FsRedirection)GetProcAddress(hKernel32Dll, "Wow64DisableWow64FsRedirection"); pWow64RevertWow64FsRedirection = (PWow64RevertWow64FsRedirection)GetProcAddress(hKernel32Dll, "Wow64RevertWow64FsRedirection"); } static void DetectWow64() { // function not found => running on 32-bit Windows if(!pIsWow64Process) { isWow64 = false; return; } BOOL isWow64Process = FALSE; const BOOL ok = pIsWow64Process(GetCurrentProcess(), &isWow64Process); WARN_IF_FALSE(ok); isWow64 = (isWow64Process == TRUE); } bool wutil_IsWow64() { return isWow64; } WinScopedDisableWow64Redirection::WinScopedDisableWow64Redirection() { // note: don't just check if the function pointers are valid. 32-bit // Vista includes them but isn't running Wow64, so calling the functions // would fail. since we have to check if actually on Wow64, there's no // more need to verify the pointers (their existence is implied). 
if(!wutil_IsWow64()) return; BOOL ok = pWow64DisableWow64FsRedirection(&m_wasRedirectionEnabled); WARN_IF_FALSE(ok); } WinScopedDisableWow64Redirection::~WinScopedDisableWow64Redirection() { if(!wutil_IsWow64()) return; BOOL ok = pWow64RevertWow64FsRedirection(m_wasRedirectionEnabled); WARN_IF_FALSE(ok); } //----------------------------------------------------------------------------- // module handle #ifndef LIB_STATIC_LINK HMODULE wutil_LibModuleHandle; -BOOL WINAPI DllMain(HINSTANCE hInstance, DWORD reason, LPVOID reserved) +BOOL WINAPI DllMain(HINSTANCE hInstance, DWORD UNUSED(reason), LPVOID UNUSED(reserved)) { DisableThreadLibraryCalls(hInstance); wutil_LibModuleHandle = hInstance; return TRUE; // success (ignored unless reason == DLL_PROCESS_ATTACH) } #else HMODULE wutil_LibModuleHandle = GetModuleHandle(0); #endif //----------------------------------------------------------------------------- // find main window // this is required by the error dialog and clipboard code. // note that calling from wutil_Init won't work, because the app will not // have created its window by then. static HWND hAppWindow; static BOOL CALLBACK FindAppWindowByPid(HWND hWnd, LPARAM UNUSED(lParam)) { DWORD pid; const DWORD tid = GetWindowThreadProcessId(hWnd, &pid); UNUSED2(tid); // the function can't fail if(pid == GetCurrentProcessId()) { hAppWindow = hWnd; return FALSE; // done } return TRUE; // keep calling } HWND wutil_AppWindow() { if(!hAppWindow) { const DWORD ret = EnumWindows(FindAppWindowByPid, 0); // the callback returns FALSE when it has found the window // (so as not to waste time); EnumWindows then returns 0. // we therefore cannot check for errors. 
UNUSED2(ret); } return hAppWindow; } //----------------------------------------------------------------------------- static LibError wutil_Init() { InitLocks(); ForciblyLoadUser32Dll(); EnableLowFragmentationHeap(); ReadCommandLine(); GetDirectories(); DetectWindowsVersion(); ImportWow64Functions(); DetectWow64(); return INFO::OK; } static LibError wutil_Shutdown() { FreeCommandLine(); FreeUser32Dll(); ShutdownLocks(); return INFO::OK; } Index: ps/trunk/source/lib/sysdep/os/win/wdbg_sym.cpp =================================================================== --- ps/trunk/source/lib/sysdep/os/win/wdbg_sym.cpp (revision 6535) +++ ps/trunk/source/lib/sysdep/os/win/wdbg_sym.cpp (revision 6536) @@ -1,1871 +1,1871 @@ /** * ========================================================================= * File : wdbg_sym.cpp * Project : 0 A.D. * Description : Win32 stack trace and symbol engine. * ========================================================================= */ // license: GPL; see lib/license.txt #include "precompiled.h" #include "wdbg_sym.h" #include #include #include #include "lib/byte_order.h" #include "lib/sysdep/cpu.h" #include "lib/debug_stl.h" #include "lib/app_hooks.h" #include "lib/os_path.h" #include "lib/path_util.h" #if ARCH_IA32 # include "lib/sysdep/arch/ia32/ia32.h" # include "lib/sysdep/arch/ia32/ia32_asm.h" #endif #include "lib/external_libraries/dbghelp.h" #include "winit.h" #include "wdbg.h" #include "wutil.h" WINIT_REGISTER_CRITICAL_INIT(wdbg_sym_Init); //---------------------------------------------------------------------------- // dbghelp //---------------------------------------------------------------------------- // passed to all dbghelp symbol query functions. we're not interested in // resolving symbols in other processes; the purpose here is only to // generate a stack trace. if that changes, we need to init a local copy // of these in dump_sym_cb and pass them to all subsequent dump_*. 
static HANDLE hProcess; static uintptr_t mod_base; // for StackWalk64; taken from PE header by wdbg_init. static WORD machine; // call on-demand (allows handling exceptions raised before winit.cpp // init functions are called); no effect if already initialized. static LibError sym_init() { // bail if already initialized (there's nothing to do). // don't use pthread_once because we need to return success/error code. static uintptr_t already_initialized = 0; if(!cpu_CAS(&already_initialized, 0, 1)) return INFO::OK; hProcess = GetCurrentProcess(); // set options // notes: // - can be done before SymInitialize; we do so in case // any of the options affect it. // - do not set directly - that would zero any existing flags. DWORD opts = SymGetOptions(); opts |= SYMOPT_DEFERRED_LOADS; // the "fastest, most efficient way" //opts |= SYMOPT_DEBUG; // lots of debug spew in output window opts |= SYMOPT_UNDNAME; SymSetOptions(opts); // initialize dbghelp. // .. request symbols from all currently active modules be loaded. const BOOL fInvadeProcess = TRUE; // .. use default *symbol* search path. we don't use this to locate // our PDB file because its absolute path is stored inside the EXE. PCSTR UserSearchPath = 0; BOOL ok = SymInitialize(hProcess, UserSearchPath, fInvadeProcess); WARN_IF_FALSE(ok); mod_base = SymGetModuleBase64(hProcess, (u64)&sym_init); IMAGE_NT_HEADERS* header = ImageNtHeader((void*)(uintptr_t)mod_base); machine = header->FileHeader.Machine; return INFO::OK; } struct SYMBOL_INFO_PACKAGEW2 : public SYMBOL_INFO_PACKAGEW { SYMBOL_INFO_PACKAGEW2() { si.SizeOfStruct = sizeof(si); si.MaxNameLen = MAX_SYM_NAME; } }; #pragma pack(push, 1) // note: we can't derive from TI_FINDCHILDREN_PARAMS because its members // aren't guaranteed to precede ours (although they do in practice). 
struct TI_FINDCHILDREN_PARAMS2 { TI_FINDCHILDREN_PARAMS2(DWORD num_children) { p.Start = 0; p.Count = std::min(num_children, MAX_CHILDREN); } static const DWORD MAX_CHILDREN = 300; TI_FINDCHILDREN_PARAMS p; DWORD additional_children[MAX_CHILDREN-1]; }; #pragma pack(pop) // actual implementation; made available so that functions already under // the lock don't have to unlock (slow) to avoid recursive locking. static LibError ResolveSymbol_lk(void* ptr_of_interest, char* sym_name, char* file, int* line) { sym_init(); const DWORD64 addr = (DWORD64)ptr_of_interest; int successes = 0; // get symbol name (if requested) if(sym_name) { sym_name[0] = '\0'; SYMBOL_INFO_PACKAGEW2 sp; SYMBOL_INFOW* sym = &sp.si; if(SymFromAddrW(hProcess, addr, 0, sym)) { wsprintfA(sym_name, "%ws", sym->Name); successes++; } } // get source file and/or line number (if requested) if(file || line) { file[0] = '\0'; *line = 0; IMAGEHLP_LINE64 line_info = { sizeof(IMAGEHLP_LINE64) }; DWORD displacement; // unused but required by SymGetLineFromAddr64! if(SymGetLineFromAddr64(hProcess, addr, &displacement, &line_info)) { if(file) { // strip full path down to base name only. // this loses information, but that isn't expected to be a // problem and is balanced by not having to do this from every // call site (full path is too long to display nicely). const char* base_name = path_name_only(line_info.FileName); wsprintf(file, "%s", base_name); successes++; } if(line) { *line = line_info.LineNumber; successes++; } } } return (successes != 0)? INFO::OK : ERR::FAIL; } // read and return symbol information for the given address. all of the // output parameters are optional; we pass back as much information as is // available and desired. return 0 iff any information was successfully // retrieved and stored. // sym_name and file must hold at least the number of chars above; // file is the base name only, not path (see rationale in wdbg_sym). // the PDB implementation is rather slow (~500µs). 
// public entry point: acquires the WDBG_SYM_CS lock and forwards to
// ResolveSymbol_lk.
LibError debug_ResolveSymbol(void* ptr_of_interest, char* sym_name, char* file, int* line)
{
	WinScopedLock lock(WDBG_SYM_CS);
	return ResolveSymbol_lk(ptr_of_interest, sym_name, file, line);
}


//----------------------------------------------------------------------------
// stack walk
//----------------------------------------------------------------------------

/*
Subroutine linkage example code:

	push	param2
	push	param1
	call	func
ret_addr:
	[..]

func:
	push	ebp
	mov		ebp, esp
	sub		esp, local_size
	[..]

Stack contents (down = decreasing address)
	[param2]
	[param1]
	ret_addr
	prev_ebp         (<- current ebp points at this value)
	[local_variables]
*/


/*
	call	func1
ret1:

func1:
	push	ebp
	mov		ebp, esp
	call	func2
ret2:

func2:
	push	ebp
	mov		ebp, esp
	STARTHERE
*/

#if ARCH_IA32 && !CONFIG_OMIT_FP
# define IA32_STACK_WALK_ENABLED 1
#else
# define IA32_STACK_WALK_ENABLED 0
#endif


#if IA32_STACK_WALK_ENABLED

// advance <sf> by one frame by following the frame pointer chain
// (see the stack layout above: [fp] = caller's fp, [fp+1] = return address).
//
// returns INFO::OK if sf was updated, INFO::ALL_COMPLETE if the chain has
// ended (saved frame pointer is 0), or an error code if any of the pointers
// involved don't look valid.
static LibError ia32_walk_stack(_tagSTACKFRAME64* sf)
{
	// read previous values from _tagSTACKFRAME64
	void* prev_fp  = (void*)(uintptr_t)sf->AddrFrame .Offset;
	void* prev_ip  = (void*)(uintptr_t)sf->AddrPC    .Offset;
	void* prev_ret = (void*)(uintptr_t)sf->AddrReturn.Offset;
	if(!debug_IsStackPointer(prev_fp))
		WARN_RETURN(ERR::_11);
	if(prev_ip && !debug_IsCodePointer(prev_ip))
		WARN_RETURN(ERR::_12);
	if(prev_ret && !debug_IsCodePointer(prev_ret))
		WARN_RETURN(ERR::_13);

	// read stack frame
	void* fp       = ((void**)prev_fp)[0];
	void* ret_addr = ((void**)prev_fp)[1];
	if(!fp)
		return INFO::ALL_COMPLETE;
	if(!debug_IsStackPointer(fp))
		WARN_RETURN(ERR::_14);
	if(!debug_IsCodePointer(ret_addr))
		return ERR::FAIL;	// NOWARN (invalid address)

	void* target;
	LibError err = ia32_GetCallTarget(ret_addr, &target);
	RETURN_ERR(err);
	if(target)	// were able to determine it from the call instruction
	{
		if(!debug_IsCodePointer(target))
			return ERR::FAIL;	// NOWARN (invalid address)
	}

	// store via uintptr_t (mirrors the reads above): casting a 32-bit
	// pointer directly to a 64-bit integer would sign-extend addresses
	// at or above 2 GiB.
	sf->AddrFrame .Offset = (DWORD64)(uintptr_t)fp;
	sf->AddrPC    .Offset = (DWORD64)(uintptr_t)target;
	sf->AddrReturn.Offset = (DWORD64)(uintptr_t)ret_addr;

	return INFO::OK;
}

#endif
// note: RtlCaptureStackBackTrace (http://msinilo.pl/blog/?p=40)
// is likely to be much faster than StackWalk64 (especially relevant
// for debug_GetCaller), but wasn't known during development and
// remains undocumented.

typedef VOID (WINAPI *PRtlCaptureContext)(PCONTEXT);
// used on non-IA32 builds to capture the current context; if it is 0,
// wdbg_sym_WalkStack returns ERR::NOT_SUPPORTED when no context is passed.
// (it is not assigned within this part of the file.)
static PRtlCaptureContext s_RtlCaptureContext;


// walk the call stack and invoke <cb> (with <cbData>) for each frame.
//
// pcontext: processor state at which to start; if 0, the current context
//   is captured here.
// lastFuncToSkip: if non-zero, frames are not passed to the callback until
//   (and including) the first one whose resolved symbol name contains this
//   string. frames whose symbol cannot be resolved are passed on regardless.
//
// the callback returns INFO::CB_CONTINUE to receive the next frame; any
// other value ends the walk and is returned to the caller.
// if the frames run out first, the result is INFO::OK provided the callback
// was invoked at least once, otherwise ERR::SYM_NO_STACK_FRAMES_FOUND.
LibError wdbg_sym_WalkStack(StackFrameCallback cb, uintptr_t cbData, const CONTEXT* pcontext, const char* lastFuncToSkip)
{
	// to function properly, StackWalk64 requires a CONTEXT on
	// non-x86 systems (documented) or when in release mode (observed).
	// exception handlers can call wdbg_sym_WalkStack with their context record;
	// otherwise (e.g. dump_stack from debug_assert), we need to query it.
	CONTEXT context;
	// .. caller knows the context (most likely from an exception);
	//    since StackWalk64 may modify it, copy to a local variable.
	if(pcontext)
		context = *pcontext;
	// .. need to determine context ourselves.
	else
	{
		// there are 4 ways to do so, in order of preference:
		// - asm (easy to use but currently only implemented on IA32)
		// - RtlCaptureContext (only available on WinXP or above)
		// - intentionally raise an SEH exception and capture its context
		//   (causes annoying "first chance exception" messages and
		//   can't co-exist with WinScopedLock's destructor)
		// - GetThreadContext while suspended (a bit tricky + slow).
		//   note: it used to be common practice to query the current thread
		//   context, but WinXP SP2 and above require it be suspended.
		//
		// this MUST be done inline and not in an external function because
		// compiler-generated prolog code trashes some registers.

#if ARCH_IA32
		ia32_asm_GetCurrentContext(&context);
#else
		if(!s_RtlCaptureContext)
			return ERR::NOT_SUPPORTED;	// NOWARN
		memset(&context, 0, sizeof(context));
		context.ContextFlags = CONTEXT_CONTROL|CONTEXT_INTEGER;
		s_RtlCaptureContext(&context);
#endif
	}
	// from here on, always work with the local copy.
	pcontext = &context;

	// seed the first frame from the context's instruction, frame and
	// stack pointer registers.
	_tagSTACKFRAME64 sf;
	memset(&sf, 0, sizeof(sf));
	sf.AddrPC.Mode      = AddrModeFlat;
	sf.AddrFrame.Mode   = AddrModeFlat;
	sf.AddrStack.Mode   = AddrModeFlat;
#if ARCH_AMD64
	sf.AddrPC.Offset    = pcontext->Rip;
	sf.AddrFrame.Offset = pcontext->Rbp;
	sf.AddrStack.Offset = pcontext->Rsp;
#else
	sf.AddrPC.Offset    = pcontext->Eip;
	sf.AddrFrame.Offset = pcontext->Ebp;
	sf.AddrStack.Offset = pcontext->Esp;
#endif

	// (only the StackWalk64 path below needs dbghelp to be initialized)
#if !IA32_STACK_WALK_ENABLED
	sym_init();
#endif

	// for each stack frame found:
	LibError ret  = ERR::SYM_NO_STACK_FRAMES_FOUND;
	for(;;)
	{
		// rationale:
		// - provide a separate ia32 implementation so that simple
		//   stack walks (e.g. to determine callers of malloc) do not
		//   require firing up dbghelp. that takes tens of seconds when
		//   OS symbols are installed (because symserv is wanting to access
		//   the internet), which is entirely unacceptable.
		// - VC7.1 sometimes generates stack frames despite /Oy ;
		//   ia32_walk_stack may appear to work, but it isn't reliable in
		//   this case and therefore must not be used!
		// - don't switch between ia32_stack_walk and StackWalk64 when one
		//   of them fails: this needlessly complicates things. the ia32
		//   code is authoritative provided its prerequisite (FP not omitted)
		//   is met, otherwise totally unusable.
		LibError err;
#if IA32_STACK_WALK_ENABLED
		err = ia32_walk_stack(&sf);
#else
		{
			WinScopedLock lock(WDBG_SYM_CS);

			// note: unfortunately StackWalk64 doesn't always SetLastError,
			// so we have to reset it and check for 0. *sigh*
			SetLastError(0);
			const HANDLE hThread = GetCurrentThread();
			const BOOL ok = StackWalk64(machine, hProcess, hThread, &sf, (PVOID)pcontext, 0, SymFunctionTableAccess64, SymGetModuleBase64, 0);
			// note: don't use LibError_from_win32 because it raises a warning,
			// and this "fails" commonly (when no stack frames are left).
			err = ok? INFO::OK : ERR::FAIL;
		}
#endif

		// no more frames found - abort. note: also test FP because
		// StackWalk64 sometimes erroneously reports success.
		void* const fp = (void*)(uintptr_t)sf.AddrFrame.Offset;
		if(err != INFO::OK || !fp)
			return ret;

		if(lastFuncToSkip)
		{
			void* const pc = (void*)(uintptr_t)sf.AddrPC.Offset;
			char func[DBG_SYMBOL_LEN];
			err = debug_ResolveSymbol(pc, func, 0, 0);
			if(err == INFO::OK)
			{
				// found the last function to skip => frames after this
				// one will be reported. this frame itself is still skipped.
				if(strstr(func, lastFuncToSkip))
					lastFuncToSkip = 0;
				continue;
			}
		}

		ret = cb(&sf, cbData);
		// callback is allowing us to continue
		if(ret == INFO::CB_CONTINUE)
			ret = INFO::OK;
		// callback reports it's done; stop calling it and return that value.
		// (can be either success or failure)
		else
		{
			debug_assert(ret <= 0);	// shouldn't return > 0
			return ret;
		}
	}
}


//
// get address of Nth function above us on the call stack (uses wdbg_sym_WalkStack)
//

// called by wdbg_sym_WalkStack for each stack frame
// cbData is the address of a void* that receives the frame's PC.
// returns INFO::OK (not INFO::CB_CONTINUE), so the walk ends after the
// first frame that is passed to this callback.
static LibError nth_caller_cb(const _tagSTACKFRAME64* sf, uintptr_t cbData)
{
	void** pfunc = (void**)cbData;

	// return its address
	*pfunc = (void*)(uintptr_t)sf->AddrPC.Offset;
	return INFO::OK;
}

// return the PC of the first stack frame reported by wdbg_sym_WalkStack
// (see there for the meaning of pcontext and lastFuncToSkip), or 0 if the
// walk did not succeed.
void* debug_GetCaller(void* pcontext, const char* lastFuncToSkip)
{
	// (only read if the walk returns INFO::OK, i.e. the callback has set it)
	void* func;
	LibError ret = wdbg_sym_WalkStack(nth_caller_cb, (uintptr_t)&func, (const CONTEXT*)pcontext, lastFuncToSkip);
	return (ret == INFO::OK)? func : 0;
}



//-----------------------------------------------------------------------------
// helper routines for symbol value dump
//-----------------------------------------------------------------------------

// infinite recursion has never happened, but we check for it anyway.
static const size_t MAX_INDIRECTION = 255; static const size_t MAX_LEVEL = 255; struct DumpState { size_t level; size_t indirection; DumpState() { level = 0; indirection = 0; } }; //---------------------------------------------------------------------------- static size_t out_chars_left; static wchar_t* out_pos; // (only warn once until next out_init to avoid flood of messages.) static bool out_have_warned_of_overflow; // some top-level (*) symbols cause tons of output - so much that they may // single-handedly overflow the buffer (e.g. pointer to a tree of huge UDTs). // we can't have that, so there is a limit in place as to how much a // single top-level symbol can output. after that is reached, dumping is // aborted for that symbol but continues for the subsequent top-level symbols. // // this is implemented as follows: dump_sym_cb latches the current output // position; each dump_sym (through which all symbols go) checks if the // new position exceeds the limit and aborts if so. // slight wrinkle: since we don't want each level of UDTs to successively // realize the limit has been hit and display the error message, we // return ERR::SYM_SINGLE_SYMBOL_LIMIT once and thereafter INFO::SYM_SUPPRESS_OUTPUT. // // * example: local variables, as opposed to child symbols in a UDT. static wchar_t* out_latched_pos; static bool out_have_warned_of_limit; static void out_init(wchar_t* buf, size_t max_chars) { out_pos = buf; out_chars_left = max_chars; out_have_warned_of_overflow = false; out_have_warned_of_limit = false; } static void out(const wchar_t* fmt, ...) 
{ va_list args; va_start(args, fmt); int len = vswprintf(out_pos, out_chars_left, fmt, args); va_end(args); // success if(len >= 0) { out_pos += len; // make sure out_chars_left remains nonnegative if((size_t)len > out_chars_left) { debug_assert(0); // apparently wrote more than out_chars_left len = (int)out_chars_left; } out_chars_left -= len; } // no more room left else { // the buffer really is full yet out_chars_left may not be 0 // (since it isn't updated if vswprintf returns -1). // must be set so subsequent calls don't try to squeeze stuff in. out_chars_left = 0; // write a warning into the output buffer (once) so it isn't // abruptly cut off (which looks like an error) if(!out_have_warned_of_overflow) { out_have_warned_of_overflow = true; // with the current out_pos / out_chars_left variables, there's // no way of knowing where the buffer actually ends. no matter; // we'll just put the warning before out_pos and eat into the // second newest text. const wchar_t text[] = L"(no more room in buffer)"; wcscpy(out_pos-ARRAY_SIZE(text), text); // safe } } } static void out_erase(size_t num_chars) { // don't do anything if end of buffer was hit (prevents repeatedly // scribbling over the last few bytes). if(out_have_warned_of_overflow) return; out_chars_left += (ssize_t)num_chars; out_pos -= num_chars; *out_pos = '\0'; // make sure it's 0-terminated in case there is no further output. 
} // (see above) static void out_latch_pos() { out_have_warned_of_limit = false; out_latched_pos = out_pos; } // (see above) static LibError out_check_limit() { if(out_have_warned_of_limit) return INFO::SYM_SUPPRESS_OUTPUT; if(out_pos - out_latched_pos > 3000) // ~30 lines { out_have_warned_of_limit = true; return ERR::SYM_SINGLE_SYMBOL_LIMIT; // NOWARN } // no limit hit, proceed normally return INFO::OK; } //---------------------------------------------------------------------------- -#define INDENT STMT(for(size_t i = 0; i <= state.level; i++) out(L" ");) +#define INDENT STMT(for(size_t i__ = 0; i__ <= state.level; i__++) out(L" ");) #define UNINDENT STMT(out_erase((state.level+1)*4);) // does it look like an ASCII string is located at ? // set to 2 to search for WCS-2 strings (of western characters!). // called by dump_sequence for its string special-case. // // algorithm: scan the "string" and count # text chars vs. garbage. static bool is_string(const u8* p, size_t stride) { // note: access violations are caught by dump_sym; output is "?". int score = 0; for(;;) { // current character is: const int c = *p & 0xff; // prevent sign extension p += stride; // .. text if(isalnum(c)) score += 5; // .. end of string else if(!c) break; // .. garbage else if(!isprint(c)) score -= 4; // got enough information either way => done. // (we don't want to unnecessarily scan huge binary arrays) if(abs(score) >= 10) break; } return (score > 0); } // forward decl; called by dump_sequence and some of dump_sym_*. static LibError dump_sym(DWORD id, const u8* p, DumpState state); // from cvconst.h // // rationale: we don't provide a get_register routine, since only the // value of FP is known to dump_frame_cb (via _tagSTACKFRAME64). // displaying variables stored in registers is out of the question; // all we can do is display FP-relative variables. 
enum CV_HREG_e { CV_REG_EBP = 22, CV_AMD64_RSP = 335 }; static void dump_error(LibError err) { switch(err) { case 0: // no error => no output break; case ERR::SYM_SINGLE_SYMBOL_LIMIT: out(L"(too much output; skipping to next top-level symbol)"); break; case ERR::SYM_UNRETRIEVABLE_STATIC: out(L"(unavailable - located in another module)"); break; case ERR::SYM_UNRETRIEVABLE: out(L"(unavailable)"); break; case ERR::SYM_TYPE_INFO_UNAVAILABLE: out(L"(unavailable - type info request failed (GLE=%d))", GetLastError()); break; case ERR::SYM_INTERNAL_ERROR: out(L"(unavailable - internal error)\r\n"); break; case INFO::SYM_SUPPRESS_OUTPUT: // not an error; do not output anything. handled by caller. break; default: out(L"(unavailable - unspecified error 0x%X (%d))", err, err); break; } } // split out of dump_sequence. static LibError dump_string(const u8* p, size_t el_size) { // not char or wchar_t string if(el_size != sizeof(char) && el_size != sizeof(wchar_t)) return INFO::CANNOT_HANDLE; // not text if(!is_string(p, el_size)) return INFO::CANNOT_HANDLE; wchar_t buf[512]; if(el_size == sizeof(wchar_t)) wcscpy_s(buf, ARRAY_SIZE(buf), (const wchar_t*)p); // convert to wchar_t else { size_t i; for(i = 0; i < ARRAY_SIZE(buf)-1; i++) { buf[i] = (wchar_t)p[i]; if(buf[i] == '\0') break; } buf[i] = '\0'; } out(L"\"%s\"", buf); return INFO::OK; } // split out of dump_sequence. static void seq_determine_formatting(size_t el_size, size_t el_count, bool* fits_on_one_line, size_t* num_elements_to_show) { if(el_size == sizeof(char)) { *fits_on_one_line = el_count <= 16; *num_elements_to_show = std::min((size_t)16u, el_count); } else if(el_size <= sizeof(int)) { *fits_on_one_line = el_count <= 8; *num_elements_to_show = std::min((size_t)12u, el_count); } else { *fits_on_one_line = false; *num_elements_to_show = std::min((size_t)8u, el_count); } // make sure empty containers are displayed with [0] {}, otherwise // the lack of output looks like an error. 
if(!el_count) *fits_on_one_line = true; } static LibError dump_sequence(DebugStlIterator el_iterator, void* internal, size_t el_count, DWORD el_type_id, size_t el_size, DumpState state) { const u8* el_p = 0; // avoid "uninitialized" warning // special case: display as a string if the sequence looks to be text. // do this only if container isn't empty because the otherwise the // iterator may crash. if(el_count) { el_p = el_iterator(internal, el_size); LibError ret = dump_string(el_p, el_size); if(ret == INFO::OK) return ret; } // choose formatting based on element size and count bool fits_on_one_line; size_t num_elements_to_show; seq_determine_formatting(el_size, el_count, &fits_on_one_line, &num_elements_to_show); out(L"[%d] ", el_count); state.level++; out(fits_on_one_line? L"{ " : L"\r\n"); for(size_t i = 0; i < num_elements_to_show; i++) { if(!fits_on_one_line) INDENT; LibError err = dump_sym(el_type_id, el_p, state); el_p = el_iterator(internal, el_size); // there was no output for this child; undo its indentation (if any), // skip everything below and proceed with the next child. if(err == INFO::SYM_SUPPRESS_OUTPUT) { if(!fits_on_one_line) UNINDENT; continue; } dump_error(err); // nop if err == INFO::OK // add separator unless this is the last element (can't just // erase below due to additional "..."). if(i != num_elements_to_show-1) out(fits_on_one_line? 
L", " : L"\r\n"); if(err == ERR::SYM_SINGLE_SYMBOL_LIMIT) break; } // for each child // indicate some elements were skipped if(el_count != num_elements_to_show) out(L" ..."); state.level--; if(fits_on_one_line) out(L" }"); return INFO::OK; } static const u8* array_iterator(void* internal, size_t el_size) { const u8*& pos = *(const u8**)internal; const u8* cur_pos = pos; pos += el_size; return cur_pos; } static LibError dump_array(const u8* p, size_t el_count, DWORD el_type_id, size_t el_size, DumpState state) { const u8* iterator_internal_pos = p; return dump_sequence(array_iterator, &iterator_internal_pos, el_count, el_type_id, el_size, state); } static const _tagSTACKFRAME64* current_stackframe64; static LibError CanHandleDataKind(DWORD dataKind) { switch(dataKind) { case DataIsMember: // address is already correct (udt_dump_normal retrieved the offset; // we do it that way so we can check it against the total // UDT size for safety) and SymFromIndex would fail return INFO::SKIPPED; case DataIsUnknown: WARN_RETURN(ERR::FAIL); case DataIsStaticMember: // this symbol is defined as static in another module => // there's nothing we can do. return ERR::SYM_UNRETRIEVABLE_STATIC; // NOWARN case DataIsLocal: case DataIsStaticLocal: case DataIsParam: case DataIsObjectPtr: case DataIsFileStatic: case DataIsGlobal: case DataIsConstant: // ok, can handle return INFO::OK; } WARN_RETURN(ERR::LOGIC); // UNREACHABLE } static bool IsRelativeToFramePointer(DWORD flags, DWORD reg) { if(flags & SYMFLAG_FRAMEREL) // note: this is apparently obsolete return true; if((flags & SYMFLAG_REGREL) == 0) return false; if(reg == CV_REG_EBP || reg == CV_AMD64_RSP) return true; return false; } static bool IsUnretrievable(DWORD flags) { // note: it is unlikely that the crashdump register context // contains the correct values for this scope, so symbols // stored in or relative to a general register are unavailable. 
if(flags & SYMFLAG_REGISTER) return true; // note: IsRelativeToFramePointer is called first, so if we still // see this flag, the base register is not the frame pointer. // since we most probably don't know its value in the current // scope (see above), the symbol is inaccessible. if(flags & SYMFLAG_REGREL) return true; return false; } static LibError DetermineSymbolAddress(DWORD id, const SYMBOL_INFOW* sym, const u8** pp) { const _tagSTACKFRAME64* sf = current_stackframe64; DWORD dataKind; if(!SymGetTypeInfo(hProcess, mod_base, id, TI_GET_DATAKIND, &dataKind)) WARN_RETURN(ERR::SYM_TYPE_INFO_UNAVAILABLE); LibError ret = CanHandleDataKind(dataKind); RETURN_ERR(ret); if(ret == INFO::SKIPPED) return INFO::OK; // pp is already correct // note: we have not yet observed a non-zero TI_GET_ADDRESSOFFSET or // TI_GET_ADDRESS, and TI_GET_OFFSET is apparently equal to sym->Address. // get address uintptr_t addr = sym->Address; if(IsRelativeToFramePointer(sym->Flags, sym->Register)) { #if ARCH_AMD64 addr += sf->AddrStack.Offset; #else addr += sf->AddrFrame.Offset; # if defined(NDEBUG) // NB: the addresses of register-relative symbols are apparently // incorrect [VC8, 32-bit Wow64]. the problem occurs regardless of // IA32_STACK_WALK_ENABLED and with both ia32_asm_GetCurrentContext // and RtlCaptureContext. the EBP, ESP and EIP values returned by // ia32_asm_GetCurrentContext match those reported by the IDE, so // the problem appears to lie in the offset values stored in the PDB. 
if(sym->Flags & SYMFLAG_PARAMETER) addr += sizeof(void*); else addr += sizeof(void*) * 2; # endif #endif } else if(IsUnretrievable(sym->Flags)) return ERR::SYM_UNRETRIEVABLE; // NOWARN *pp = (const u8*)(uintptr_t)addr; debug_printf("SYM| %ws at %p flags=%X dk=%d sym->addr=%I64X fp=%I64x\n", sym->Name, *pp, sym->Flags, dataKind, sym->Address, sf->AddrFrame.Offset); return INFO::OK; } //----------------------------------------------------------------------------- // dump routines for each dbghelp symbol type //----------------------------------------------------------------------------- // these functions return != 0 if they're not able to produce any // reasonable output at all; the caller (dump_sym_data, dump_sequence, etc.) // will display the appropriate error message via dump_error. // called by dump_sym; lock is held. static LibError dump_sym_array(DWORD type_id, const u8* p, DumpState state) { ULONG64 size_ = 0; if(!SymGetTypeInfo(hProcess, mod_base, type_id, TI_GET_LENGTH, &size_)) WARN_RETURN(ERR::SYM_TYPE_INFO_UNAVAILABLE); const size_t size = (size_t)size_; // get element count and size DWORD el_type_id = 0; if(!SymGetTypeInfo(hProcess, mod_base, type_id, TI_GET_TYPEID, &el_type_id)) WARN_RETURN(ERR::SYM_TYPE_INFO_UNAVAILABLE); // .. workaround: TI_GET_COUNT returns total struct size for // arrays-of-struct. therefore, calculate as size / el_size. ULONG64 el_size_; if(!SymGetTypeInfo(hProcess, mod_base, el_type_id, TI_GET_LENGTH, &el_size_)) WARN_RETURN(ERR::SYM_TYPE_INFO_UNAVAILABLE); const size_t el_size = (size_t)el_size_; debug_assert(el_size != 0); const size_t num_elements = size/el_size; debug_assert(num_elements != 0); return dump_array(p, num_elements, el_type_id, el_size, state); } //----------------------------------------------------------------------------- // if the current value is a printable character, display in that form. // this isn't only done in btChar because characters are sometimes stored // in integers. 
static void AppendCharacterIfPrintable(u64 data)
{
	// only values below 0x100 are considered; this also keeps the
	// argument passed to isprint within the range of unsigned char.
	if(data < 0x100)
	{
		int c = (int)data;
		if(isprint(c))
			out(L" ('%hc')", c);
	}
}


// display the value of the basic type <type_id> (floating-point, boolean,
// integer, character or void) that is stored at <p>.
// returns ERR::SYM_TYPE_INFO_UNAVAILABLE if type information cannot be
// queried and ERR::SYM_UNSUPPORTED for the complex base types listed at
// the end of the switch; otherwise INFO::OK (or the result of dump_array
// when a character pointer is displayed as a string).
static LibError dump_sym_base_type(DWORD type_id, const u8* p, DumpState state)
{
	DWORD base_type;
	if(!SymGetTypeInfo(hProcess, mod_base, type_id, TI_GET_BASETYPE, &base_type))
		WARN_RETURN(ERR::SYM_TYPE_INFO_UNAVAILABLE);
	ULONG64 size_ = 0;
	if(!SymGetTypeInfo(hProcess, mod_base, type_id, TI_GET_LENGTH, &size_))
		WARN_RETURN(ERR::SYM_TYPE_INFO_UNAVAILABLE);
	const size_t size = (size_t)size_;

	// single out() call. note: we pass a single u64 for all sizes,
	// which will only work on little-endian systems.
	// must be declared before goto to avoid W4 warning.
	const wchar_t* fmt = L"";

	u64 data = movzx_le64(p, size);
	// if value is 0xCC..CC (uninitialized mem), we display as hex.
	// the output would otherwise be garbage; this makes it obvious.
	// note: be very careful to correctly handle size=0 (e.g. void*).
	for(size_t i = 0; i < size; i++)
	{
		if(p[i] != 0xCC)
			break;
		if(i == size-1)
			goto display_as_hex;
	}

	switch(base_type)
	{
	// floating-point
	case btFloat:
		if(size == sizeof(float))
		{
			// NB: the C calling convention calls for float arguments to be
			// converted to double. passing `data' wouldn't work because it's
			// merely a zero-extended 32-bit representation of the float.
			float value;
			memcpy(&value, p, sizeof(value));
			out(L"%f (0x%08I64X)", value, data);
		}
		else if(size == sizeof(double))
		{
			// %g requires an argument of type double; passing the integer
			// `data' instead is a type mismatch in a variadic call, so
			// reinterpret the bytes in the same manner as for float above.
			double value;
			memcpy(&value, p, sizeof(value));
			out(L"%g (0x%016I64X)", value, data);
		}
		else
			debug_assert(0);	// invalid float size
		break;

	// boolean
	case btBool:
		debug_assert(size == sizeof(bool));
		if(data == 0 || data == 1)
			out(L"%hs", data? "true " : "false");
		else
			out(L"(bool)0x%02I64X", data);
		break;

	// integers (displayed as decimal and hex)
	// note: 0x00000000 can get annoying (0 would be nicer),
	// but it indicates the variable size and makes for consistently
	// formatted structs/arrays. (0x1234 0 0x5678 is ugly)
	case btInt:
	case btLong:
	case btUInt:
	case btULong:
display_as_hex:
		if(size == 1)
		{
			// _TUCHAR
			if(state.indirection)
			{
				state.indirection = 0;
				return dump_array(p, 8, type_id, size, state);
			}
			fmt = L"%I64d (0x%02I64X)";
		}
		else if(size == 2)
			fmt = L"%I64d (0x%04I64X)";
		else if(size == 4)
			fmt = L"%I64d (0x%08I64X)";
		else if(size == 8)
			fmt = L"%I64d (0x%016I64X)";
		else
			debug_assert(0);	// invalid size for integers
		out(fmt, data, data);
		break;

	// character
	case btChar:
	case btWChar:
		debug_assert(size == sizeof(char) || size == sizeof(wchar_t));
		// char*, wchar_t*
		if(state.indirection)
		{
			state.indirection = 0;
			return dump_array(p, 8, type_id, size, state);
		}
		// %d expects an int; `data' is 64 bits wide, so convert explicitly
		// (the value was zero-extended from at most sizeof(wchar_t) bytes).
		out(L"%d", (int)data);
		AppendCharacterIfPrintable(data);
		break;

	// note: void* is sometimes indicated as (pointer, btNoType).
	case btVoid:
	case btNoType:
		// void* - cannot display what it's pointing to (type unknown).
		if(state.indirection)
		{
			out_erase(4);	// " -> "
			fmt = L"";
		}
		else
			debug_assert(0);	// non-pointer btVoid or btNoType
		break;

	default:
		debug_assert(0);	// unknown type
		break;

	// unsupported complex types
	case btBCD:
	case btCurrency:
	case btDate:
	case btVariant:
	case btComplex:
	case btBit:
	case btBSTR:
	case btHresult:
		return ERR::SYM_UNSUPPORTED;	// NOWARN
	}

	return INFO::OK;
}


//-----------------------------------------------------------------------------

// display the base class sub-object described by <type_id> that is stored
// at <p> by dumping its underlying type.
static LibError dump_sym_base_class(DWORD type_id, const u8* p, DumpState state)
{
	DWORD base_class_type_id;
	if(!SymGetTypeInfo(hProcess, mod_base, type_id, TI_GET_TYPEID, &base_class_type_id))
		WARN_RETURN(ERR::SYM_TYPE_INFO_UNAVAILABLE);

	// if a virtual base pointer offset can be retrieved,
	// this is a virtual base class. we can't display those because it'd
	// require reading the VTbl, which is difficult given lack of documentation
	// and just not worth it.
	DWORD vptr_ofs;
	if(SymGetTypeInfo(hProcess, mod_base, type_id, TI_GET_VIRTUALBASEPOINTEROFFSET, &vptr_ofs))
		return ERR::SYM_UNSUPPORTED;	// NOWARN

	return dump_sym(base_class_type_id, p, state);
}


//-----------------------------------------------------------------------------

// output "name = " for the data symbol <id>, determine its address and
// display its value recursively. structured exceptions raised while doing
// so are caught and reported as ERR::SYM_INTERNAL_ERROR.
static LibError dump_sym_data(DWORD id, const u8* p, DumpState state)
{
	SYMBOL_INFO_PACKAGEW2 sp;
	SYMBOL_INFOW* sym = &sp.si;
	if(!SymFromIndexW(hProcess, mod_base, id, sym))
		WARN_RETURN(ERR::SYM_TYPE_INFO_UNAVAILABLE);

	out(L"%ws = ", sym->Name);

	__try
	{
		RETURN_ERR(DetermineSymbolAddress(id, sym, &p));
		// display value recursively
		return dump_sym(sym->TypeIndex, p, state);
	}
	__except(EXCEPTION_EXECUTE_HANDLER)
	{
		return ERR::SYM_INTERNAL_ERROR;	// NOWARN
	}
}


//-----------------------------------------------------------------------------

// display the enum value stored at <p>: the name of the first enumerant
// whose value matches, or else the numeric value.
static LibError dump_sym_enum(DWORD type_id, const u8* p, DumpState UNUSED(state))
{
	ULONG64 size_ = 0;
	if(!SymGetTypeInfo(hProcess, mod_base, type_id, TI_GET_LENGTH, &size_))
		WARN_RETURN(ERR::SYM_TYPE_INFO_UNAVAILABLE);
	const size_t size = (size_t)size_;

	const i64 enum_value = movsx_le64(p, size);

	// get array of child symbols (enumerants).
	DWORD num_children;
	if(!SymGetTypeInfo(hProcess, mod_base, type_id, TI_GET_CHILDRENCOUNT, &num_children))
		WARN_RETURN(ERR::SYM_TYPE_INFO_UNAVAILABLE);
	TI_FINDCHILDREN_PARAMS2 fcp(num_children);
	if(!SymGetTypeInfo(hProcess, mod_base, type_id, TI_FINDCHILDREN, &fcp))
		WARN_RETURN(ERR::SYM_TYPE_INFO_UNAVAILABLE);
	num_children = fcp.p.Count;	// was truncated to MAX_CHILDREN
	const DWORD* children = fcp.p.ChildId;

	// for each child (enumerant):
	for(size_t i = 0; i < num_children; i++)
	{
		DWORD child_data_id = children[i];

		// get this enumerant's value. we can't make any assumptions about
		// the variant's type or size - no restriction is documented.
		// rationale: VariantChangeType is much less tedious than doing
		// it manually and guarantees we cover everything. the OLE DLL is
		// already pulled in by e.g. OpenGL anyway.
		VARIANT v;
		if(!SymGetTypeInfo(hProcess, mod_base, child_data_id, TI_GET_VALUE, &v))
			WARN_RETURN(ERR::SYM_TYPE_INFO_UNAVAILABLE);
		if(VariantChangeType(&v, &v, 0, VT_I8) != S_OK)
			continue;

		// it's the one we want - output its name.
		if(enum_value == v.llVal)
		{
			const wchar_t* name;
			if(!SymGetTypeInfo(hProcess, mod_base, child_data_id, TI_GET_SYMNAME, &name))
				WARN_RETURN(ERR::SYM_TYPE_INFO_UNAVAILABLE);
			out(L"%s", name);
			LocalFree((HLOCAL)name);
			return INFO::OK;
		}
	}

	// we weren't able to retrieve a matching enum value, but can still
	// produce reasonable output (the numeric value).
	// note: could goto here after a SGTI fails, but we fail instead
	// to make sure those errors are noticed.
	out(L"%I64d", enum_value);
	return INFO::OK;
}


//-----------------------------------------------------------------------------

// function symbols produce no output at all.
static LibError dump_sym_function(DWORD UNUSED(type_id), const u8* UNUSED(p), DumpState UNUSED(state))
{
	return INFO::SYM_SUPPRESS_OUTPUT;
}


//-----------------------------------------------------------------------------

// display a function pointer: its address (unless reached via a pointer,
// in which case dump_sym_pointer has already printed it) and, if it can
// be resolved, the name of the function.
static LibError dump_sym_function_type(DWORD UNUSED(type_id), const u8* p, DumpState state)
{
	// this symbol gives class parent, return type, and parameter count.
	// unfortunately the one thing we care about, its name,
	// isn't exposed via TI_GET_SYMNAME, so we resolve it ourselves.

	char name[DBG_SYMBOL_LEN];
	LibError err = ResolveSymbol_lk((void*)p, name, 0, 0);

	if(state.indirection == 0)
		out(L"0x%p ", p);
	if(err == INFO::OK)
		out(L"(%hs)", name);
	return INFO::OK;
}


//-----------------------------------------------------------------------------

// do not follow pointers that we have already displayed. this reduces
// clutter a bit and prevents infinite recursion for cyclical references
// (e.g. via struct S { S* p; } s; s.p = &s;)

// note: allocating memory dynamically would cause trouble if dumping
// the stack from within memory-related code (the allocation hook would
// be reentered, which is not permissible).
static const size_t maxVisited = 1000; static const u8* visited[maxVisited]; static size_t numVisited; static void ptr_reset_visited() { numVisited = 0; } static bool ptr_already_visited(const u8* p) { for(size_t i = 0; i < numVisited; i++) { if(visited[i] == p) return true; } if(numVisited < maxVisited) { visited[numVisited] = p; numVisited++; } // capacity exceeded else { // warn user - but only once (we can't use the regular // debug_DisplayError and wdbg_assert doesn't have a // suppress mechanism) static bool haveComplained; if(!haveComplained) { debug_printf("WARNING: ptr_already_visited: capacity exceeded, increase maxVisited\n"); debug_break(); haveComplained = true; } } return false; } static LibError dump_sym_pointer(DWORD type_id, const u8* p, DumpState state) { ULONG64 size_ = 0; if(!SymGetTypeInfo(hProcess, mod_base, type_id, TI_GET_LENGTH, &size_)) WARN_RETURN(ERR::SYM_TYPE_INFO_UNAVAILABLE); const size_t size = (size_t)size_; // read+output pointer's value. p = (const u8*)(uintptr_t)movzx_le64(p, size); out(L"0x%p", p); // bail if it's obvious the pointer is bogus // (=> can't display what it's pointing to) if(debug_IsPointerBogus(p)) return INFO::OK; // avoid duplicates and circular references if(ptr_already_visited(p)) { out(L" (see above)"); return INFO::OK; } // display what the pointer is pointing to. // if the pointer is invalid (despite "bogus" check above), // dump_data_sym recovers via SEH and prints an error message. // if the pointed-to value turns out to uninteresting (e.g. void*), // the responsible dump_sym* will erase "->", leaving only address. 
out(L" -> "); if(!SymGetTypeInfo(hProcess, mod_base, type_id, TI_GET_TYPEID, &type_id)) WARN_RETURN(ERR::SYM_TYPE_INFO_UNAVAILABLE); // prevent infinite recursion just to be safe (shouldn't happen) if(state.indirection >= MAX_INDIRECTION) WARN_RETURN(ERR::SYM_NESTING_LIMIT); state.indirection++; return dump_sym(type_id, p, state); } //----------------------------------------------------------------------------- static LibError dump_sym_typedef(DWORD type_id, const u8* p, DumpState state) { if(!SymGetTypeInfo(hProcess, mod_base, type_id, TI_GET_TYPEID, &type_id)) WARN_RETURN(ERR::SYM_TYPE_INFO_UNAVAILABLE); return dump_sym(type_id, p, state); } //----------------------------------------------------------------------------- // determine type and size of the given child in a UDT. // useful for UDTs that contain typedefs describing their contents, // e.g. value_type in STL containers. static LibError udt_get_child_type(const wchar_t* child_name, ULONG num_children, const DWORD* children, DWORD* el_type_id, size_t* el_size) { const DWORD lastError = GetLastError(); *el_type_id = 0; *el_size = 0; for(ULONG i = 0; i < num_children; i++) { const DWORD child_id = children[i]; SYMBOL_INFO_PACKAGEW2 sp; SYMBOL_INFOW* sym = &sp.si; if(!SymFromIndexW(hProcess, mod_base, child_id, sym)) { // this happens for several UDTs; cause is unknown. debug_assert(GetLastError() == ERROR_NOT_FOUND); continue; } if(!wcscmp(sym->Name, child_name)) { *el_type_id = sym->TypeIndex; *el_size = (size_t)sym->Size; return INFO::OK; } } SetLastError(lastError); // (happens if called for containers that are treated as STL but are not) return ERR::SYM_CHILD_NOT_FOUND; // NOWARN } static LibError udt_dump_std(const wchar_t* wtype_name, const u8* p, size_t size, DumpState state, ULONG num_children, const DWORD* children) { LibError err; // not a C++ standard library object; can't handle it. 
if(wcsncmp(wtype_name, L"std::", 5) != 0) return INFO::CANNOT_HANDLE; // check for C++ objects that should be displayed via udt_dump_normal. // STL containers are special-cased and the rest (apart from those here) // are ignored, because for the most part they are spew. if(!wcsncmp(wtype_name, L"std::pair", 9)) return INFO::CANNOT_HANDLE; // convert to char since debug_stl doesn't support wchar_t. char ctype_name[DBG_SYMBOL_LEN]; sprintf_s(ctype_name, ARRAY_SIZE(ctype_name), "%ws", wtype_name); // display contents of STL containers // .. get element type DWORD el_type_id; size_t el_size; err = udt_get_child_type(L"value_type", num_children, children, &el_type_id, &el_size); if(err != INFO::OK) goto not_valid_container; // .. get iterator and # elements size_t el_count; DebugStlIterator el_iterator; u8 it_mem[DEBUG_STL_MAX_ITERATOR_SIZE]; err = debug_stl_get_container_info(ctype_name, p, size, el_size, &el_count, &el_iterator, it_mem); if(err != INFO::OK) goto not_valid_container; return dump_sequence(el_iterator, it_mem, el_count, el_type_id, el_size, state); not_valid_container: // build and display detailed "error" message. char buf[100]; const char* text; // .. object named std::* but doesn't include a "value_type" child => // it's a non-STL C++ stdlib object. wasn't handled by the // special case above, so we just display its simplified type name // (the contents are usually spew). if(err == ERR::SYM_CHILD_NOT_FOUND) text = ""; // .. not one of the containers we can analyse. if(err == ERR::STL_CNT_UNKNOWN) text = "unsupported "; // .. container of a known type but contents are invalid. if(err == ERR::STL_CNT_INVALID) text = "uninitialized/invalid "; // .. 
some other error encountered else { sprintf_s(buf, ARRAY_SIZE(buf), "error %d while analyzing ", err); text = buf; } out(L"(%hs%hs)", text, debug_stl_simplify_name(ctype_name)); return INFO::OK; } static bool udt_should_suppress(const wchar_t* type_name) { // specialized HANDLEs are defined as pointers to structs by // DECLARE_HANDLE. we only want the numerical value (pointer address), // so prevent these structs from being displayed. // note: no need to check for indirection; these are only found in // HANDLEs (which are pointers). // removed obsolete defs: HEVENT, HFILE, HUMPD if(type_name[0] != 'H') goto not_handle; #define SUPPRESS_HANDLE(name) if(!wcscmp(type_name, L#name L"__")) return true; SUPPRESS_HANDLE(HACCEL); SUPPRESS_HANDLE(HBITMAP); SUPPRESS_HANDLE(HBRUSH); SUPPRESS_HANDLE(HCOLORSPACE); SUPPRESS_HANDLE(HCURSOR); SUPPRESS_HANDLE(HDC); SUPPRESS_HANDLE(HENHMETAFILE); SUPPRESS_HANDLE(HFONT); SUPPRESS_HANDLE(HGDIOBJ); SUPPRESS_HANDLE(HGLOBAL); SUPPRESS_HANDLE(HGLRC); SUPPRESS_HANDLE(HHOOK); SUPPRESS_HANDLE(HICON); SUPPRESS_HANDLE(HIMAGELIST); SUPPRESS_HANDLE(HIMC); SUPPRESS_HANDLE(HINSTANCE); SUPPRESS_HANDLE(HKEY); SUPPRESS_HANDLE(HKL); SUPPRESS_HANDLE(HKLOCAL); SUPPRESS_HANDLE(HMENU); SUPPRESS_HANDLE(HMETAFILE); SUPPRESS_HANDLE(HMODULE); SUPPRESS_HANDLE(HMONITOR); SUPPRESS_HANDLE(HPALETTE); SUPPRESS_HANDLE(HPEN); SUPPRESS_HANDLE(HRGN); SUPPRESS_HANDLE(HRSRC); SUPPRESS_HANDLE(HSTR); SUPPRESS_HANDLE(HTASK); SUPPRESS_HANDLE(HWINEVENTHOOK); SUPPRESS_HANDLE(HWINSTA); SUPPRESS_HANDLE(HWND); not_handle: return false; } static LibError udt_dump_suppressed(const wchar_t* type_name, const u8* UNUSED(p), size_t UNUSED(size), DumpState state, ULONG UNUSED(num_children), const DWORD* UNUSED(children)) { if(!udt_should_suppress(type_name)) return INFO::CANNOT_HANDLE; // the data symbol is pointer-to-UDT. since we won't display its // contents, leave only the pointer's value. 
if(state.indirection) out_erase(4); // " -> " // indicate something was deliberately left out // (otherwise, lack of output may be taken for an error) out(L" (..)"); return INFO::OK; } // (by now) non-trivial heuristic to determine if a UDT should be // displayed on one line or several. split out of udt_dump_normal. static bool udt_fits_on_one_line(const wchar_t* type_name, size_t child_count, size_t total_size) { // special case: always put CStr* on one line // (std::*string are displayed directly, but these go through // udt_dump_normal. we want to avoid the ensuing 3-line output) if(!wcscmp(type_name, L"CStr") || !wcscmp(type_name, L"CStr8") || !wcscmp(type_name, L"CStrW")) return true; // try to get actual number of relevant children // (typedefs etc. are never displayed, but are included in child_count. // we have to balance that vs. tons of static members, which aren't // reflected in total_size). // .. prevent division by 0. if(child_count == 0) child_count = 1; // special-case a few types that would otherwise be classified incorrectly // (due to having more or less than expected relevant children) if(!wcsncmp(type_name, L"std::pair", 9)) child_count = 2; const size_t avg_size = total_size / child_count; // (if 0, no worries - child_count will probably be large and // we return false, which is a safe default) // small UDT with a few (small) members: fits on one line. if(child_count <= 3 && avg_size <= sizeof(int)) return true; return false; } static LibError udt_dump_normal(const wchar_t* type_name, const u8* p, size_t size, DumpState state, ULONG num_children, const DWORD* children) { const bool fits_on_one_line = udt_fits_on_one_line(type_name, num_children, size); // prevent infinite recursion just to be safe (shouldn't happen) if(state.level >= MAX_LEVEL) WARN_RETURN(ERR::SYM_NESTING_LIMIT); state.level++; out(fits_on_one_line? 
L"{ " : L"\r\n"); bool displayed_anything = false; for(ULONG i = 0; i < num_children; i++) { const DWORD child_id = children[i]; // get offset. if not available, skip this child // (we only display data here, not e.g. typedefs) DWORD ofs = 0; if(!SymGetTypeInfo(hProcess, mod_base, child_id, TI_GET_OFFSET, &ofs)) continue; if(ofs >= size) { debug_printf("INVALID_UDT %ws %d %d\n", type_name, ofs, size); } //debug_assert(ofs < size); if(!fits_on_one_line) INDENT; const u8* el_p = p+ofs; LibError err = dump_sym(child_id, el_p, state); // there was no output for this child; undo its indentation (if any), // skip everything below and proceed with the next child. if(err == INFO::SYM_SUPPRESS_OUTPUT) { if(!fits_on_one_line) UNINDENT; continue; } displayed_anything = true; dump_error(err); // nop if err == INFO::OK out(fits_on_one_line? L", " : L"\r\n"); if(err == ERR::SYM_SINGLE_SYMBOL_LIMIT) break; } // for each child state.level--; if(!displayed_anything) { out_erase(2); // "{ " or "\r\n" out(L"(%s)", type_name); return INFO::OK; } // remove trailing comma separator // note: we can't avoid writing it by checking if i == num_children-1: // each child might be the last valid data member. if(fits_on_one_line) { out_erase(2); // ", " out(L" }"); } return INFO::OK; } static LibError dump_sym_udt(DWORD type_id, const u8* p, DumpState state) { ULONG64 size_ = 0; if(!SymGetTypeInfo(hProcess, mod_base, type_id, TI_GET_LENGTH, &size_)) WARN_RETURN(ERR::SYM_TYPE_INFO_UNAVAILABLE); const size_t size = (size_t)size_; // get array of child symbols (members/functions/base classes). 
DWORD num_children; if(!SymGetTypeInfo(hProcess, mod_base, type_id, TI_GET_CHILDRENCOUNT, &num_children)) WARN_RETURN(ERR::SYM_TYPE_INFO_UNAVAILABLE); TI_FINDCHILDREN_PARAMS2 fcp(num_children); if(!SymGetTypeInfo(hProcess, mod_base, type_id, TI_FINDCHILDREN, &fcp)) WARN_RETURN(ERR::SYM_TYPE_INFO_UNAVAILABLE); num_children = fcp.p.Count; // was truncated to MAX_CHILDREN const DWORD* children = fcp.p.ChildId; const wchar_t* type_name; if(!SymGetTypeInfo(hProcess, mod_base, type_id, TI_GET_SYMNAME, &type_name)) WARN_RETURN(ERR::SYM_TYPE_INFO_UNAVAILABLE); LibError ret; // note: order is important (e.g. STL special-case must come before // suppressing UDTs, which tosses out most other C++ stdlib classes) ret = udt_dump_std (type_name, p, size, state, num_children, children); if(ret != INFO::CANNOT_HANDLE) goto done; ret = udt_dump_suppressed(type_name, p, size, state, num_children, children); if(ret != INFO::CANNOT_HANDLE) goto done; ret = udt_dump_normal (type_name, p, size, state, num_children, children); if(ret != INFO::CANNOT_HANDLE) goto done; done: LocalFree((HLOCAL)type_name); return ret; } //----------------------------------------------------------------------------- static LibError dump_sym_vtable(DWORD UNUSED(type_id), const u8* UNUSED(p), DumpState UNUSED(state)) { // unsupported (vtable internals are undocumented; too much work). return INFO::SYM_SUPPRESS_OUTPUT; } //----------------------------------------------------------------------------- static LibError dump_sym_unknown(DWORD type_id, const u8* UNUSED(p), DumpState UNUSED(state)) { // redundant (already done in dump_sym), but this is rare. 
DWORD type_tag; if(!SymGetTypeInfo(hProcess, mod_base, type_id, TI_GET_SYMTAG, &type_tag)) WARN_RETURN(ERR::SYM_TYPE_INFO_UNAVAILABLE); debug_printf("SYM| unknown tag: %d\n", type_tag); out(L"(unknown symbol type)"); return INFO::OK; } //----------------------------------------------------------------------------- // write name and value of the symbol to the output buffer. // delegates to dump_sym_* depending on the symbol's tag. static LibError dump_sym(DWORD type_id, const u8* p, DumpState state) { RETURN_ERR(out_check_limit()); DWORD type_tag; if(!SymGetTypeInfo(hProcess, mod_base, type_id, TI_GET_SYMTAG, &type_tag)) WARN_RETURN(ERR::SYM_TYPE_INFO_UNAVAILABLE); switch(type_tag) { case SymTagArrayType: return dump_sym_array (type_id, p, state); case SymTagBaseType: return dump_sym_base_type (type_id, p, state); case SymTagBaseClass: return dump_sym_base_class (type_id, p, state); case SymTagData: return dump_sym_data (type_id, p, state); case SymTagEnum: return dump_sym_enum (type_id, p, state); case SymTagFunction: return dump_sym_function (type_id, p, state); case SymTagFunctionType: return dump_sym_function_type (type_id, p, state); case SymTagPointerType: return dump_sym_pointer (type_id, p, state); case SymTagTypedef: return dump_sym_typedef (type_id, p, state); case SymTagUDT: return dump_sym_udt (type_id, p, state); case SymTagVTable: return dump_sym_vtable (type_id, p, state); default: return dump_sym_unknown (type_id, p, state); } } //----------------------------------------------------------------------------- // stack trace //----------------------------------------------------------------------------- struct IMAGEHLP_STACK_FRAME2 : public IMAGEHLP_STACK_FRAME { IMAGEHLP_STACK_FRAME2(const _tagSTACKFRAME64* sf) { // apparently only PC, FP and SP are necessary, but // we go whole-hog to be safe. 
memset(this, 0, sizeof(IMAGEHLP_STACK_FRAME2)); InstructionOffset = sf->AddrPC.Offset; ReturnOffset = sf->AddrReturn.Offset; FrameOffset = sf->AddrFrame.Offset; StackOffset = sf->AddrStack.Offset; BackingStoreOffset = sf->AddrBStore.Offset; FuncTableEntry = (ULONG64)sf->FuncTableEntry; Virtual = sf->Virtual; // (note: array of different types, can't copy directly) for(int i = 0; i < 4; i++) Params[i] = sf->Params[i]; } }; static bool ShouldSkipSymbol(const wchar_t* name) { if(!wcscmp(name, L"suppress__")) return true; if(!wcscmp(name, L"__profile")) return true; return false; } // output the symbol's name and value via dump_sym*. // called from dump_frame_cb for each local symbol; lock is held. static BOOL CALLBACK dump_sym_cb(SYMBOL_INFOW* sym, ULONG UNUSED(size), void* UNUSED(ctx)) { if(ShouldSkipSymbol(sym->Name)) return TRUE; // continue out_latch_pos(); // see decl mod_base = sym->ModBase; const u8* p = (const u8*)(uintptr_t)sym->Address; DumpState state; INDENT; LibError err = dump_sym(sym->Index, p, state); dump_error(err); if(err == INFO::SYM_SUPPRESS_OUTPUT) UNINDENT; else out(L"\r\n"); return TRUE; // continue } // called by wdbg_sym_WalkStack for each stack frame static LibError dump_frame_cb(const _tagSTACKFRAME64* sf, uintptr_t UNUSED(cbData)) { current_stackframe64 = sf; void* func = (void*)(uintptr_t)sf->AddrPC.Offset; char func_name[DBG_SYMBOL_LEN]; char file[DBG_FILE_LEN]; int line; LibError ret = ResolveSymbol_lk(func, func_name, file, &line); if(ret == INFO::OK) { // don't trace back further than the app's entry point // (no one wants to see this frame). checking for the // function name isn't future-proof, but not stopping is no big deal. // an alternative would be to check if module=kernel32, but // that would cut off callbacks as well. // note: the stdcall mangled name includes parameter size, which is // different in 64-bit, so only check the first characters. 
if(!strncmp(func_name, "_BaseProcessStart", 17)) return INFO::OK; out(L"%hs (%hs:%d)\r\n", func_name, file, line); } else out(L"%p\r\n", func); // only enumerate symbols for this stack frame // (i.e. its locals and parameters) // problem: debug info is scope-aware, so we won't see any variables // declared in sub-blocks. we'd have to pass an address in that block, // which isn't worth the trouble. since IMAGEHLP_STACK_FRAME2 imghlp_frame(sf); const PIMAGEHLP_CONTEXT context = 0; // ignored SymSetContext(hProcess, &imghlp_frame, context); const ULONG64 base = 0; const wchar_t* const mask = 0; // use scope set by SymSetContext SymEnumSymbolsW(hProcess, base, mask, dump_sym_cb, 0); out(L"\r\n"); return INFO::CB_CONTINUE; } LibError debug_DumpStack(wchar_t* buf, size_t maxChars, void* pcontext, const char* lastFuncToSkip) { static uintptr_t already_in_progress; if(!cpu_CAS(&already_in_progress, 0, 1)) return ERR::REENTERED; // NOWARN out_init(buf, maxChars); ptr_reset_visited(); LibError ret = wdbg_sym_WalkStack(dump_frame_cb, 0, (const CONTEXT*)pcontext, lastFuncToSkip); already_in_progress = 0; return ret; } //----------------------------------------------------------------------------- // write out a "minidump" containing register and stack state; this enables // examining the crash in a debugger. called by wdbg_exception_filter. // heavily modified from http://www.codeproject.com/debug/XCrashReportPt3.asp // lock must be held. 
void wdbg_sym_WriteMinidump(EXCEPTION_POINTERS* exception_pointers)
{
	// (this function acquires the lock itself for its entire duration)
	WinScopedLock lock(WDBG_SYM_CS);

	// the dump is written to crashlog.dmp in the directory reported by
	// ah_get_log_dir; an existing file is overwritten (CREATE_ALWAYS).
	OsPath path = OsPath(ah_get_log_dir())/"crashlog.dmp";
	HANDLE hFile = CreateFile(path.string().c_str(), GENERIC_WRITE, FILE_SHARE_WRITE, 0, CREATE_ALWAYS, 0, 0);
	if(hFile == INVALID_HANDLE_VALUE)
	{
		DEBUG_DISPLAY_ERROR(L"wdbg_sym_WriteMinidump: unable to create crashlog.dmp.");
		return;
	}

	MINIDUMP_EXCEPTION_INFORMATION mei;
	mei.ThreadId = GetCurrentThreadId();
	mei.ExceptionPointers = exception_pointers;
	// NOTE(review): the original remark here read "exception_pointers is
	// not in our address space". according to the documentation of
	// MINIDUMP_EXCEPTION_INFORMATION, FALSE means the memory resides in the
	// address space of the calling program, i.e. the opposite - confirm.
	mei.ClientPointers = FALSE;

	// note: we don't store other crashlog info within the dump file
	// (UserStreamParam), since we will need to generate a plain text file on
	// non-Windows platforms. users will just have to send us both files.

	// (this local is distinct from the hProcess used by the dump_sym*
	// routines in this file)
	HANDLE hProcess = GetCurrentProcess();
	DWORD pid = GetCurrentProcessId();
	if(!MiniDumpWriteDump(hProcess, pid, hFile, MiniDumpNormal, &mei, 0, 0))
		DEBUG_DISPLAY_ERROR(L"wdbg_sym_WriteMinidump: unable to generate minidump.");

	CloseHandle(hFile);
}


//-----------------------------------------------------------------------------

// look up RtlCaptureContext in kernel32.dll and store its address in
// s_RtlCaptureContext (which remains 0 if the function isn't exported).
static LibError wdbg_sym_Init()
{
	HMODULE hKernel32Dll = GetModuleHandle("kernel32.dll");
	s_RtlCaptureContext = (PRtlCaptureContext)GetProcAddress(hKernel32Dll, "RtlCaptureContext");

	return INFO::OK;
}
Index: ps/trunk/source/lib/sysdep/os/win/wnuma.cpp
===================================================================
--- ps/trunk/source/lib/sysdep/os/win/wnuma.cpp (revision 6535)
+++ ps/trunk/source/lib/sysdep/os/win/wnuma.cpp (revision 6536)
@@ -1,394 +1,398 @@
#include "precompiled.h"
#include "lib/sysdep/numa.h"

#include "lib/bits.h"	// round_up, PopulationCount
#include "lib/timer.h"
#include "lib/sysdep/os_cpu.h"
#include "lib/sysdep/acpi.h"
#include "win.h"
#include "wutil.h"
#include "wcpu.h"
#include "winit.h"
// NOTE(review): the header name of the following include is missing in the
// text under review - confirm against the repository.
#include


WINIT_REGISTER_EARLY_INIT(wnuma_Init);


//-----------------------------------------------------------------------------
// node topology
//----------------------------------------------------------------------------- static size_t NumNodes() { typedef BOOL (WINAPI *PGetNumaHighestNodeNumber)(PULONG highestNode); const HMODULE hKernel32 = GetModuleHandle("kernel32.dll"); const PGetNumaHighestNodeNumber pGetNumaHighestNodeNumber = (PGetNumaHighestNodeNumber)GetProcAddress(hKernel32, "GetNumaHighestNodeNumber"); if(pGetNumaHighestNodeNumber) { ULONG highestNode; const BOOL ok = pGetNumaHighestNodeNumber(&highestNode); debug_assert(ok); debug_assert(highestNode < os_cpu_NumProcessors()); // #nodes <= #processors return highestNode+1; } // NUMA not supported else return 1; } static void FillNodesProcessorMask(uintptr_t* nodesProcessorMask) { typedef BOOL (WINAPI *PGetNumaNodeProcessorMask)(UCHAR node, PULONGLONG affinity); const HMODULE hKernel32 = GetModuleHandle("kernel32.dll"); const PGetNumaNodeProcessorMask pGetNumaNodeProcessorMask = (PGetNumaNodeProcessorMask)GetProcAddress(hKernel32, "GetNumaNodeProcessorMask"); if(pGetNumaNodeProcessorMask) { DWORD_PTR processAffinity, systemAffinity; - const BOOL ok = GetProcessAffinityMask(GetCurrentProcess(), &processAffinity, &systemAffinity); - debug_assert(ok); + { + const BOOL ok = GetProcessAffinityMask(GetCurrentProcess(), &processAffinity, &systemAffinity); + debug_assert(ok); + } for(size_t node = 0; node < numa_NumNodes(); node++) { ULONGLONG affinity; - const BOOL ok = pGetNumaNodeProcessorMask((UCHAR)node, &affinity); - debug_assert(ok); + { + const BOOL ok = pGetNumaNodeProcessorMask((UCHAR)node, &affinity); + debug_assert(ok); + } const uintptr_t processorMask = wcpu_ProcessorMaskFromAffinity(processAffinity, (DWORD_PTR)affinity); nodesProcessorMask[node] = processorMask; } } // NUMA not supported - consider node 0 to consist of all system processors else nodesProcessorMask[0] = os_cpu_ProcessorMask(); } // note: it is easier to implement this in terms of nodesProcessorMask // rather than the other way around because wcpu provides the // 
wcpu_ProcessorMaskFromAffinity helper. there is no similar function to // convert processor to processorNumber. static void FillProcessorsNode(size_t numNodes, const uintptr_t* nodesProcessorMask, size_t* processorsNode) { for(size_t node = 0; node < numNodes; node++) { const uintptr_t processorMask = nodesProcessorMask[node]; for(size_t processor = 0; processor < os_cpu_NumProcessors(); processor++) { if(IsBitSet(processorMask, processor)) processorsNode[processor] = node; } } } //----------------------------------------------------------------------------- // node topology interface struct NodeTopology // POD { size_t numNodes; size_t processorsNode[os_cpu_MaxProcessors]; uintptr_t nodesProcessorMask[os_cpu_MaxProcessors]; }; static NodeTopology s_nodeTopology; static void DetectNodeTopology() { s_nodeTopology.numNodes = NumNodes(); FillNodesProcessorMask(s_nodeTopology.nodesProcessorMask); FillProcessorsNode(s_nodeTopology.numNodes, s_nodeTopology.nodesProcessorMask, s_nodeTopology.processorsNode); } size_t numa_NumNodes() { return s_nodeTopology.numNodes; } size_t numa_NodeFromProcessor(size_t processor) { debug_assert(processor < os_cpu_NumProcessors()); return s_nodeTopology.processorsNode[processor]; } uintptr_t numa_ProcessorMaskFromNode(size_t node) { debug_assert(node < s_nodeTopology.numNodes); return s_nodeTopology.nodesProcessorMask[node]; } //----------------------------------------------------------------------------- // memory info //----------------------------------------------------------------------------- size_t numa_AvailableMemory(size_t node) { debug_assert(node < numa_NumNodes()); // note: it is said that GetNumaAvailableMemoryNode sometimes incorrectly // reports zero bytes. the actual cause may however be unexpected // RAM configuration, e.g. not all slots filled. 
typedef BOOL (WINAPI *PGetNumaAvailableMemoryNode)(UCHAR node, PULONGLONG availableBytes); static PGetNumaAvailableMemoryNode pGetNumaAvailableMemoryNode; if(!pGetNumaAvailableMemoryNode) { const HMODULE hKernel32 = GetModuleHandle("kernel32.dll"); pGetNumaAvailableMemoryNode = (PGetNumaAvailableMemoryNode)GetProcAddress(hKernel32, "GetNumaAvailableMemoryNode"); } if(pGetNumaAvailableMemoryNode) { ULONGLONG availableBytes; const BOOL ok = pGetNumaAvailableMemoryNode((UCHAR)node, &availableBytes); debug_assert(ok); const size_t availableMiB = size_t(availableBytes / MiB); return availableMiB; } // NUMA not supported - return available system memory else return os_cpu_MemoryAvailable(); } double numa_Factor() { WinScopedLock lock(WNUMA_CS); static double factor; if(factor == 0.0) { // if non-NUMA, skip the (expensive) measurements below. if(numa_NumNodes() == 1) factor = 1.0; else { // allocate memory on one node const size_t size = 16*MiB; shared_ptr buffer((u8*)numa_AllocateOnNode(size, 0), numa_Deleter()); const uintptr_t previousProcessorMask = os_cpu_SetThreadAffinityMask(os_cpu_ProcessorMask()); // measure min/max fill times required by a processor from each node double minTime = 1e10, maxTime = 0.0; for(size_t node = 0; node < numa_NumNodes(); node++) { const uintptr_t processorMask = numa_ProcessorMaskFromNode(node); os_cpu_SetThreadAffinityMask(processorMask); const double startTime = timer_Time(); memset(buffer.get(), 0, size); const double elapsedTime = timer_Time() - startTime; minTime = std::min(minTime, elapsedTime); maxTime = std::max(maxTime, elapsedTime); } (void)os_cpu_SetThreadAffinityMask(previousProcessorMask); factor = maxTime / minTime; } debug_assert(factor >= 1.0); debug_assert(factor <= 3.0); // (Microsoft guideline for NUMA systems) } return factor; } bool numa_IsMemoryInterleaved() { WinScopedLock lock(WNUMA_CS); static int isInterleaved = -1; if(isInterleaved == -1) { if(acpi_Init()) { // the BIOS only generates an SRAT (System Resource 
Affinity Table) // if node interleaving is disabled. isInterleaved = acpi_GetTable("SRAT") == 0; acpi_Shutdown(); } else isInterleaved = 0; // can't tell } return isInterleaved != 0; } //----------------------------------------------------------------------------- // allocator //----------------------------------------------------------------------------- static bool largePageAllocationTookTooLong = false; static bool ShouldUseLargePages(LargePageDisposition disposition, size_t allocationSize) { // can't, OS does not support large pages if(os_cpu_LargePageSize() == 0) return false; // overrides if(disposition == LPD_NEVER) return false; if(disposition == LPD_ALWAYS) return true; // default disposition: use a heuristic { // a previous attempt already took too long (Windows is apparently // shoveling aside lots of memory). if(largePageAllocationTookTooLong) return false; // allocation is rather small and would "only" use half of the // TLBs for its pages. if(allocationSize < 64/2 * os_cpu_PageSize()) return false; // we want there to be plenty of memory available, otherwise the // page frames are going to be terribly fragmented and even a // single allocation would take SECONDS. if(os_cpu_MemoryAvailable() < 2000) // 2 GB return false; } return true; } void* numa_Allocate(size_t size, LargePageDisposition largePageDisposition, size_t* ppageSize) { void* mem = 0; // try allocating with large pages (reduces TLB misses) if(ShouldUseLargePages(largePageDisposition, size)) { const size_t largePageSize = os_cpu_LargePageSize(); const size_t paddedSize = round_up(size, largePageSize); // required by MEM_LARGE_PAGES // note: this call can take SECONDS, which is why several checks are // undertaken before we even try. these aren't authoritative, so we // at least prevent future attempts if it takes too long. 
const double startTime = timer_Time(); mem = VirtualAlloc(0, paddedSize, MEM_RESERVE|MEM_COMMIT|MEM_LARGE_PAGES, PAGE_READWRITE); if(ppageSize) *ppageSize = largePageSize; const double elapsedTime = timer_Time() - startTime; debug_printf("TIMER| NUMA large page allocation: %g\n", elapsedTime); if(elapsedTime > 1.0) largePageAllocationTookTooLong = true; } // try (again) with regular pages if(!mem) { mem = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); if(ppageSize) *ppageSize = os_cpu_PageSize(); } // all attempts failed - we're apparently out of memory. if(!mem) throw std::bad_alloc(); return mem; } static bool VerifyPages(void* mem, size_t size, size_t pageSize, size_t node) { typedef BOOL (WINAPI *PQueryWorkingSetEx)(HANDLE hProcess, PVOID buffer, DWORD bufferSize); static PQueryWorkingSetEx pQueryWorkingSetEx; if(!pQueryWorkingSetEx) { const HMODULE hKernel32 = GetModuleHandle("kernel32.dll"); pQueryWorkingSetEx = (PQueryWorkingSetEx)GetProcAddress(hKernel32, "QueryWorkingSetEx"); if(!pQueryWorkingSetEx) return true; // can't do anything } #if WINVER >= 0x600 // retrieve attributes of all pages constituting mem const size_t numPages = (size + pageSize-1) / pageSize; PSAPI_WORKING_SET_EX_INFORMATION* wsi = new PSAPI_WORKING_SET_EX_INFORMATION[numPages]; for(size_t i = 0; i < numPages; i++) wsi[i].VirtualAddress = (u8*)mem + i*pageSize; pQueryWorkingSetEx(GetCurrentProcess(), wsi, sizeof(PSAPI_WORKING_SET_EX_INFORMATION)*numPages); // ensure each is valid and allocated on the correct node for(size_t i = 0; i < numPages; i++) { const PSAPI_WORKING_SET_EX_BLOCK& attributes = wsi[i].VirtualAttributes; if(!attributes.valid) return false; if(attributes.LargePage != (pageSize == LargePageSize())) { debug_printf("NUMA: is not a large page\n"); return false; } if(attributes.node != node) { debug_printf("NUMA: allocated from remote node\n"); return false; } } delete[] wsi; #else UNUSED2(mem); UNUSED2(size); UNUSED2(pageSize); UNUSED2(node); #endif return 
true; } void* numa_AllocateOnNode(size_t node, size_t size, LargePageDisposition largePageDisposition, size_t* ppageSize) { debug_assert(node < numa_NumNodes()); // see if there will be enough memory (non-authoritative, for debug purposes only) { const size_t sizeMiB = size/MiB; const size_t availableMiB = numa_AvailableMemory(node); if(availableMiB < sizeMiB) debug_printf("NUMA: warning: node reports insufficient memory (%d vs %d MB)\n", availableMiB, sizeMiB); } size_t pageSize; // (used below even if ppageSize is zero) void* const mem = numa_Allocate(size, largePageDisposition, &pageSize); if(ppageSize) *ppageSize = pageSize; // we can't use VirtualAllocExNuma - it's only available in Vista and Server 2008. // workaround: fault in all pages now to ensure they are allocated from the // current node, then verify page attributes. // (note: VirtualAlloc's MEM_COMMIT only maps virtual pages and does not // actually allocate page frames. Windows XP uses a first-touch heuristic - // the page will be taken from the node whose processor caused the fault. // Windows Vista allocates on the "preferred" node, so affinity should be // set such that this thread is running on .) memset(mem, 0, size); VerifyPages(mem, size, pageSize, node); return mem; } void numa_Deallocate(void* mem) { VirtualFree(mem, 0, MEM_RELEASE); } //----------------------------------------------------------------------------- static LibError wnuma_Init() { DetectNodeTopology(); return INFO::OK; } Index: ps/trunk/source/lib/sysdep/dir_watch.h =================================================================== --- ps/trunk/source/lib/sysdep/dir_watch.h (revision 6535) +++ ps/trunk/source/lib/sysdep/dir_watch.h (revision 6536) @@ -1,105 +1,105 @@ /** * ========================================================================= * File : dir_watch.h * Project : 0 A.D. * Description : portable directory change notification API. 
* ========================================================================= */ // license: GPL; see lib/license.txt #ifndef INCLUDED_DIR_WATCH #define INCLUDED_DIR_WATCH class DirWatch; typedef shared_ptr PDirWatch; /** * start watching a single directory for changes. * * @param path native path of the directory to watch. * @param dirWatch receives a smart pointer to the watch object. * * note: the FAM backend can only watch single directories, so that is * all we can guarantee. the Win32 implementation watches entire trees; * adding a watch for subdirectories is a no-op there. **/ LIB_API LibError dir_watch_Add(const fs::wpath& path, PDirWatch& dirWatch); /** * stop watching a directory. * * note: any previously queued notifications will still be returned. **/ LIB_API void dir_watch_Remove(PDirWatch& dirWatch); class DirWatchNotification { public: enum Event { Created, Deleted, - Changed, + Changed }; // (default ctor is required because DirWatchNotification is returned // via output parameter.) DirWatchNotification() { } DirWatchNotification(const fs::wpath& pathname, Event type) : m_pathname(pathname), m_type(type) { } const fs::wpath& Pathname() const { return m_pathname; } Event Type() const { return m_type; } static const char* EventString(Event type) { switch(type) { case Created: return "created"; case Deleted: return "deleted"; case Changed: return "changed"; default: throw std::logic_error("invalid type"); } } private: fs::wpath m_pathname; Event m_type; }; /** * check if a directory watch notification is pending. * * @param notification receives the first pending DirWatchNotification from * any of the watched directories. this notification is subsequently removed * from the internal queue. * @return INFO::OK if a notification was retrieved, ERR::AGAIN if none * are pending, or a negative error code. * * note: the run time of this function is independent of the number of * directory watches and number of files. 
* * rationale for a polling interface: users (e.g. the main game loop) * typically want to receive change notifications at a single point, * rather than deal with the complexity of asynchronous notifications. **/ LIB_API LibError dir_watch_Poll(DirWatchNotification& notification); #endif // #ifndef INCLUDED_DIR_WATCH Index: ps/trunk/source/lib/sysdep/acpi.cpp =================================================================== --- ps/trunk/source/lib/sysdep/acpi.cpp (revision 6535) +++ ps/trunk/source/lib/sysdep/acpi.cpp (revision 6536) @@ -1,341 +1,341 @@ #include "precompiled.h" #include "acpi.h" #include "lib/sysdep/os/win/mahaf.h" #include "lib/sysdep/cpu.h" #include "lib/module_init.h" #pragma pack(1) typedef const volatile u8* PCV_u8; typedef const volatile AcpiTable* PCV_AcpiTable; // return 8-bit checksum of a buffer (should be 0) static u8 ComputeChecksum(PCV_u8 buf, size_t numBytes) { // (can't use std::accumulate - we need 8-bit wraparound) u8 sum = 0; for(PCV_u8 p = buf; p < buf+numBytes; p++) sum += *p; return sum; } //----------------------------------------------------------------------------- // exception-safe transactional map/use/unmap //----------------------------------------------------------------------------- // note: if the OS happens to unmap our physical memory, the Unsafe* // functions may crash. we catch this via SEH; on Unix, we'd need handlers // for SIGBUS and/or SIGSEGV. the code is safe in that it releases the // mapped memory and returns an error code. 
static void* SUCCEEDED = (void*)(intptr_t)1; static void* FAILED = (void*)(intptr_t)-1; typedef void* (*UnsafeFunction)(PCV_u8 mem, size_t numBytes, void* arg); static inline void* CallWithSafetyBlanket(UnsafeFunction func, PCV_u8 mem, size_t numBytes, void* arg) { #if MSC_VERSION __try { return func(mem, numBytes, arg); } __except(1) { return FAILED; } #else return func(mem, numBytes, arg); #endif } static void* TransactPhysicalMemory(uintptr_t physicalAddress, size_t numBytes, UnsafeFunction func, void* arg = 0) { PCV_u8 mem = (PCV_u8)mahaf_MapPhysicalMemory(physicalAddress, numBytes); if(!mem) return FAILED; void* ret = CallWithSafetyBlanket(func, mem, numBytes, arg); mahaf_UnmapPhysicalMemory((volatile void*)mem); return ret; } //----------------------------------------------------------------------------- // Root System Descriptor Pointer //----------------------------------------------------------------------------- struct BiosDataArea { u16 serialBase[4]; u16 parallelBase[3]; u16 ebdaSegment; }; typedef const volatile BiosDataArea* PCV_BiosDataArea; static void* UnsafeReadEbdaPhysicalAddress(PCV_u8 mem, size_t numBytes, void* UNUSED(arg)) { debug_assert(numBytes >= sizeof(BiosDataArea)); PCV_BiosDataArea bda = (PCV_BiosDataArea)mem; const uintptr_t ebdaPhysicalAddress = ((uintptr_t)bda->ebdaSegment) * 16; return (void*)ebdaPhysicalAddress; } struct RSDP { char signature[8]; // "RSD PTR " u8 checksum; // sum of this struct = 0 char oemId[6]; u8 revision; // 0 for 1.0, 2 for 2.0 u32 rsdtPhysicalAddress; }; typedef const volatile RSDP* PCV_RSDP; static const size_t RSDP_ALIGNMENT = 16; static void* UnsafeLocateAndRetrieveRsdp(PCV_u8 buf, size_t numBytes, void* arg) { debug_assert(numBytes >= sizeof(RSDP)); for(PCV_u8 p = buf; p < buf+numBytes; p += RSDP_ALIGNMENT) { RSDP* prsdp = (RSDP*)p; if(memcmp(prsdp->signature, "RSD PTR ", 8) != 0) continue; if(ComputeChecksum(p, sizeof(RSDP)) != 0) continue; memcpy(arg, prsdp, sizeof(RSDP)); return SUCCEEDED; } return 
FAILED; } static bool RetrieveRsdp(RSDP& rsdp) { // See ACPIspec30b, section 5.2.5.1: // RSDP is either in the first KIB of the extended BIOS data area, void* ret = TransactPhysicalMemory(0x400, 0x100, UnsafeReadEbdaPhysicalAddress); if(ret != FAILED) { const uintptr_t ebdaPhysicalAddress = (uintptr_t)ret; ret = TransactPhysicalMemory(ebdaPhysicalAddress, 0x400, UnsafeLocateAndRetrieveRsdp, &rsdp); if(ret == SUCCEEDED) return true; } // or in read-only BIOS memory. ret = TransactPhysicalMemory(0xE0000, 0x20000, UnsafeLocateAndRetrieveRsdp, &rsdp); if(ret == SUCCEEDED) return true; return false; // not found } //----------------------------------------------------------------------------- // table retrieval //----------------------------------------------------------------------------- static inline void* UnsafeAllocateCopyOfTable(PCV_u8 mem, size_t numBytes, void* arg) { debug_assert(numBytes >= sizeof(AcpiTable)); PCV_AcpiTable table = (PCV_AcpiTable)mem; const size_t tableSize = table->size; // physical memory window is smaller than the table // (caller will map a larger window and call us again) if(numBytes < tableSize) { memcpy(arg, &tableSize, sizeof(size_t)); return 0; } PCV_u8 copy = (PCV_u8)malloc(tableSize); if(!copy) return FAILED; cpu_memcpy((void*)copy, (const void*)mem, tableSize); return (void*)copy; } // caller is responsible for verifying the table is valid and using // DeallocateTable to free it. static const AcpiTable* AllocateCopyOfTable(uintptr_t physicalAddress) { // ACPI table sizes are not known until they've been mapped. since that // is slow, we don't always want to do it twice. the solution is to map // enough for a typical table; if that is too small, realloc and map again. 
static const size_t initialSize = 4*KiB; size_t actualSize = 0; void* ret = TransactPhysicalMemory(physicalAddress, initialSize, UnsafeAllocateCopyOfTable, &actualSize); // initialSize was too small; actualSize has been set if(ret == 0) ret = TransactPhysicalMemory(physicalAddress, actualSize, UnsafeAllocateCopyOfTable); // *either* of the above calls failed to allocate memory if(ret == FAILED) return 0; return (const AcpiTable*)ret; } template static void DeallocateTable(const T* table) { free((void*)table); } static bool VerifyTable(const AcpiTable* table, const char* signature = 0) { if(!table) return false; // caller knowns the signature; make sure it matches if(signature) { if(memcmp(table->signature, signature, 4) != 0) return false; } // no specific signature is called for; just make sure it's 4 letters else { - for(int i = 0; i < 4; i++) + for(size_t i = 0; i < 4; i++) { if(!isalpha(table->signature[i])) return false; } } // must be at least as large as the common header if(table->size < sizeof(AcpiTable)) return false; // checksum of table must be 0 // .. AMIBIOS OEMB table has an incorrect checksum (off-by-one), // so don't complain about any OEM tables (ignored anyway). const bool isOemTable = (memcmp(table->signature, "OEM", 3) == 0); if(ComputeChecksum((PCV_u8)table, table->size) != 0 && !isOemTable) return false; return true; } static const AcpiTable* GetTable(uintptr_t physicalAddress, const char* signature = 0) { const AcpiTable* table = AllocateCopyOfTable(physicalAddress); if(VerifyTable(table, signature)) return table; else { DeallocateTable(table); return 0; } } //----------------------------------------------------------------------------- // table storage //----------------------------------------------------------------------------- // Root System Descriptor Table struct RSDT { AcpiTable header; u32 tables[1]; }; // avoid std::map et al. 
because we may be called before _cinit static const AcpiTable** tables; -static int numTables; +static size_t numTables; static bool LatchAllTables() { RSDP rsdp; if(!RetrieveRsdp(rsdp)) return false; const RSDT* rsdt = (const RSDT*)GetTable(rsdp.rsdtPhysicalAddress, "RSDT"); if(!rsdt) return false; numTables = (rsdt->header.size - sizeof(AcpiTable)) / sizeof(rsdt->tables[0]); debug_assert(numTables > 0); tables = new const AcpiTable*[numTables]; - for(int i = 0; i < numTables; i++) + for(size_t i = 0; i < numTables; i++) tables[i] = GetTable(rsdt->tables[i]); DeallocateTable(rsdt); return true; } static void FreeAllTables() { if(tables) { - for(int i = 0; i < numTables; i++) + for(size_t i = 0; i < numTables; i++) DeallocateTable(tables[i]); delete[] tables; } } const AcpiTable* acpi_GetTable(const char* signature) { // (typically only a few tables, linear search is OK) - for(int i = 0; i < numTables; i++) + for(size_t i = 0; i < numTables; i++) { const AcpiTable* table = tables[i]; if(!table) continue; // skip invalid tables, e.g. 
OEM (see above) if(strncmp(table->signature, signature, 4) == 0) return table; } return 0; } //----------------------------------------------------------------------------- static ModuleInitState initState; bool acpi_Init() { if(ModuleIsError(&initState)) return false; if(!ModuleShouldInitialize(&initState)) return true; if(mahaf_IsPhysicalMappingDangerous()) goto fail; if(!mahaf_Init()) goto fail; if(!LatchAllTables()) goto fail; return true; fail: ModuleSetError(&initState); return false; } void acpi_Shutdown() { if(!ModuleShouldShutdown(&initState)) return; FreeAllTables(); mahaf_Shutdown(); } Index: ps/trunk/source/lib/sysdep/acpi.h =================================================================== --- ps/trunk/source/lib/sysdep/acpi.h (revision 6535) +++ ps/trunk/source/lib/sysdep/acpi.h (revision 6536) @@ -1,56 +1,56 @@ /** * ========================================================================= * File : acpi.h * Project : 0 A.D. * Description : minimal subset of ACPI * ========================================================================= */ // license: GPL; see lib/license.txt #ifndef INCLUDED_ACPI #define INCLUDED_ACPI #pragma pack(push, 1) // common header for all ACPI tables struct AcpiTable { char signature[4]; u32 size; // table size [bytes], including header u8 revision; u8 checksum; // to make sum of entire table == 0 char oemId[6]; char oemTableId[8]; u32 oemRevision; char creatorId[4]; u32 creatorRevision; }; enum AcpiAddressSpace { // (these are not generally powers-of-two - some values have been omitted.) 
ACPI_AS_MEMORY = 0, ACPI_AS_IO = 1, ACPI_AS_PCI_CONFIG = 2, - ACPI_AS_SMBUS = 4, + ACPI_AS_SMBUS = 4 }; // address of a struct or register struct AcpiGenericAddress { u8 addressSpaceId; u8 registerBitWidth; u8 registerBitOffset; u8 accessSize; u64 address; }; #pragma pack(pop) extern bool acpi_Init(); extern void acpi_Shutdown(); extern const AcpiTable* acpi_GetTable(const char* signature); #endif // #ifndef INCLUDED_ACPI Index: ps/trunk/source/lib/sysdep/gfx.cpp =================================================================== --- ps/trunk/source/lib/sysdep/gfx.cpp (revision 6535) +++ ps/trunk/source/lib/sysdep/gfx.cpp (revision 6536) @@ -1,53 +1,52 @@ /** * ========================================================================= * File : gfx.cpp * Project : 0 A.D. * Description : graphics card detection. * ========================================================================= */ // license: GPL; see lib/license.txt #include "precompiled.h" #include "gfx.h" #include "lib/external_libraries/sdl.h" +#include "lib/ogl.h" char gfx_card[GFX_CARD_LEN] = ""; char gfx_drv_ver[GFX_DRV_VER_LEN] = ""; int gfx_mem = -1; // [MiB]; approximate -extern LibError ogl_get_gfx_info(); - // detect graphics card and set the above information. void gfx_detect() { // TODO: add sizeof(FB)? gfx_mem = (SDL_GetVideoInfo()->video_mem) / 1048576; // [MiB] // try platform-specific version: they return more // detailed information, and don't require OpenGL to be ready. #if OS_WIN extern LibError win_get_gfx_info(); if(win_get_gfx_info() < 0) #endif { // the OpenGL version should always work, unless OpenGL isn't ready for use, // or we were called between glBegin and glEnd. ogl_get_gfx_info(); } // remove crap from vendor names. 
(don't dare touch the model name - // it's too risky, there are too many different strings) #define SHORTEN(what, chars_to_keep)\ if(!strncmp(gfx_card, what, ARRAY_SIZE(what)-1))\ memmove(gfx_card+chars_to_keep, gfx_card+ARRAY_SIZE(what)-1, strlen(gfx_card)-(ARRAY_SIZE(what)-1)+1); SHORTEN("ATI Technologies Inc.", 3); SHORTEN("NVIDIA Corporation", 6); SHORTEN("S3 Graphics", 2); // returned by EnumDisplayDevices SHORTEN("S3 Graphics, Incorporated", 2); // returned by GL_VENDOR #undef SHORTEN } Index: ps/trunk/source/lib/code_annotation.h =================================================================== --- ps/trunk/source/lib/code_annotation.h (revision 6535) +++ ps/trunk/source/lib/code_annotation.h (revision 6536) @@ -1,173 +1,180 @@ /** * ========================================================================= * File : code_annotation.h * Project : 0 A.D. * Description : macros for code annotation. * ========================================================================= */ // license: GPL; see lib/license.txt #ifndef INCLUDED_CODE_ANNOTATION #define INCLUDED_CODE_ANNOTATION /** * mark a function local variable or parameter as unused and avoid * the corresponding compiler warning. * use inside the function body, e.g. void f(int x) { UNUSED2(x); } **/ #define UNUSED2(param) (void)param; /** * mark a function parameter as unused and avoid * the corresponding compiler warning. * wrap around the parameter name, e.g. void f(int UNUSED(x)) **/ #define UNUSED(param) /** "unreachable code" helpers unreachable lines of code are often the source or symptom of subtle bugs. they are flagged by compiler warnings; however, the opposite problem - erroneously reaching certain spots (e.g. due to missing return statement) is worse and not detected automatically. to defend against this, the programmer can annotate their code to indicate to humans that a particular spot should never be reached. 
however, that isn't much help; better is a sentinel that raises an error if it is actually reached. hence, the UNREACHABLE macro. ironically, if the code guarded by UNREACHABLE works as it should, compilers may flag the macro's code as unreachable. this would distract from genuine warnings, which is unacceptable. even worse, compilers differ in their code checking: GCC only complains if non-void functions end without returning a value (i.e. missing return statement), while VC checks if lines are unreachable (e.g. if they are preceded by a return on all paths). our implementation of UNREACHABLE solves this dilemma as follows: - on GCC: call abort(); since it has the noreturn attribute, the "non-void" warning disappears. - on VC: avoid generating any code. we allow the compiler to assume the spot is actually unreachable, which incidentally helps optimization. if reached after all, a crash usually results. in that case, compile with CONFIG_PARANOIA, which will cause an error message to be displayed. this approach still allows for the possibility of automated checking, but does not cause any compiler warnings. **/ #define UNREACHABLE // actually defined below.. this is for # undef UNREACHABLE // CppDoc's benefit only. // 1) final build: optimize assuming this location cannot be reached. // may crash if that turns out to be untrue, but removes checking overhead. #if CONFIG_FINAL # define UNREACHABLE ASSUME_UNREACHABLE // 2) normal build: #else // a) normal implementation: includes "abort", which is declared with // noreturn attribute and therefore avoids GCC's "execution reaches // end of non-void function" warning. # if !MSC_VERSION || ICC_VERSION || CONFIG_PARANOIA # define UNREACHABLE\ STMT(\ debug_assert(0); /* hit supposedly unreachable code */\ abort();\ ) // b) VC only: don't generate any code; squelch the warning and optimize.
# else # define UNREACHABLE ASSUME_UNREACHABLE # endif #endif /** convenient specialization of UNREACHABLE for switch statements whose default can never be reached. example usage: int x; switch(x % 2) { case 0: break; case 1: break; NODEFAULT; } **/ #define NODEFAULT default: UNREACHABLE /** * equivalent to strcpy, but indicates that the programmer checked usage and * promises it is safe. * * (this macro prevents actually-safe instances of the function from * showing up in searches) **/ #define SAFE_STRCPY str##cpy #define SAFE_WCSCPY wcs##cpy // generate a symbol containing the line number of the macro invocation. // used to give a unique name (per file) to types made by cassert. // we can't prepend __FILE__ to make it globally unique - the filename // may be enclosed in quotes. PASTE3_HIDDEN__ is needed to make sure // __LINE__ is expanded correctly. #define PASTE3_HIDDEN__(a, b, c) a ## b ## c #define PASTE3__(a, b, c) PASTE3_HIDDEN__(a, b, c) #define UID__ PASTE3__(LINE_, __LINE__, _) #define UID2__ PASTE3__(LINE_, __LINE__, _2) /** * compile-time debug_assert. causes a compile error if the expression * evaluates to zero/false. * * no runtime overhead; may be used anywhere, including file scope. * especially useful for testing sizeof types. * * @param expression that is expected to evaluate to non-zero at compile-time. **/ -#define cassert(expr) typedef detail::static_assert_<(expr)>::type UID__; +#define cassert(expr) typedef detail::static_assert_<(expr)>::type UID__ namespace detail { template struct static_assert_; template<> struct static_assert_ { typedef int type; }; } /** * compile-time debug_assert. causes a compile error if the expression * evaluates to zero/false. * * no runtime overhead; may be used anywhere, including file scope. * especially useful for testing sizeof types. * * this version has a less helpful error message, but redefinition doesn't * trigger warnings. * * @param expression that is expected to evaluate to non-zero at compile-time. 
**/ #define cassert2(expr) extern char CASSERT_FAILURE[1][(expr)] // copied from boost::noncopyable; this definition avoids warnings when // an exported class derives from noncopyable. namespace noncopyable_ // protection from unintended ADL { class noncopyable { protected: noncopyable() {} ~noncopyable() {} private: // emphasize the following members are private noncopyable(const noncopyable&); const noncopyable& operator=(const noncopyable&); }; } typedef noncopyable_::noncopyable noncopyable; +// this form avoids ICC 11 W4 warnings about non-virtual dtors and +// suppression of the copy assignment operator. +#define NONCOPYABLE(className)\ +private:\ + className(const className&);\ + const className& operator=(const className&) + #endif // #ifndef INCLUDED_CODE_ANNOTATION Index: ps/trunk/source/lib/frequency_filter.cpp =================================================================== --- ps/trunk/source/lib/frequency_filter.cpp (revision 6535) +++ ps/trunk/source/lib/frequency_filter.cpp (revision 6536) @@ -1,215 +1,217 @@ #include "precompiled.h" #include "frequency_filter.h" static const double errorTolerance = 0.05f; static const double sensitivity = 0.10; /** * variable-width window for frequency determination **/ -class FrequencyEstimator : boost::noncopyable +class FrequencyEstimator { + NONCOPYABLE(FrequencyEstimator); public: FrequencyEstimator(double resolution) : m_minDeltaTime(4.0 * resolution) // chosen to reduce error but still yield rapid updates. 
, m_lastTime(0) // will be set on first call , m_numEvents(0) { debug_assert(resolution > 0.0); } bool operator()(double time, double& frequency) { m_numEvents++; if(m_lastTime == 0.0) m_lastTime = time; // count # events until deltaTime is large enough // (reduces quantization errors if resolution is low) const double deltaTime = time - m_lastTime; if(deltaTime <= m_minDeltaTime) return false; frequency = m_numEvents / deltaTime; m_numEvents = 0; m_lastTime = time; return true; // success } private: const double m_minDeltaTime; double m_lastTime; int m_numEvents; }; /** * variable-gain IIR filter **/ class IirFilter { public: IirFilter(double sensitivity, double initialValue) : m_sensitivity(sensitivity), m_prev(initialValue) { } // bias = 0: no change. > 0: increase (n-th root). < 0: decrease (^n) double operator()(double x, int bias) { // sensitivity to changes ([0,1]). const double gain = pow(m_sensitivity, ComputeExponent(bias)); return m_prev = x*gain + m_prev*(1.0-gain); } private: static double ComputeExponent(int bias) { if(bias > 0) return 1.0 / bias; // n-th root else if(bias == 0) return 1.0; // no change else return -bias; // power-of-n } double m_sensitivity; double m_prev; }; /** * regulate IIR gain for rapid but smooth tracking of a function. * this is similar in principle to a PID controller but is tuned for * the special case of FPS values to simplify stabilizing the filter. **/ class Controller { public: Controller(double initialValue) : m_timesOnSameSide(0) { std::fill(m_history, m_history+m_historySize, initialValue); } // bias := exponential change to gain, (-inf, inf) int ComputeBias(double smoothedValue, double value) { if(WasOnSameSide(value)) // (must be checked before updating history) m_timesOnSameSide++; else m_timesOnSameSide = 0; // update history std::copy(m_history, m_history+m_historySize, m_history+1); m_history[m_historySize-1] = value; // dampen jitter if(Change(smoothedValue, value) < 0.04) return -1; // dampen spikes/bounces. 
if(WasSpike()) return -2; // if the past few samples have been consistently above/below // average, the function is changing and we need to catch up. // (similar to I in a PID) if(m_timesOnSameSide >= 3) return std::min(m_timesOnSameSide, 4); // suppress large jumps. if(Change(m_history[m_historySize-1], value) > 0.30) return -4; // gain -> 0 return 0; } private: bool WasOnSameSide(double value) const { int sum = 0; for(size_t i = 0; i < m_historySize; i++) { const int vote = (value >= m_history[i])? 1 : -1; sum += vote; } return abs(sum) == (int)m_historySize; } static double Change(double from, double to) { return fabs(from - to) / from; } // /\ or \/ in last three history entries bool WasSpike() const { cassert(m_historySize >= 3); const double h2 = m_history[m_historySize-3], h1 = m_history[m_historySize-2], h0 = m_history[m_historySize-1]; if(((h2-h1) * (h1-h0)) > 0) // no sign change return false; if(Change(h2, h0) > 0.05) // overall change from oldest to newest value return false; if(Change(h1, h0) < 0.10) // no intervening spike return false; return true; } static const size_t m_historySize = 3; double m_history[m_historySize]; int m_timesOnSameSide; }; class FrequencyFilter : public IFrequencyFilter { + NONCOPYABLE(FrequencyFilter); public: FrequencyFilter(double resolution, double expectedFrequency) : m_controller(expectedFrequency), m_frequencyEstimator(resolution), m_iirFilter(sensitivity, expectedFrequency) - , m_stableFrequency(expectedFrequency), m_smoothedFrequency(expectedFrequency) + , m_stableFrequency((int)expectedFrequency), m_smoothedFrequency(expectedFrequency) { } virtual void Update(double time) { double frequency; if(!m_frequencyEstimator(time, frequency)) return; const int bias = m_controller.ComputeBias(m_smoothedFrequency, frequency); m_smoothedFrequency = m_iirFilter(frequency, bias); // allow the smoothed FPS to free-run until it is no longer near the // previous stable FPS value. 
round up because values are more often // too low than too high. const double difference = fabs(m_smoothedFrequency - m_stableFrequency); if(difference > errorTolerance*m_stableFrequency) m_stableFrequency = (int)(m_smoothedFrequency + 0.99); } virtual double SmoothedFrequency() const { return m_smoothedFrequency; } virtual int StableFrequency() const { return m_stableFrequency; } private: FrequencyEstimator m_frequencyEstimator; Controller m_controller; IirFilter m_iirFilter; int m_stableFrequency; double m_smoothedFrequency; }; PIFrequencyFilter CreateFrequencyFilter(double resolution, double expectedFrequency) { return PIFrequencyFilter(new FrequencyFilter(resolution, expectedFrequency)); } Index: ps/trunk/source/lib/debug.cpp =================================================================== --- ps/trunk/source/lib/debug.cpp (revision 6535) +++ ps/trunk/source/lib/debug.cpp (revision 6536) @@ -1,507 +1,508 @@ /** * ========================================================================= * File : debug.cpp * Project : 0 A.D. * Description : platform-independent debug support code. 
* ========================================================================= */ // license: GPL; see lib/license.txt #include "precompiled.h" #include "debug.h" #include #include #include "app_hooks.h" #include "os_path.h" #include "path_util.h" #include "lib/allocators/allocators.h" // page_aligned_alloc #include "fnv_hash.h" #include "lib/sysdep/cpu.h" // cpu_CAS #include "lib/sysdep/sysdep.h" #if OS_WIN #include "lib/sysdep/os/win/wdbg_heap.h" #endif ERROR_ASSOCIATE(ERR::SYM_NO_STACK_FRAMES_FOUND, "No stack frames found", -1); ERROR_ASSOCIATE(ERR::SYM_UNRETRIEVABLE_STATIC, "Value unretrievable (stored in external module)", -1); ERROR_ASSOCIATE(ERR::SYM_UNRETRIEVABLE, "Value unretrievable", -1); ERROR_ASSOCIATE(ERR::SYM_TYPE_INFO_UNAVAILABLE, "Error getting type_info", -1); ERROR_ASSOCIATE(ERR::SYM_INTERNAL_ERROR, "Exception raised while processing a symbol", -1); ERROR_ASSOCIATE(ERR::SYM_UNSUPPORTED, "Symbol type not (fully) supported", -1); ERROR_ASSOCIATE(ERR::SYM_CHILD_NOT_FOUND, "Symbol does not have the given child", -1); ERROR_ASSOCIATE(ERR::SYM_NESTING_LIMIT, "Symbol nesting too deep or infinite recursion", -1); ERROR_ASSOCIATE(ERR::SYM_SINGLE_SYMBOL_LIMIT, "Symbol has produced too much output", -1); ERROR_ASSOCIATE(INFO::SYM_SUPPRESS_OUTPUT, "Symbol was suppressed", -1); // needed when writing crashlog static const size_t LOG_CHARS = 16384; wchar_t debug_log[LOG_CHARS]; wchar_t* debug_log_pos = debug_log; // write to memory buffer (fast) void debug_wprintf_mem(const wchar_t* fmt, ...) { - const ssize_t charsLeft = (ssize_t)LOG_CHARS - (debug_log_pos-debug_log); + const ssize_t charsLeft = (ssize_t)(LOG_CHARS - (debug_log_pos-debug_log)); debug_assert(charsLeft >= 0); // potentially not enough room for the new string; throw away the // older half of the log. we still protect against overflow below. 
if(charsLeft < 512) { const size_t copySize = sizeof(wchar_t) * LOG_CHARS/2; wchar_t* const middle = &debug_log[LOG_CHARS/2]; cpu_memcpy(debug_log, middle, copySize); memset(middle, 0, copySize); debug_log_pos -= LOG_CHARS/2; // don't assign middle (may leave gap) } // write into buffer (in-place) va_list args; va_start(args, fmt); int len = vswprintf(debug_log_pos, charsLeft-2, fmt, args); va_end(args); debug_log_pos += len+2; wcscpy_s(debug_log_pos-2, 3, L"\r\n"); } // need to shoehorn printf-style variable params into // the OutputDebugString call. // - don't want to split into multiple calls - would add newlines to output. // - fixing Win32 _vsnprintf to return # characters that would be written, // as required by C99, looks difficult and unnecessary. if any other code // needs that, implement GNU vasprintf. // - fixed size buffers aren't nice, but much simpler than vasprintf-style // allocate+expand_until_it_fits. these calls are for quick debug output, // not loads of data, anyway. // rationale: static data instead of std::set to allow setting at any time. // we store FNV hash of tag strings for fast comparison; collisions are // extremely unlikely and can only result in displaying more/less text. static const size_t MAX_TAGS = 20; static u32 tags[MAX_TAGS]; static size_t num_tags; void debug_filter_add(const char* tag) { const u32 hash = fnv_hash(tag); // make sure it isn't already in the list for(size_t i = 0; i < MAX_TAGS; i++) if(tags[i] == hash) return; // too many already? if(num_tags == MAX_TAGS) { debug_assert(0); // increase MAX_TAGS return; } tags[num_tags++] = hash; } void debug_filter_remove(const char* tag) { const u32 hash = fnv_hash(tag); for(size_t i = 0; i < MAX_TAGS; i++) // found it if(tags[i] == hash) { // replace with last element (avoid holes) tags[i] = tags[MAX_TAGS-1]; num_tags--; // can only happen once, so we're done. 
return; } } void debug_filter_clear() { std::fill(tags, tags+MAX_TAGS, 0); } bool debug_filter_allows(const char* text) { size_t i; for(i = 0; ; i++) { // no | found => no tag => should always be displayed if(text[i] == ' ' || text[i] == '\0') return true; if(text[i] == '|' && i != 0) break; } const u32 hash = fnv_hash(text, i); // check if entry allowing this tag is found for(i = 0; i < MAX_TAGS; i++) if(tags[i] == hash) return true; return false; } // max # characters (including \0) output by debug_printf in one call. const size_t DEBUG_PRINTF_MAX_CHARS = 1024; // refer to wdbg.cpp!debug_vsprintf before changing this #undef debug_printf // allowing #defining it out void debug_printf(const char* fmt, ...) { char buf[DEBUG_PRINTF_MAX_CHARS]; buf[ARRAY_SIZE(buf)-1] = '\0'; va_list ap; va_start(ap, fmt); const int len = vsnprintf(buf, DEBUG_PRINTF_MAX_CHARS, fmt, ap); debug_assert(len >= 0); va_end(ap); if(debug_filter_allows(buf)) debug_puts(buf); } void debug_printf(const wchar_t* fmt, ...) { wchar_t buf[DEBUG_PRINTF_MAX_CHARS]; buf[ARRAY_SIZE(buf)-1] = '\0'; va_list ap; va_start(ap, fmt); const int numChars = vswprintf(buf, DEBUG_PRINTF_MAX_CHARS, fmt, ap); debug_assert(numChars >= 0); va_end(ap); char buf2[DEBUG_PRINTF_MAX_CHARS]; size_t bytesWritten = wcstombs(buf2, buf, DEBUG_PRINTF_MAX_CHARS); debug_assert(bytesWritten == (size_t)numChars); if(debug_filter_allows(buf2)) debug_puts(buf2); } //----------------------------------------------------------------------------- LibError debug_WriteCrashlog(const wchar_t* text) { // avoid potential infinite loop if an error occurs here. 
static uintptr_t isBusy; if(!cpu_CAS(&isBusy, 0, 1)) return ERR::REENTERED; // NOWARN OsPath path = OsPath(ah_get_log_dir())/"crashlog.txt"; FILE* f = fopen(path.string().c_str(), "w"); if(!f) { isBusy = 0; WARN_RETURN(ERR::FAIL); } fputwc(0xFEFF, f); // BOM fwprintf(f, L"%ls\n", text); fwprintf(f, L"\n\n====================================\n\n"); // allow user to bundle whatever information they want ah_bundle_logs(f); fwprintf(f, L"Last known activity:\n\n %ls\n", debug_log); fclose(f); isBusy = 0; return INFO::OK; } //----------------------------------------------------------------------------- // output //----------------------------------------------------------------------------- // translates and displays the given strings in a dialog. // this is typically only used when debug_DisplayError has failed or // is unavailable because that function is much more capable. // implemented via sys_display_msg; see documentation there. void debug_DisplayMessage(const wchar_t* caption, const wchar_t* msg) { sys_display_msg(ah_translate(caption), ah_translate(msg)); } // when an error has come up and user clicks Exit, we don't want any further // errors (e.g. caused by atexit handlers) to come up, possibly causing an // infinite loop. it sucks to hide errors, but we assume that whoever clicked // exit really doesn't want to see any more errors. static bool isExiting; // this logic is applicable to any type of error. special cases such as // suppressing certain expected WARN_ERRs are done there. static bool ShouldSuppressError(u8* suppress) { if(!suppress) return false; if(*suppress == DEBUG_SUPPRESS) return true; if(isExiting) return true; return false; } // (NB: this may appear obscene, but deep stack traces have been // observed to take up > 256 KiB) static const size_t messageSize = 512*KiB; void debug_FreeErrorMessage(ErrorMessageMem* emm) { page_aligned_free(emm->pa_mem, messageSize); } // split out of debug_DisplayError because it's used by the self-test. 
const wchar_t* debug_BuildErrorMessage( const wchar_t* description, const char* filename, int line, const char* func, void* context, const char* lastFuncToSkip, ErrorMessageMem* emm) { // rationale: see ErrorMessageMem emm->pa_mem = page_aligned_alloc(messageSize); if(!emm->pa_mem) return L"(insufficient memory to generate error message)"; wchar_t* const buf = (wchar_t*)emm->pa_mem; const size_t maxChars = messageSize / sizeof(wchar_t); wchar_t* pos = buf; size_t charsLeft = maxChars; int len; // header len = swprintf(pos, charsLeft, L"%ls\r\n" L"Location: %hs:%d (%hs)\r\n" L"\r\n" L"Call stack:\r\n" L"\r\n", description, filename, line, func); if(len < 0) { fail: return L"(error while formatting error message)"; } pos += len; charsLeft -= len; // append stack trace LibError ret = debug_DumpStack(pos, charsLeft, context, lastFuncToSkip); if(ret == ERR::REENTERED) { len = swprintf(pos, charsLeft, L"(cannot start a nested stack trace; what probably happened is that " L"an debug_assert/debug_warn/CHECK_ERR fired during the current trace.)" ); if(len < 0) goto fail; pos += len; charsLeft -= len; } else if(ret != INFO::OK) { char description_buf[100] = {'?'}; len = swprintf(pos, charsLeft, L"(error while dumping stack: %hs)", error_description_r(ret, description_buf, ARRAY_SIZE(description_buf)) ); if(len < 0) goto fail; pos += len; charsLeft -= len; } else // success { len = (int)wcslen(buf); pos = buf+len; charsLeft = maxChars-len; } // append OS error (just in case it happens to be relevant - // it's usually still set from unrelated operations) char description_buf[100] = "?"; LibError errno_equiv = LibError_from_errno(false); if(errno_equiv != ERR::FAIL) // meaningful translation error_description_r(errno_equiv, description_buf, ARRAY_SIZE(description_buf)); char os_error[100] = "?"; sys_error_description_r(0, os_error, ARRAY_SIZE(os_error)); len = swprintf(pos, charsLeft, L"\r\n" L"errno = %d (%hs)\r\n" L"OS error = %hs\r\n", errno, description_buf, os_error ); 
if(len < 0) goto fail; pos += len; charsLeft -= len; return buf; } static ErrorReaction CallDisplayError(const wchar_t* text, size_t flags) { // first try app hook implementation ErrorReaction er = ah_display_error(text, flags); // .. it's only a stub: default to normal implementation if(er == ER_NOT_IMPLEMENTED) er = sys_display_error(text, flags); return er; } static ErrorReaction PerformErrorReaction(ErrorReaction er, size_t flags, u8* suppress) { const bool shouldHandleBreak = (flags & DE_MANUAL_BREAK) == 0; switch(er) { case ER_BREAK: // handle "break" request unless the caller wants to (doing so here // instead of within the dlgproc yields a correct call stack) if(shouldHandleBreak) { debug_break(); er = ER_CONTINUE; } break; case ER_SUPPRESS: *suppress = DEBUG_SUPPRESS; er = ER_CONTINUE; break; case ER_EXIT: isExiting = true; // see declaration #if OS_WIN // prevent (slow) heap reporting since we're exiting abnormally and // thus probably leaking like a sieve. wdbg_heap_Enable(false); #endif exit(EXIT_FAILURE); } return er; } ErrorReaction debug_DisplayError(const wchar_t* description, size_t flags, void* context, const char* lastFuncToSkip, const char* pathname, int line, const char* func, u8* suppress) { // "suppressing" this error means doing nothing and returning ER_CONTINUE. if(ShouldSuppressError(suppress)) return ER_CONTINUE; // fix up params // .. translate description = ah_translate(description); // .. caller supports a suppress flag; set the corresponding flag so that // the error display implementation enables the Suppress option. if(suppress) flags |= DE_ALLOW_SUPPRESS; // .. deal with incomplete file/line info if(!pathname || pathname[0] == '\0') pathname = "unknown"; if(line <= 0) line = 0; if(!func || func[0] == '\0') func = "?"; // .. _FILE__ evaluates to the full path (albeit without drive letter) // which is rather long. we only display the base name for clarity. 
const char* filename = path_name_only(pathname); // display in output window; double-click will navigate to error location. debug_printf("%s(%d): %ls\n", filename, line, description); ErrorMessageMem emm; const wchar_t* text = debug_BuildErrorMessage(description, filename, line, func, context, lastFuncToSkip, &emm); debug_WriteCrashlog(text); ErrorReaction er = CallDisplayError(text, flags); // note: debug_break-ing here to make sure the app doesn't continue // running is no longer necessary. debug_DisplayError now determines our // window handle and is modal. // must happen before PerformErrorReaction because that may exit. debug_FreeErrorMessage(&emm); return PerformErrorReaction(er, flags, suppress); } // strobe indicating expectedError is valid and the next error should be // compared against that / skipped if equal to it. // set/reset via cpu_CAS for thread-safety (hence uintptr_t). static uintptr_t isExpectedErrorValid; static LibError expectedError; void debug_SkipNextError(LibError err) { if(cpu_CAS(&isExpectedErrorValid, 0, 1)) expectedError = err; else debug_assert(0); // internal error: concurrent attempt to skip assert/error } static bool ShouldSkipThisError(LibError err) { // (compare before resetting strobe - expectedError may change afterwards) bool isExpected = (expectedError == err); // (use cpu_CAS to ensure only one error is skipped) if(cpu_CAS(&isExpectedErrorValid, 1, 0)) { debug_assert(isExpected); return isExpected; } return false; } ErrorReaction debug_OnError(LibError err, u8* suppress, const char* file, int line, const char* func) { if(ShouldSkipThisError(err)) return ER_CONTINUE; void* context = 0; const char* lastFuncToSkip = __func__; wchar_t buf[400]; char err_buf[200]; error_description_r(err, err_buf, ARRAY_SIZE(err_buf)); swprintf(buf, ARRAY_SIZE(buf), L"Function call failed: return value was %d (%hs)", err, err_buf); return debug_DisplayError(buf, DE_MANUAL_BREAK, context, lastFuncToSkip, file,line,func, suppress); } void 
debug_SkipNextAssertion() { // to share code between assert and error skip mechanism, we treat the // former as an error. debug_SkipNextError(ERR::ASSERTION_FAILED); } static bool ShouldSkipThisAssertion() { return ShouldSkipThisError(ERR::ASSERTION_FAILED); } ErrorReaction debug_OnAssertionFailure(const char* expr, u8* suppress, const char* file, int line, const char* func) { if(ShouldSkipThisAssertion()) return ER_CONTINUE; void* context = 0; const char* lastFuncToSkip = __func__; wchar_t buf[400]; swprintf(buf, ARRAY_SIZE(buf), L"Assertion failed: \"%hs\"", expr); return debug_DisplayError(buf, DE_MANUAL_BREAK, context, lastFuncToSkip, file,line,func, suppress); } + Index: ps/trunk/source/lib/sysdep/arch/x86_x64/x86_x64.h =================================================================== --- ps/trunk/source/lib/sysdep/arch/x86_x64/x86_x64.h (revision 6535) +++ ps/trunk/source/lib/sysdep/arch/x86_x64/x86_x64.h (revision 6536) @@ -1,167 +1,167 @@ /** * ========================================================================= * File : x86_x64.h * Project : 0 A.D. * Description : CPU-specific routines common to 32 and 64-bit x86 * ========================================================================= */ // license: GPL; see lib/license.txt #ifndef INCLUDED_X86_X64 #define INCLUDED_X86_X64 #if !ARCH_X86_X64 #error "including x86_x64.h without ARCH_X86_X64=1" #endif /** * registers used/returned by x86_x64_cpuid **/ struct x86_x64_CpuidRegs { u32 eax; u32 ebx; u32 ecx; u32 edx; }; /** * invoke CPUID instruction. * @param regs input/output registers. * regs->eax must be set to the desired function. * some functions (e.g. 4) require regs->ecx to be set as well. * rationale: this interface (input/output structure vs. function parameters) * avoids unnecessary copying/initialization if some inputs aren't needed * and allows graceful expansion to functions that require further inputs. * @return true on success or false if the sub-function isn't supported. 
**/ extern bool x86_x64_cpuid(x86_x64_CpuidRegs* regs); /** * CPU vendor. * (this is exposed because some CPUID functions are vendor-specific.) * (an enum is easier to compare than the original string values.) **/ enum x86_x64_Vendors { X86_X64_VENDOR_UNKNOWN, X86_X64_VENDOR_INTEL, - X86_X64_VENDOR_AMD, + X86_X64_VENDOR_AMD }; LIB_API x86_x64_Vendors x86_x64_Vendor(); /** * @return the colloquial processor generation * (5 = Pentium, 6 = Pentium Pro/II/III / K6, 7 = Pentium4 / Athlon, 8 = Core / Opteron) **/ LIB_API size_t x86_x64_Generation(); /** * bit indices of CPU capability flags (128 bits). * values are defined by IA-32 CPUID feature flags - do not change! **/ enum x86_x64_Cap { // standard (ecx) - currently only defined by Intel X86_X64_CAP_SSE3 = 0+0, // Streaming SIMD Extensions 3 X86_X64_CAP_EST = 0+7, // Enhanced Speedstep Technology // standard (edx) X86_X64_CAP_FPU = 32+0, // Floating Point Unit X86_X64_CAP_TSC = 32+4, // TimeStamp Counter X86_X64_CAP_CMOV = 32+15, // Conditional MOVe X86_X64_CAP_TM_SCC = 32+22, // Thermal Monitoring and Software Controlled Clock X86_X64_CAP_MMX = 32+23, // MultiMedia eXtensions X86_X64_CAP_SSE = 32+25, // Streaming SIMD Extensions X86_X64_CAP_SSE2 = 32+26, // Streaming SIMD Extensions 2 X86_X64_CAP_HT = 32+28, // HyperThreading // extended (ecx) X86_X64_CAP_AMD_CMP_LEGACY = 64+1, // N-core and X86_X64_CAP_HT is falsely set // extended (edx) X86_X64_CAP_AMD_MP = 96+19, // MultiProcessing capable; reserved on AMD64 X86_X64_CAP_AMD_MMX_EXT = 96+22, X86_X64_CAP_AMD_3DNOW_PRO = 96+30, X86_X64_CAP_AMD_3DNOW = 96+31 }; /** * @return whether the CPU supports the indicated x86_x64_Cap / feature flag. 
**/ LIB_API bool x86_x64_cap(x86_x64_Cap cap); //----------------------------------------------------------------------------- // cache enum x86_x64_CacheType { X86_X64_CACHE_TYPE_NULL, // never passed to the callback X86_X64_CACHE_TYPE_DATA, X86_X64_CACHE_TYPE_INSTRUCTION, X86_X64_CACHE_TYPE_UNIFIED // note: further values are "reserved" }; struct x86_x64_CacheParameters { x86_x64_CacheType type; size_t level; size_t associativity; size_t lineSize; size_t sharedBy; size_t size; }; typedef void (CALL_CONV *x86_x64_CacheCallback)(const x86_x64_CacheParameters*); /** * call back for each cache reported by CPUID. * * note: ordering is undefined (see Intel AP-485) **/ LIB_API void x86_x64_EnumerateCaches(x86_x64_CacheCallback callback); LIB_API size_t x86_x64_L1CacheLineSize(); LIB_API size_t x86_x64_L2CacheLineSize(); //----------------------------------------------------------------------------- // stateless /** * @return APIC ID of the currently executing processor or zero if the * platform does not have an xAPIC (i.e. 7th generation x86 or below). * * rationale: the alternative of accessing the APIC mmio registers is not * feasible - mahaf_MapPhysicalMemory only works reliably on WinXP. we also * don't want to intefere with the OS's constant use of the APIC registers. **/ LIB_API u8 x86_x64_ApicId(); /** * @return the current value of the TimeStampCounter (a counter of * CPU cycles since power-on, which is useful for high-resolution timing * but potentially differs between multiple CPUs) **/ LIB_API u64 x86_x64_rdtsc(); /** * trigger a breakpoint inside this function when it is called. **/ LIB_API void x86_x64_DebugBreak(void); /** * measure the CPU clock frequency via x86_x64_rdtsc and timer_Time. * (it follows that this must not be called from WHRT init.) * this takes several milliseconds (i.e. much longer than * os_cpu_ClockFrequency) but delivers accurate measurements. 
**/ LIB_API double x86_x64_ClockFrequency(); #endif // #ifndef INCLUDED_X86_X64 Index: ps/trunk/source/lib/cache_adt.h =================================================================== --- ps/trunk/source/lib/cache_adt.h (revision 6535) +++ ps/trunk/source/lib/cache_adt.h (revision 6536) @@ -1,733 +1,733 @@ /** * ========================================================================= * File : cache_adt.h * Project : 0 A.D. * Description : Customizable cache data structure. * ========================================================================= */ // license: GPL; see lib/license.txt #ifndef INCLUDED_CACHE_ADT #define INCLUDED_CACHE_ADT #include #include #include #include // std::priority_queue /* Cache for items of variable size and value/"cost". underlying displacement algorithm is pluggable; default is "Landlord". template reference: Entry provides size, cost, credit and credit_density(). rationale: - made a template instead of exposing Cache::Entry because that would drag a lot of stuff out of Cache. - calculates its own density since that entails a Divider functor, which requires storage inside Entry. Entries is a collection with iterator and begin()/end() and "static Entry& entry_from_it(iterator)". rationale: - STL map has pair as its value_type, so this function would return it->second. however, we want to support other container types (where we'd just return *it). Manager is a template parameterized on typename Key and class Entry. its interface is as follows: // is the cache empty? bool empty() const; // add (key, entry) to cache. void add(const Key& key, const Entry& entry); // if the entry identified by is not in cache, return false; // otherwise return true and pass back a pointer to it. bool find(const Key& key, const Entry** pentry) const; // remove an entry from cache, which is assumed to exist! // this makes sense because callers will typically first use find() to // return info about the entry; this also checks if present. 
void remove(const Key& key); // mark as just accessed for purpose of cache management. // it will tend to be kept in cache longer. void on_access(Entry& entry); // caller's intent is to remove the least valuable entry. // in implementing this, you have the latitude to "shake loose" // several entries (e.g. because their 'value' is equal). // they must all be push_back-ed into the list; Cache will dole // them out one at a time in FIFO order to callers. // // rationale: // - it is necessary for callers to receive a copy of the // Entry being evicted - e.g. file_cache owns its items and // they must be passed back to allocator when evicted. // - e.g. Landlord can potentially see several entries become // evictable in one call to remove_least_valuable. there are // several ways to deal with this: // 1) generator interface: we return one of { empty, nevermind, // removed, remove-and-call-again }. this greatly complicates // the call site. // 2) return immediately after finding an item to evict. // this changes cache behavior - entries stored at the // beginning would be charged more often (unfair). // resuming charging at the next entry doesn't work - this // would have to be flushed when adding, at which time there // is no provision for returning any items that may be evicted. // 3) return list of all entries "shaken loose". this incurs // frequent mem allocs, which can be alleviated via suballocator. // note: an intrusive linked-list doesn't make sense because // entries to be returned need to be copied anyway (they are // removed from the manager's storage). void remove_least_valuable(std::list& entry_list) */ // // functors to calculate minimum credit density (MCD) // // MCD is required for the Landlord algorithm's evict logic. // [Young02] calls it '\delta'. // scan over all entries and return MCD. 
template float ll_calc_min_credit_density(const Entries& entries) { float min_credit_density = FLT_MAX; for(typename Entries::const_iterator it = entries.begin(); it != entries.end(); ++it) { const float credit_density = Entries::entry_from_it(it).credit_density(); min_credit_density = fminf(min_credit_density, credit_density); } return min_credit_density; } // note: no warning is given that the MCD entry is being removed! // (reduces overhead in remove_least_valuable) // these functors must account for that themselves (e.g. by resetting // their state directly after returning MCD). // determine MCD by scanning over all entries. // tradeoff: O(N) time complexity, but all notify* calls are no-ops. template class McdCalc_Naive { public: void notify_added(const Entry&) const {} void notify_decreased(const Entry&) const {} void notify_impending_increase_or_remove(const Entry&) const {} void notify_increased_or_removed(const Entry&) const {} float operator()(const Entries& entries) const { const float mcd = ll_calc_min_credit_density(entries); return mcd; } }; // cache previous MCD and update it incrementally (when possible). // tradeoff: amortized O(1) time complexity, but notify* calls must // perform work whenever something in the cache changes. template class McdCalc_Cached { public: McdCalc_Cached() : min_credit_density(FLT_MAX), min_valid(false) {} void notify_added(const Entry& entry) { // when adding a new item, the minimum credit density can only // decrease or remain the same; acting as if entry's credit had // been decreased covers both cases. notify_decreased(entry); } void notify_decreased(const Entry& entry) { min_credit_density = std::min(min_credit_density, entry.credit_density()); } void notify_impending_increase_or_remove(const Entry& entry) { // remember if this entry had the smallest density is_min_entry = feq(min_credit_density, entry.credit_density()); } void notify_increased_or_removed(const Entry& UNUSED(entry)) { // .. 
it did and was increased or removed. we must invalidate // MCD and recalculate it next time. if(is_min_entry) { min_valid = false; min_credit_density = -1.0f; } } float operator()(const Entries& entries) { if(min_valid) { // the entry that has MCD will be removed anyway by caller; // we need to invalidate here because they don't call // notify_increased_or_removed. min_valid = false; return min_credit_density; } // this is somewhat counterintuitive. since we're calculating // MCD directly, why not mark our cached version of it valid // afterwards? reason is that our caller will remove the entry with // MCD, so it'll be invalidated anyway. // instead, our intent is to calculate MCD for the *next time*. const float ret = ll_calc_min_credit_density(entries); min_valid = true; min_credit_density = FLT_MAX; return ret; } private: float min_credit_density; bool min_valid; // temporary flag set by notify_impending_increase_or_remove bool is_min_entry; }; // // Landlord cache management policy: see [Young02]. // // in short, each entry has credit initially set to cost. when wanting to // remove an item, all are charged according to MCD and their size; // entries are evicted if their credit is exhausted. accessing an entry // restores "some" of its credit. template class McdCalc = McdCalc_Cached> class Landlord { public: bool empty() const { return map.empty(); } void add(const Key& key, const Entry& entry) { // adapter for add_ (which returns an iterator) (void)add_(key, entry); } bool find(const Key& key, const Entry** pentry) const { MapCIt it = map.find(key); if(it == map.end()) return false; *pentry = &it->second; return true; } void remove(const Key& key) { MapIt it = map.find(key); // note: don't complain if not in the cache: this happens after // writing a file and invalidating its cache entry, which may // or may not exist. 
if(it != map.end()) remove_(it); } void on_access(Entry& entry) { mcd_calc.notify_impending_increase_or_remove(entry); // Landlord algorithm calls for credit to be reset to anything // between its current value and the cost. const float gain = 0.75f; // restore most credit entry.credit = gain*entry.cost + (1.0f-gain)*entry.credit; mcd_calc.notify_increased_or_removed(entry); } void remove_least_valuable(std::list& entry_list) { // we are required to evict at least one entry. one iteration // ought to suffice, due to definition of min_credit_density and // tolerance; however, we provide for repeating if necessary. again: // messing with this (e.g. raising if tiny) would result in // different evictions than Landlord_Lazy, which is unacceptable. // nor is doing so necessary: if mcd is tiny, so is credit. const float min_credit_density = mcd_calc(map); debug_assert(min_credit_density > 0.0f); for(MapIt it = map.begin(); it != map.end();) // no ++it { Entry& entry = it->second; charge(entry, min_credit_density); if(should_evict(entry)) { entry_list.push_back(entry); // annoying: we have to increment before erasing MapIt it_to_remove = it++; map.erase(it_to_remove); } else { mcd_calc.notify_decreased(entry); ++it; } } if(entry_list.empty()) goto again; } protected: // note: hash_map is probably better in terms of locality // (relevant when iterating over all items in remove_least_valuable), // but would require a hash comparator for VfsPath. class Map : public std::map { public: static Entry& entry_from_it(typename Map::iterator it) { return it->second; } static const Entry& entry_from_it(typename Map::const_iterator it) { return it->second; } }; typedef typename Map::iterator MapIt; typedef typename Map::const_iterator MapCIt; Map map; // add entry and return iterator pointing to it. 
MapIt add_(const Key& key, const Entry& entry) { typedef std::pair PairIB; typename Map::value_type val = std::make_pair(key, entry); PairIB ret = map.insert(val); debug_assert(ret.second); // must not already be in map mcd_calc.notify_added(entry); return ret.first; } // remove entry (given by iterator) directly. void remove_(MapIt it) { const Entry& entry = it->second; mcd_calc.notify_impending_increase_or_remove(entry); mcd_calc.notify_increased_or_removed(entry); map.erase(it); } void charge(Entry& entry, float delta) { entry.credit -= delta * entry.size; // don't worry about entry.size being 0 - if so, cost // should also be 0, so credit will already be 0 anyway. } // for each entry, 'charge' it (i.e. reduce credit by) delta * its size. // delta is typically MCD (see above); however, several such updates // may be lumped together to save time. Landlord_Lazy does this. void charge_all(float delta) { for(MapIt it = map.begin(); it != map.end(); ++it) { Entry& entry = it->second; entry.credit -= delta * entry.size; if(!should_evict(entry)) mcd_calc.notify_decreased(entry); } } // is entry's credit exhausted? if so, it should be evicted. bool should_evict(const Entry& entry) { // we need a bit of leeway because density calculations may not // be exact. choose value carefully: must not be high enough to // trigger false positives. return entry.credit < 0.0001f; } private: McdCalc mcd_calc; }; // Cache manger policies. (these are partial specializations of Landlord, // adapting it to the template params required by Cache) template class Landlord_Naive : public Landlord {}; template class Landlord_Cached: public Landlord {}; // variant of Landlord that adds a priority queue to directly determine // which entry to evict. this allows lumping several charge operations // together and thus reduces iteration over all entries. // tradeoff: O(logN) removal (instead of N), but additional O(N) storage. 
template class Landlord_Lazy : public Landlord_Naive { typedef typename Landlord_Naive::Map Map; typedef typename Landlord_Naive::MapIt MapIt; typedef typename Landlord_Naive::MapCIt MapCIt; public: Landlord_Lazy() { pending_delta = 0.0f; } void add(const Key& key, const Entry& entry) { // we must apply pending_delta now - otherwise, the existing delta // would later be applied to this newly added item (incorrect). commit_pending_delta(); MapIt it = Parent::add_(key, entry); pri_q.push(it); } void remove(const Key& key) { Parent::remove(key); // reconstruct pri_q from current map. this is slow (N*logN) and // could definitely be done better, but we don't bother since // remove is a very rare operation (e.g. invalidating entries). while(!pri_q.empty()) pri_q.pop(); for(MapCIt it = this->map.begin(); it != this->map.end(); ++it) pri_q.push(it); } void on_access(Entry& entry) { Parent::on_access(entry); // entry's credit was changed. we now need to reshuffle the // pri queue to reflect this. pri_q.ensure_heap_order(); } void remove_least_valuable(std::list& entry_list) { MapIt least_valuable_it = pri_q.top(); pri_q.pop(); Entry& entry = Map::entry_from_it(least_valuable_it); entry_list.push_back(entry); // add to pending_delta the MCD that would have resulted // if removing least_valuable_it normally. // first, calculate actual credit (i.e. apply pending_delta to // this entry); then add the resulting density to pending_delta. entry.credit -= pending_delta*entry.size; const float credit_density = entry.credit_density(); debug_assert(credit_density > 0.0f); pending_delta += credit_density; Parent::remove_(least_valuable_it); } private: typedef Landlord_Naive Parent; // sort iterators by credit_density of the Entry they reference. 
struct CD_greater { bool operator()(MapIt it1, MapIt it2) const { return Map::entry_from_it(it1).credit_density() > Map::entry_from_it(it2).credit_density(); } }; // wrapper on top of priority_queue that allows 'heap re-sift' // (see on_access). // notes: // - greater comparator makes pri_q.top() the one with // LEAST credit_density, which is what we want. // - deriving from an STL container is a bit dirty, but we need this // to get at the underlying data (priority_queue interface is not // very capable). class PriQ: public std::priority_queue, CD_greater> { public: void ensure_heap_order() { // TODO: this is actually N*logN - ouch! that explains high // CPU cost in profile. this is called after only 1 item has // changed, so a logN "sift" operation ought to suffice. // that's not supported by the STL heap functions, so we'd // need a better implementation. pending.. std::make_heap(this->c.begin(), this->c.end(), this->comp); } }; PriQ pri_q; // delta values that have accumulated over several // remove_least_valuable() calls. applied during add(). float pending_delta; void commit_pending_delta() { if(pending_delta > 0.0f) { this->charge_all(pending_delta); pending_delta = 0.0f; // we've changed entry credit, so the heap order *may* have been // violated; reorder the pri queue. (I don't think so, // due to definition of delta, but we'll play it safe) pri_q.ensure_heap_order(); } } }; // // functor that implements division of first arg by second // // this is used to calculate credit_density(); performance matters // because this is called for each entry during each remove operation. // floating-point division (fairly slow) class Divider_Naive { public: Divider_Naive() {} // needed for default CacheEntry ctor Divider_Naive(float UNUSED(x)) {} float operator()(float val, float divisor) const { return val / divisor; } }; // caches reciprocal of divisor and multiplies by that. // tradeoff: only 4 clocks (instead of 20), but 4 bytes extra per entry. 
class Divider_Recip { float recip; public: Divider_Recip() {} // needed for default CacheEntry ctor Divider_Recip(float x) { recip = 1.0f / x; } float operator()(float val, float UNUSED(divisor)) const { return val * recip; } }; // TODO: use SSE/3DNow RCP instruction? not yet, because not all systems // support it and overhead of detecting this support eats into any gains. // initial implementation for testing purposes; quite inefficient. template class LRU { public: bool empty() const { return lru.empty(); } void add(const Key& key, const Entry& entry) { lru.push_back(KeyAndEntry(key, entry)); } bool find(const Key& key, const Entry** pentry) const { CIt it = std::find_if(lru.begin(), lru.end(), KeyEq(key)); if(it == lru.end()) return false; *pentry = &it->entry; return true; } void remove(const Key& key) { std::remove_if(lru.begin(), lru.end(), KeyEq(key)); } void on_access(Entry& entry) { for(It it = lru.begin(); it != lru.end(); ++it) { if(&entry == &it->entry) { add(it->key, it->entry); lru.erase(it); return; } } debug_assert(0); // entry not found in list } void remove_least_valuable(std::list& entry_list) { entry_list.push_back(lru.front().entry); lru.pop_front(); } private: struct KeyAndEntry { Key key; Entry entry; KeyAndEntry(const Key& key_, const Entry& entry_) : key(key_), entry(entry_) {} }; class KeyEq { Key key; public: KeyEq(const Key& key_) : key(key_) {} bool operator()(const KeyAndEntry& ke) const { return ke.key == key; } }; typedef std::list List; typedef typename List::iterator It; typedef typename List::const_iterator CIt; List lru; }; // this is applicable to all cache management policies and stores all // required information. a Divider functor is used to implement // division for credit_density. 
template struct CacheEntry { Item item; size_t size; size_t cost; float credit; Divider divider; // needed for mgr.remove_least_valuable's entry_copy CacheEntry() { } CacheEntry(const Item& item_, size_t size_, size_t cost_) : item(item_), divider((float)size_) { size = size_; cost = cost_; - credit = cost; + credit = (float)cost; // else divider will fail debug_assert(size != 0); } float credit_density() const { return divider(credit, (float)size); } }; // // Cache // template < typename Key, typename Item, // see documentation above for Manager's interface. template class Manager = Landlord_Cached, class Divider = Divider_Naive > class Cache { public: Cache() : mgr() {} void add(const Key& key, const Item& item, size_t size, size_t cost) { return mgr.add(key, Entry(item, size, cost)); } // remove the entry identified by . expected usage is to check // if present and determine size via retrieve(), so no need for // error checking. // useful for invalidating single cache entries. void remove(const Key& key) { mgr.remove(key); } // if there is no entry for in the cache, return false. // otherwise, return true and pass back item and (optionally) size. // // if refill_credit (default), the cache manager 'rewards' this entry, // tending to keep it in cache longer. this parameter is not used in // normal operation - it's only for special cases where we need to // make an end run around the cache accounting (e.g. for cache simulator). bool retrieve(const Key& key, Item& item, size_t* psize = 0, bool refill_credit = true) { const Entry* entry; if(!mgr.find(key, &entry)) return false; item = entry->item; if(psize) *psize = entry->size; if(refill_credit) mgr.on_access((Entry&)*entry); return true; } bool peek(const Key& key, Item& item, size_t* psize = 0) { return retrieve(key, item, psize, false); } // toss out the least valuable entry. return false if cache is empty, // otherwise true and (optionally) pass back its item and size. 
bool remove_least_valuable(Item* pItem = 0, size_t* pSize = 0) { // as an artefact of the cache eviction policy, several entries // may be "shaken loose" by one call to remove_least_valuable. // we cache them in a list to disburden callers (they always get // exactly one). if(entries_awaiting_eviction.empty()) { if(empty()) return false; mgr.remove_least_valuable(entries_awaiting_eviction); debug_assert(!entries_awaiting_eviction.empty()); } const Entry& entry = entries_awaiting_eviction.front(); if(pItem) *pItem = entry.item; if(pSize) *pSize = entry.size; entries_awaiting_eviction.pop_front(); return true; } bool empty() const { return mgr.empty(); } private: typedef CacheEntry Entry; // see note in remove_least_valuable(). std::list entries_awaiting_eviction; Manager mgr; }; #endif // #ifndef INCLUDED_CACHE_ADT Index: ps/trunk/source/lib/timer.h =================================================================== --- ps/trunk/source/lib/timer.h (revision 6535) +++ ps/trunk/source/lib/timer.h (revision 6536) @@ -1,340 +1,342 @@ /** * ========================================================================= * File : timer.h * Project : 0 A.D. * Description : platform-independent high resolution timer * ========================================================================= */ // license: GPL; see lib/license.txt #ifndef INCLUDED_TIMER #define INCLUDED_TIMER #include "lib/config2.h" // CONFIG2_TIMER_ALLOW_RDTSC #if ARCH_X86_X64 && CONFIG2_TIMER_ALLOW_RDTSC # include "lib/sysdep/arch/x86_x64/x86_x64.h" // x86_x64_rdtsc # include "lib/sysdep/os_cpu.h" // os_cpu_ClockFrequency #endif /** * timer_Time will subsequently return values relative to the current time. **/ LIB_API void timer_LatchStartTime(); /** * @return high resolution (> 1 us) timestamp [s]. **/ LIB_API double timer_Time(void); /** * @return resolution [s] of the timer. 
**/ LIB_API double timer_Resolution(void); //----------------------------------------------------------------------------- // scope timing /// used by TIMER -class ScopeTimer : noncopyable +class ScopeTimer { + NONCOPYABLE(ScopeTimer); public: ScopeTimer(const char* description) : m_t0(timer_Time()), m_description(description) { } ~ScopeTimer() { double t1 = timer_Time(); double dt = t1-m_t0; // determine scale factor for pretty display double scale = 1e6; const char* unit = "us"; if(dt > 1.0) scale = 1, unit = "s"; else if(dt > 1e-3) scale = 1e3, unit = "ms"; debug_printf("TIMER| %s: %g %s\n", m_description, dt*scale, unit); } private: double m_t0; const char* m_description; }; /** * Measures the time taken to execute code up until end of the current scope; * displays it via debug_printf. Can safely be nested. * Useful for measuring time spent in a function or basic block. * must remain valid over the lifetime of this object; * a string literal is safest. * * Example usage: * void func() * { * TIMER("description"); * // code to be measured * } **/ #define TIMER(description) ScopeTimer UID__(description) /** * Measures the time taken to execute code between BEGIN and END markers; * displays it via debug_printf. Can safely be nested. * Useful for measuring several pieces of code within the same function/block. * must remain valid over the lifetime of this object; * a string literal is safest. * * Caveats: * - this wraps the code to be measured in a basic block, so any * variables defined there are invisible to surrounding code. * - the description passed to END isn't inspected; you are responsible for * ensuring correct nesting! 
* * Example usage: * void func2() * { * // uninteresting code * TIMER_BEGIN("description2"); * // code to be measured * TIMER_END("description2"); * // uninteresting code * } **/ #define TIMER_BEGIN(description) { ScopeTimer UID__(description) #define TIMER_END(description) } //----------------------------------------------------------------------------- // cumulative timer API // this supplements in-game profiling by providing low-overhead, // high resolution time accounting of specific areas. // since TIMER_ACCRUE et al. are called so often, we try to keep // overhead to an absolute minimum. storing raw tick counts (e.g. CPU cycles // returned by ia32_rdtsc) instead of absolute time has two benefits: // - no need to convert from raw->time on every call // (instead, it's only done once when displaying the totals) // - possibly less overhead to querying the time itself // (timer_Time may be using slower time sources with ~3us overhead) // // however, the cycle count is not necessarily a measure of wall-clock time // (see http://www.gamedev.net/reference/programming/features/timing). // therefore, on systems with SpeedStep active, measurements of I/O or other // non-CPU bound activity may be skewed. this is ok because the timer is // only used for profiling; just be aware of the issue. // if this is a problem, disable CONFIG2_TIMER_ALLOW_RDTSC. // // note that overflow isn't an issue either way (63 bit cycle counts // at 10 GHz cover intervals of 29 years). 
#if ARCH_X86_X64 && CONFIG2_TIMER_ALLOW_RDTSC

// time value stored as a raw tick count obtained from x86_x64_rdtsc;
// it is only converted to seconds on demand (see ToSeconds).
class TimerUnit
{
public:
	void SetToZero()
	{
		m_ticks = 0;
	}

	// latch the current value returned by x86_x64_rdtsc.
	void SetFromTimer()
	{
		m_ticks = x86_x64_rdtsc();
	}

	// add the interval (t1 - t0) to this value.
	void AddDifference(TimerUnit t0, TimerUnit t1)
	{
		m_ticks += t1.m_ticks - t0.m_ticks;
	}

	void Subtract(TimerUnit t)
	{
		m_ticks -= t.m_ticks;
	}

	// @return the tick count followed by a unit suffix (c, kc, Mc or Gc).
	std::string ToString() const
	{
		// NOTE(review): m_ticks is declared u64; if that is an unsigned
		// type (as the name suggests), this check can never fail.
		debug_assert(m_ticks >= 0.0);

		// determine scale factor for pretty display
		double scale = 1.0;
		const char* unit = " c";
		if(m_ticks > 10000000000LL)	// 10 Gc
			scale = 1e-9, unit = " Gc";
		else if(m_ticks > 10000000)	// 10 Mc
			scale = 1e-6, unit = " Mc";
		else if(m_ticks > 10000)	// 10 kc
			scale = 1e-3, unit = " kc";

		std::stringstream ss;
		ss << m_ticks*scale;
		ss << unit;
		return ss.str();
	// NOTE(review): as captured here, this hunk removes a closing brace
	// without a visible replacement - verify against the repository that
	// ToString() is still properly closed in revision 6536.
-	}

	// @return ticks divided by os_cpu_ClockFrequency()
	// (presumably that is in Hz, yielding seconds - TODO confirm).
	double ToSeconds() const
	{
		return m_ticks / os_cpu_ClockFrequency();
	}

private:
	u64 m_ticks;
};

#else

// time value stored in seconds, as returned by timer_Time.
class TimerUnit
{
public:
	void SetToZero()
	{
		m_seconds = 0.0;
	}

	// latch the current timer_Time() value.
	void SetFromTimer()
	{
		m_seconds = timer_Time();
	}

	// add the interval (t1 - t0) to this value.
	void AddDifference(TimerUnit t0, TimerUnit t1)
	{
		m_seconds += t1.m_seconds - t0.m_seconds;
	}

	void Subtract(TimerUnit t)
	{
		m_seconds -= t.m_seconds;
	}

	// @return the time scaled to us, ms or s, followed by that unit.
	std::string ToString() const
	{
		debug_assert(m_seconds >= 0.0);

		// determine scale factor for pretty display
		double scale = 1e6;
		const char* unit = " us";
		if(m_seconds > 1.0)
			scale = 1, unit = " s";
		else if(m_seconds > 1e-3)
			scale = 1e3, unit = " ms";

		std::stringstream ss;
		ss << m_seconds*scale;
		ss << unit;
-		return ss.str(); }
+		return ss.str();
+	}

	double ToSeconds() const
	{
		return m_seconds;
	}

private:
	double m_seconds;
};

#endif

// opaque - do not access its fields!
// note: must be defined here because clients instantiate them;
// fields cannot be made private due to POD requirement.
struct TimerClient
{
	TimerUnit sum;	// total bill

	// only store a pointer for efficiency.
	const char* description;

	// presumably links the registered clients into a singly-linked list
	// (the code that uses it is not visible here).
	TimerClient* next;

	// how often timer_BillClient was called (helps measure relative
	// performance of something that is done indeterminately often).
	size_t num_calls;
};

/**
 * make the given TimerClient (usually instantiated as static data)
 * ready for use. returns its address for TIMER_ADD_CLIENT's convenience.
 * this client's total (added to by timer_BillClient) will be
 * displayed by timer_DisplayClientTotals.
 * notes:
 * - may be called at any time;
 * - always succeeds (there's no fixed limit);
 * - free() is not needed nor possible.
 * - description must remain valid until exit; a string literal is safest.
 **/
LIB_API TimerClient* timer_AddClient(TimerClient* tc, const char* description);

/**
 * "allocate" a new TimerClient that will keep track of the total time
 * billed to it, along with a description string. These are displayed when
 * timer_DisplayClientTotals is called.
 * Invoke this at file or function scope; a (static) TimerClient pointer of
 * name <id> will be defined, which should be passed to TIMER_ACCRUE.
 * The stringized <id> is what gets passed to timer_AddClient as description.
 **/
#define TIMER_ADD_CLIENT(id)\
	static TimerClient UID__;\
	static TimerClient* id = timer_AddClient(&UID__, #id);

/**
 * bill the difference between t0 and t1 to the client's total.
 **/
LIB_API void timer_BillClient(TimerClient* tc, TimerUnit t0, TimerUnit t1);

/**
 * display all clients' totals; does not reset them.
 * typically called at exit.
 **/
LIB_API void timer_DisplayClientTotals();

/// used by TIMER_ACCRUE
class ScopeTimerAccrue
{
+	NONCOPYABLE(ScopeTimerAccrue);
public:
	// remember the client to bill and latch the start time.
	ScopeTimerAccrue(TimerClient* tc)
		: m_tc(tc)
	{
		m_t0.SetFromTimer();
	}

	// bill the time elapsed since construction to the client.
	~ScopeTimerAccrue()
	{
		TimerUnit t1;
		t1.SetFromTimer();
		timer_BillClient(m_tc, m_t0, t1);
	}

private:
	TimerUnit m_t0;
	TimerClient* m_tc;
};

/**
 * Measure the time taken to execute code up until end of the current scope;
 * bill it to the given TimerClient object. Can safely be nested.
 * Useful for measuring total time spent in a function or basic block over the
 * entire program.
 * (NOTE(review): the name that began the next sentence was lost in
 * extraction; presumably it refers to the client or its description string.)
 * must remain valid over the lifetime of this object;
 * a string literal is safest.
 *
 * Example usage:
 * 	TIMER_ADD_CLIENT(identifier)
 *
 * 	void func()
 * 	{
 * 		TIMER_ACCRUE(name_of_pointer_to_client);
 * 		// code to be measured
 * 	}
 *
 * 	[at exit]
 * 	timer_DisplayClientTotals();
 **/
#define TIMER_ACCRUE(client) ScopeTimerAccrue UID__(client)

#endif	// #ifndef INCLUDED_TIMER