Index: ps/trunk/source/lib/fat_time.h =================================================================== --- ps/trunk/source/lib/fat_time.h (revision 9349) +++ ps/trunk/source/lib/fat_time.h (nonexistent) @@ -1,29 +0,0 @@ -/* Copyright (c) 2010 Wildfire Games - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#ifndef INCLUDED_FAT_TIME -#define INCLUDED_FAT_TIME - -extern time_t time_t_from_FAT(u32 fat_timedate); -extern u32 FAT_from_time_t(time_t time); - -#endif // INCLUDED_FAT_TIME Property changes on: ps/trunk/source/lib/fat_time.h ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Index: ps/trunk/source/lib/fat_time.cpp =================================================================== --- ps/trunk/source/lib/fat_time.cpp (revision 9349) +++ ps/trunk/source/lib/fat_time.cpp (nonexistent) @@ -1,77 +0,0 @@ -/* Copyright (c) 2010 Wildfire Games - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -/* - * timestamp conversion: DOS FAT <-> Unix time_t - */ - -#include "precompiled.h" -#include "lib/fat_time.h" - -#include - -#include "lib/bits.h" - - -time_t time_t_from_FAT(u32 fat_timedate) -{ - const u32 fat_time = bits(fat_timedate, 0, 15); - const u32 fat_date = bits(fat_timedate, 16, 31); - - struct tm t; // struct tm format: - t.tm_sec = bits(fat_time, 0,4) * 2; // [0,59] - t.tm_min = bits(fat_time, 5,10); // [0,59] - t.tm_hour = bits(fat_time, 11,15); // [0,23] - t.tm_mday = bits(fat_date, 0,4); // [1,31] - t.tm_mon = bits(fat_date, 5,8) - 1; // [0,11] - t.tm_year = bits(fat_date, 9,15) + 80; // since 1900 - t.tm_isdst = -1; // unknown - let libc determine - - // otherwise: totally bogus, and at the limit of 32-bit time_t - debug_assert(t.tm_year < 138); - - time_t ret = mktime(&t); - debug_assert(ret != (time_t)-1); // mktime shouldn't fail - return ret; -} - - -u32 FAT_from_time_t(time_t time) -{ - // (values are adjusted for DST) - struct tm* t = localtime(&time); - - const u16 fat_time = u16( - (t->tm_sec/2) | // 5 - (u16(t->tm_min) << 5) | // 6 - (u16(t->tm_hour) << 11) // 5 - ); - - const u16 fat_date = u16( - (t->tm_mday) | // 5 - (u16(t->tm_mon+1) << 5) | // 4 - (u16(t->tm_year-80) << 9) // 7 - ); - - u32 fat_timedate = u32_from_u16(fat_date, fat_time); - return fat_timedate; -} Property changes on: ps/trunk/source/lib/fat_time.cpp ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Index: ps/trunk/source/lib/file/io/block_cache.h =================================================================== --- ps/trunk/source/lib/file/io/block_cache.h (revision 9349) +++ ps/trunk/source/lib/file/io/block_cache.h (nonexistent) @@ -1,106 +0,0 @@ -/* Copyright (c) 2010 Wildfire Games - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the 
Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -/* - * cache for aligned I/O blocks. - */ - -#ifndef INCLUDED_BLOCK_CACHE -#define INCLUDED_BLOCK_CACHE - -#include "lib/os_path.h" - -/** - * ID that uniquely identifies a block within a file - **/ -class BlockId -{ -public: - BlockId(); - BlockId(const OsPath& pathname, off_t ofs); - bool operator==(const BlockId& rhs) const; - bool operator!=(const BlockId& rhs) const; - -private: - u64 m_id; -}; - - -/** - * cache of (aligned) file blocks with support for zero-copy IO. - * absorbs the overhead of rounding up archive IOs to the nearest block - * boundaries by keeping the last few blocks in memory. - * - * the interface is somewhat similar to FileCache; see the note there. - * - * not thread-safe (each thread is intended to have its own cache). - **/ -class BlockCache -{ -public: - /** - * @param numBlocks (the default value is enough to support temp buffers - * and absorb the cost of unaligned reads from archives.) - **/ - BlockCache(size_t numBlocks = 16); - - /** - * Add a block to the cache. 
- * - * @param id Key that will be used to Retrieve the block. - * @param buf - * - * Call this when the block's IO has completed; its data will - * satisfy subsequent Retrieve calls for the same id. - * If CONFIG2_CACHE_READ_ONLY, the memory is made read-only. - **/ - void Add(BlockId id, const shared_ptr& buf); - - /** - * Attempt to retrieve a block's contents. - * - * @return whether the block is in cache. - * - * if successful, a shared pointer to the contents is returned. - * they remain valid until all references are removed and the block - * is evicted. - **/ - bool Retrieve(BlockId id, shared_ptr& buf); - - /** - * Invalidate the contents of the cache. - * - * this effectively discards the contents of existing blocks - * (more specifically: prevents them from satisfying Retrieve calls - * until a subsequent Add with the same id). - * - * useful for self-tests: multiple independent IO tests run in the same - * process and must not influence each other via the cache. - **/ - void InvalidateAll(); - -private: - class Impl; - shared_ptr impl; -}; - -#endif // #ifndef INCLUDED_BLOCK_CACHE Property changes on: ps/trunk/source/lib/file/io/block_cache.h ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Index: ps/trunk/source/lib/file/io/block_cache.cpp =================================================================== --- ps/trunk/source/lib/file/io/block_cache.cpp (revision 9349) +++ ps/trunk/source/lib/file/io/block_cache.cpp (nonexistent) @@ -1,163 +0,0 @@ -/* Copyright (c) 2010 Wildfire Games - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons 
to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -/* - * cache for aligned I/O blocks. - */ - -#include "precompiled.h" -#include "lib/file/io/block_cache.h" - -#include "lib/config2.h" // CONFIG2_CACHE_READ_ONLY -#include "lib/posix/posix_mman.h" // mprotect -#include "lib/file/common/file_stats.h" -#include "lib/lockfree.h" -#include "lib/allocators/pool.h" -#include "lib/fnv_hash.h" -#include "lib/file/io/io_align.h" - - -//----------------------------------------------------------------------------- - -BlockId::BlockId() - : m_id(0) -{ -} - -BlockId::BlockId(const OsPath& pathname, off_t ofs) -{ - const Path::String& string = pathname.string(); - m_id = fnv_hash64(string.c_str(), string.length()*sizeof(string[0])); - const size_t indexBits = 16; - m_id <<= indexBits; - const off_t blockIndex = off_t(ofs / BLOCK_SIZE); - debug_assert(blockIndex < off_t(1) << indexBits); - m_id |= blockIndex; -} - -bool BlockId::operator==(const BlockId& rhs) const -{ - return m_id == rhs.m_id; -} - -bool BlockId::operator!=(const BlockId& rhs) const -{ - return !operator==(rhs); -} - - -//----------------------------------------------------------------------------- - -struct Block -{ - Block(BlockId id, const shared_ptr& buf) - { - this->id = id; - this->buf = buf; - } - - // block is "valid" and can satisfy 
Retrieve() requests if a - // (non-default-constructed) ID has been assigned. - BlockId id; - - // this block is "in use" if use_count != 1. - shared_ptr buf; -}; - - -//----------------------------------------------------------------------------- - -class BlockCache::Impl -{ -public: - Impl(size_t numBlocks) - : m_maxBlocks(numBlocks) - { - } - - void Add(BlockId id, const shared_ptr& buf) - { - if(m_blocks.size() > m_maxBlocks) - { -#if CONFIG2_CACHE_READ_ONLY - mprotect((void*)m_blocks.front().buf.get(), BLOCK_SIZE, PROT_READ); -#endif - m_blocks.pop_front(); // evict oldest block - } - -#if CONFIG2_CACHE_READ_ONLY - mprotect((void*)buf.get(), BLOCK_SIZE, PROT_WRITE|PROT_READ); -#endif - m_blocks.push_back(Block(id, buf)); - } - - bool Retrieve(BlockId id, shared_ptr& buf) - { - // (linear search is ok since we only expect to manage a few blocks) - for(size_t i = 0; i < m_blocks.size(); i++) - { - Block& block = m_blocks[i]; - if(block.id == id) - { - buf = block.buf; - return true; - } - } - - return false; - } - - void InvalidateAll() - { - // note: don't check whether any references are held etc. because - // this should only be called at the end of the (test) program. 
- m_blocks.clear(); - } - -private: - size_t m_maxBlocks; - typedef std::deque Blocks; - Blocks m_blocks; -}; - - -//----------------------------------------------------------------------------- - -BlockCache::BlockCache(size_t numBlocks) - : impl(new Impl(numBlocks)) -{ -} - -void BlockCache::Add(BlockId id, const shared_ptr& buf) -{ - impl->Add(id, buf); -} - -bool BlockCache::Retrieve(BlockId id, shared_ptr& buf) -{ - return impl->Retrieve(id, buf); -} - -void BlockCache::InvalidateAll() -{ - return impl->InvalidateAll(); -} Property changes on: ps/trunk/source/lib/file/io/block_cache.cpp ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Index: ps/trunk/source/lib/file/io/io_align.cpp =================================================================== --- ps/trunk/source/lib/file/io/io_align.cpp (revision 9349) +++ ps/trunk/source/lib/file/io/io_align.cpp (nonexistent) @@ -1,24 +0,0 @@ -/* Copyright (c) 2010 Wildfire Games - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include "precompiled.h" -#include "lib/file/io/io_align.h" Property changes on: ps/trunk/source/lib/file/io/io_align.cpp ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Index: ps/trunk/source/lib/file/io/io_align.h =================================================================== --- ps/trunk/source/lib/file/io/io_align.h (revision 9349) +++ ps/trunk/source/lib/file/io/io_align.h (nonexistent) @@ -1,71 +0,0 @@ -/* Copyright (c) 2010 Wildfire Games - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#ifndef INCLUDED_IO_ALIGN -#define INCLUDED_IO_ALIGN - -#include "lib/bits.h" // IsAligned, round_up - -/** - * block := power-of-two sized chunk of a file. - * all transfers are expanded to naturally aligned, whole blocks. - * (this makes caching parts of files feasible; it is also much faster - * for some aio implementations, e.g. wposix.) - * (blocks are also thereby page-aligned, which allows write-protecting - * file buffers without worrying about their boundaries.) - **/ -static const size_t BLOCK_SIZE = 1024*KiB; - -// note: *sizes* and *offsets* are aligned to blocks to allow zero-copy block cache. -// that the *buffer* need only be sector-aligned (we assume 4kb for simplicity) -// (this is a requirement of the underlying Windows OS) -static const size_t SECTOR_SIZE = 4*KiB; - - -template -inline bool IsAligned_Data(T* address) -{ - return IsAligned((uintptr_t)address, SECTOR_SIZE); -} - -inline bool IsAligned_Offset(off_t ofs) -{ - return IsAligned(ofs, BLOCK_SIZE); -} - - -inline off_t AlignedOffset(off_t ofs) -{ - return (off_t)round_down(size_t(ofs), BLOCK_SIZE); -} - -inline off_t AlignedSize(off_t size) -{ - return (off_t)round_up(size_t(size), BLOCK_SIZE); -} - -inline off_t PaddedSize(off_t size, off_t ofs) -{ - return (off_t)round_up(size_t(size + ofs - AlignedOffset(ofs)), BLOCK_SIZE); -} - -#endif // #ifndef INCLUDED_IO_ALIGN Property changes on: ps/trunk/source/lib/file/io/io_align.h ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Index: ps/trunk/source/lib/sysdep/os/win/wdir_watch.cpp =================================================================== --- ps/trunk/source/lib/sysdep/os/win/wdir_watch.cpp (revision 9349) +++ ps/trunk/source/lib/sysdep/os/win/wdir_watch.cpp (revision 9350) @@ -1,440 +1,388 @@ /* Copyright (c) 2010 Wildfire Games * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software 
and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* * Win32 directory change notification */ #include "precompiled.h" #include "lib/sysdep/dir_watch.h" #include "lib/allocators/shared_ptr.h" #include "lib/path.h" // path_is_subpath #include "lib/sysdep/os/win/win.h" #include "lib/sysdep/os/win/winit.h" #include "lib/sysdep/os/win/wutil.h" +#include "lib/sysdep/os/win/wiocp.h" WINIT_REGISTER_MAIN_INIT(wdir_watch_Init); WINIT_REGISTER_MAIN_SHUTDOWN(wdir_watch_Shutdown); //----------------------------------------------------------------------------- // DirHandle class DirHandle { public: DirHandle(const OsPath& path) { WinScopedPreserveLastError s; // CreateFile const DWORD share = FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE; const DWORD flags = FILE_FLAG_BACKUP_SEMANTICS | FILE_FLAG_OVERLAPPED; m_hDir = CreateFileW(OsString(path).c_str(), FILE_LIST_DIRECTORY, share, 0, OPEN_EXISTING, flags, 0); } ~DirHandle() { // contrary to MSDN, the canceled IOs do not issue a completion notification. 
// (receiving packets after (unsuccessful) cancellation would be dangerous) BOOL ok = CancelIo(m_hDir); WARN_IF_FALSE(ok); CloseHandle(m_hDir); m_hDir = INVALID_HANDLE_VALUE; } // == INVALID_HANDLE_VALUE if path doesn't exist operator HANDLE() const { return m_hDir; } private: HANDLE m_hDir; }; //----------------------------------------------------------------------------- // DirWatchRequest class DirWatchRequest { NONCOPYABLE(DirWatchRequest); public: DirWatchRequest(const OsPath& path) : m_path(path), m_dirHandle(path), m_data(new u8[dataSize]) { m_ovl = (OVERLAPPED*)calloc(1, sizeof(OVERLAPPED)); // rationale for dynamic alloc: see decl debug_assert(m_ovl); // (hEvent is needed for the wait after CancelIo below) const BOOL manualReset = TRUE; const BOOL initialState = FALSE; m_ovl->hEvent = CreateEvent(0, manualReset, initialState, 0); } ~DirWatchRequest() { // we need to free m_data here, so the pending IO had better // not write to that memory in future. therefore: WARN_IF_FALSE(CancelIo(m_dirHandle)); // however, this is not synchronized with the DPC (?) that apparently // delivers the data - m_data is filled anyway. // we need to ensure that either the IO has happened or that it // was successfully canceled before freeing m_data and m_ovl, so wait: { WinScopedPreserveLastError s; // (GetOverlappedResult without a valid hEvent hangs on Vista; // we'll abort after a timeout to be safe.) const DWORD ret = WaitForSingleObject(m_ovl->hEvent, 1000); WARN_IF_FALSE(CloseHandle(m_ovl->hEvent)); if(ret == WAIT_OBJECT_0 || GetLastError() == ERROR_OPERATION_ABORTED) { delete[] m_data; free(m_ovl); } else { // (this could conceivably happen if a kernel debugger // hangs the system during the wait duration.) debug_printf(L"WARNING: IO may still be pending; to avoid memory corruption, we won't free the buffer.\n"); DEBUG_WARN_ERR(ERR::TIMED_OUT); // intentionally leak m_data and m_ovl! 
} } } const OsPath& Path() const { return m_path; } /** * (this is the handle to be associated with the completion port) **/ HANDLE GetDirHandle() const { return m_dirHandle; } LibError Issue() { if(m_dirHandle == INVALID_HANDLE_VALUE) WARN_RETURN(ERR::PATH_NOT_FOUND); const BOOL watchSubtree = TRUE; // (see IntrusiveLink comments) const DWORD filter = FILE_NOTIFY_CHANGE_FILE_NAME | FILE_NOTIFY_CHANGE_DIR_NAME | FILE_NOTIFY_CHANGE_SIZE | FILE_NOTIFY_CHANGE_LAST_WRITE | FILE_NOTIFY_CHANGE_CREATION; // not set: FILE_NOTIFY_CHANGE_ATTRIBUTES, FILE_NOTIFY_CHANGE_LAST_ACCESS, FILE_NOTIFY_CHANGE_SECURITY DWORD undefined = 0; // (non-NULL pointer avoids BoundsChecker warning) m_ovl->Internal = 0; const BOOL ok = ReadDirectoryChangesW(m_dirHandle, m_data, dataSize, watchSubtree, filter, &undefined, m_ovl, 0); WARN_IF_FALSE(ok); return INFO::OK; } /** * (call when completion port indicates data is available) **/ void RetrieveNotifications(DirWatchNotifications& notifications) const { const FILE_NOTIFY_INFORMATION* fni = (const FILE_NOTIFY_INFORMATION*)m_data; for(;;) { // convert name from BSTR (non-zero-terminated) to OsPath cassert(sizeof(wchar_t) == sizeof(WCHAR)); const size_t nameChars = fni->FileNameLength / sizeof(WCHAR); const OsPath name(Path::String(fni->FileName, nameChars)); const OsPath pathname = m_path / name; const DirWatchNotification::EType type = TypeFromAction(fni->Action); notifications.push_back(DirWatchNotification(pathname, type)); if(!fni->NextEntryOffset) // this was the last entry. 
break; fni = (const FILE_NOTIFY_INFORMATION*)(uintptr_t(fni) + fni->NextEntryOffset); } } private: static DirWatchNotification::EType TypeFromAction(const DWORD action) { switch(action) { case FILE_ACTION_ADDED: case FILE_ACTION_RENAMED_NEW_NAME: return DirWatchNotification::Created; case FILE_ACTION_REMOVED: case FILE_ACTION_RENAMED_OLD_NAME: return DirWatchNotification::Deleted; case FILE_ACTION_MODIFIED: return DirWatchNotification::Changed; default: debug_assert(0); return DirWatchNotification::Changed; } } OsPath m_path; DirHandle m_dirHandle; // rationale: // - if too small, notifications may be lost! (the CSD-poll application // may be confronted with hundreds of new files in a short time frame) // - requests larger than 64 KiB fail on SMB due to packet restrictions. static const size_t dataSize = 64*KiB; // rationale: // - each instance needs their own buffer. (we can't share a central // copy because the watches are independent and may be triggered // 'simultaneously' before the next poll.) // - lifetime must be managed manually (see dtor) u8* m_data; // rationale: // - ReadDirectoryChangesW's asynchronous mode is triggered by passing // a valid OVERLAPPED parameter; notification proceeds via // completion ports, but we still need hEvent - see above. // - this must remain valid while the IO is pending. if the wait // were to fail, we must not free this memory, either. OVERLAPPED* m_ovl; }; typedef shared_ptr PDirWatchRequest; //----------------------------------------------------------------------------- // IntrusiveLink // using watches of entire subtrees to satisfy single-directory requests // requires a list of existing watches. an intrusive, doubly-linked list // is convenient because removal must occur within the DirWatch destructor. // since boost::intrusive doesn't automatically remove objects from their // containers when they are destroyed, we implement a simple circular list // via sentinel. 
note that DirWatchManager iterates over DirWatch, not their // embedded links. we map from link to the parent object via offsetof // (slightly less complex than storing back pointers to the parents, and // avoids 'this-pointer used during initialization list' warnings). class IntrusiveLink { public: IntrusiveLink() { m_prev = m_next = this; // sentinel } IntrusiveLink(IntrusiveLink* sentinel) { // insert after sentinel m_prev = sentinel; m_next = sentinel->m_next; m_next->m_prev = this; sentinel->m_next = this; } ~IntrusiveLink() { // remove from list m_prev->m_next = m_next; m_next->m_prev = m_prev; } IntrusiveLink* Next() const { return m_next; } private: IntrusiveLink* m_prev; IntrusiveLink* m_next; }; //----------------------------------------------------------------------------- // DirWatch struct DirWatch { DirWatch(IntrusiveLink* sentinel, const PDirWatchRequest& request) : link(sentinel), request(request) { } IntrusiveLink link; PDirWatchRequest request; }; //----------------------------------------------------------------------------- -// CompletionPort +// DirWatchManager -// this appears to be the best solution for IO notification. -// there are three alternatives: -// - multiple threads with blocking I/O. this is rather inefficient when -// many directories (e.g. mods) are being watched. -// - normal overlapped I/O: build a contiguous array of the hEvents -// in all OVERLAPPED structures, and WaitForMultipleObjects. -// it would be cumbersome to update this array when adding/removing watches. -// - callback notification: a notification function is called when the thread -// that initiated the I/O (ReadDirectoryChangesW) enters an alertable -// wait state. it is desirable for notifications to arrive at a single -// known point - see dir_watch_Poll. unfortunately there doesn't appear to -// be a reliable and non-blocking means of entering AWS - SleepEx(1) may -// wait for 10..15 ms if the system timer granularity is low. 
even worse, -// it was noted in a previous project that APCs are sometimes delivered from -// within APIs without having used SleepEx (it seems threads sometimes enter -// a semi-AWS when calling the kernel). -class CompletionPort +class DirWatchManager { public: - CompletionPort() - { - m_hIOCP = 0; // CreateIoCompletionPort requires 0, not INVALID_HANDLE_VALUE - } - - ~CompletionPort() + DirWatchManager() + : hIOCP(0) { - CloseHandle(m_hIOCP); - m_hIOCP = INVALID_HANDLE_VALUE; } - void Attach(HANDLE hFile, uintptr_t key) + ~DirWatchManager() { - WinScopedPreserveLastError s; // CreateIoCompletionPort - - // (when called for the first time, ends up creating m_hIOCP) - m_hIOCP = CreateIoCompletionPort(hFile, m_hIOCP, (ULONG_PTR)key, 0); - debug_assert(wutil_IsValidHandle(m_hIOCP)); - } - - LibError Poll(size_t& bytesTransferred, uintptr_t& key, OVERLAPPED*& ovl) - { - if(m_hIOCP == 0) - return ERR::INVALID_HANDLE; // NOWARN (happens if called before the first Attach) - for(;;) // don't return abort notifications to caller - { - DWORD dwBytesTransferred = 0; - ULONG_PTR ulKey = 0; - ovl = 0; - const DWORD timeout = 0; - const BOOL gotPacket = GetQueuedCompletionStatus(m_hIOCP, &dwBytesTransferred, &ulKey, &ovl, timeout); - bytesTransferred = size_t(dwBytesTransferred); - key = uintptr_t(ulKey); - if(gotPacket) - return INFO::OK; - - if(GetLastError() == WAIT_TIMEOUT) - return ERR::AGAIN; // NOWARN (nothing pending) - else if(GetLastError() == ERROR_OPERATION_ABORTED) - continue; // watch was canceled - ignore - else - return LibError_from_GLE(); // actual error - } + CloseHandle(hIOCP); } -private: - HANDLE m_hIOCP; -}; - - -//----------------------------------------------------------------------------- -// DirWatchManager - -class DirWatchManager -{ -public: LibError Add(const OsPath& path, PDirWatch& dirWatch) { debug_assert(path.IsDirectory()); // check if this is a subdirectory of a tree that's already being // watched (this is much faster than issuing a new 
watch; it also // prevents accidentally watching the same directory twice). for(IntrusiveLink* link = m_sentinel.Next(); link != &m_sentinel; link = link->Next()) { DirWatch* const existingDirWatch = (DirWatch*)(uintptr_t(link) - offsetof(DirWatch, link)); if(path_is_subpath(OsString(path).c_str(), OsString(existingDirWatch->request->Path()).c_str())) { dirWatch.reset(new DirWatch(&m_sentinel, existingDirWatch->request)); return INFO::OK; } } PDirWatchRequest request(new DirWatchRequest(path)); - m_completionPort.Attach(request->GetDirHandle(), (uintptr_t)request.get()); + AttachToCompletionPort(request->GetDirHandle(), hIOCP, (uintptr_t)request.get()); RETURN_ERR(request->Issue()); dirWatch.reset(new DirWatch(&m_sentinel, request)); return INFO::OK; } LibError Poll(DirWatchNotifications& notifications) { - size_t bytesTransferred; uintptr_t key; OVERLAPPED* ovl; - RETURN_ERR(m_completionPort.Poll(bytesTransferred, key, ovl)); + DWORD bytesTransferred; ULONG_PTR key; OVERLAPPED* ovl; + for(;;) // skip notifications of canceled watches + { + const LibError ret = PollCompletionPort(hIOCP, 0, bytesTransferred, key, ovl); + if(ret == INFO::OK) + break; + if(GetLastError() == ERROR_OPERATION_ABORTED) + continue; // watch was canceled - ignore + return ret; + } + DirWatchRequest* request = (DirWatchRequest*)key; request->RetrieveNotifications(notifications); RETURN_ERR(request->Issue()); // re-issue return INFO::OK; } private: IntrusiveLink m_sentinel; - CompletionPort m_completionPort; + HANDLE hIOCP; }; static DirWatchManager* s_dirWatchManager; //----------------------------------------------------------------------------- LibError dir_watch_Add(const OsPath& path, PDirWatch& dirWatch) { WinScopedLock lock(WDIR_WATCH_CS); return s_dirWatchManager->Add(path, dirWatch); } LibError dir_watch_Poll(DirWatchNotifications& notifications) { WinScopedLock lock(WDIR_WATCH_CS); return s_dirWatchManager->Poll(notifications); } 
//----------------------------------------------------------------------------- static LibError wdir_watch_Init() { s_dirWatchManager = new DirWatchManager; return INFO::OK; } static LibError wdir_watch_Shutdown() { SAFE_DELETE(s_dirWatchManager); return INFO::OK; } Index: ps/trunk/source/ps/ConfigDB.cpp =================================================================== --- ps/trunk/source/ps/ConfigDB.cpp (revision 9349) +++ ps/trunk/source/ps/ConfigDB.cpp (revision 9350) @@ -1,445 +1,447 @@ /* Copyright (C) 2011 Wildfire Games. * This file is part of 0 A.D. * * 0 A.D. is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 2 of the License, or * (at your option) any later version. * * 0 A.D. is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with 0 A.D. If not, see . 
*/ #include "precompiled.h" +#include + #include "Pyrogenesis.h" #include "Parser.h" #include "ConfigDB.h" #include "CLogger.h" #include "Filesystem.h" #include "scripting/ScriptingHost.h" +#include "lib/allocators/shared_ptr.h" #include "scriptinterface/ScriptInterface.h" -#include - typedef std::map TConfigMap; TConfigMap CConfigDB::m_Map[CFG_LAST]; VfsPath CConfigDB::m_ConfigFile[CFG_LAST]; #define GET_NS_PRIVATE(cx, obj) (EConfigNamespace)((intptr_t)JS_GetPrivate(cx, obj) >> 1) namespace ConfigNamespace_JS { JSBool GetProperty(JSContext* cx, JSObject* obj, jsid id, jsval* vp) { EConfigNamespace cfgNs = GET_NS_PRIVATE(cx, obj); if (cfgNs < 0 || cfgNs >= CFG_LAST) return JS_FALSE; jsval idval; if (!JS_IdToValue(cx, id, &idval)) return JS_FALSE; std::string propName; if (!ScriptInterface::FromJSVal(cx, idval, propName)) return JS_FALSE; CConfigValue *val = g_ConfigDB.GetValue(cfgNs, propName); if (val) { JSString *js_str = JS_NewStringCopyN(cx, val->m_String.c_str(), val->m_String.size()); *vp = STRING_TO_JSVAL(js_str); } return JS_TRUE; } JSBool SetProperty(JSContext* cx, JSObject* obj, jsid id, jsval* vp) { EConfigNamespace cfgNs = GET_NS_PRIVATE(cx, obj); if (cfgNs < 0 || cfgNs >= CFG_LAST) return JS_FALSE; jsval idval; if (!JS_IdToValue(cx, id, &idval)) return JS_FALSE; std::string propName; if (!ScriptInterface::FromJSVal(cx, idval, propName)) return JS_FALSE; CConfigValue *val = g_ConfigDB.CreateValue(cfgNs, propName); if (!ScriptInterface::FromJSVal(cx, *vp, val->m_String)) return JS_FALSE; return JS_TRUE; } JSClass Class = { "ConfigNamespace", JSCLASS_HAS_PRIVATE, JS_PropertyStub, JS_PropertyStub, GetProperty, SetProperty, JS_EnumerateStub, JS_ResolveStub, JS_ConvertStub, JS_FinalizeStub }; JSBool Construct(JSContext* cx, uintN argc, jsval* vp) { UNUSED2(argc); JSObject *newObj = JS_NewObject(cx, &Class, NULL, NULL); JS_SET_RVAL(cx, vp, OBJECT_TO_JSVAL(newObj)); return JS_TRUE; } void SetNamespace(JSContext *cx, JSObject *obj, EConfigNamespace cfgNs) { 
JS_SetPrivate(cx, obj, (void *)((uintptr_t)cfgNs << 1)); // JS requires bottom bit = 0 } JSBool WriteFile(JSContext* cx, uintN argc, jsval* vp) { EConfigNamespace cfgNs = GET_NS_PRIVATE(cx, JS_THIS_OBJECT(cx, vp)); if (cfgNs < 0 || cfgNs >= CFG_LAST) return JS_FALSE; if (argc != 1) return JS_FALSE; VfsPath path; if (!ScriptInterface::FromJSVal(cx, JS_ARGV(cx, vp)[0], path)) return JS_FALSE; bool res = g_ConfigDB.WriteFile(cfgNs, path); JS_SET_RVAL(cx, vp, BOOLEAN_TO_JSVAL(res)); return JS_TRUE; } JSBool Reload(JSContext* cx, uintN argc, jsval* vp) { if (argc != 0) return JS_FALSE; EConfigNamespace cfgNs = GET_NS_PRIVATE(cx, JS_THIS_OBJECT(cx, vp)); if (cfgNs < 0 || cfgNs >= CFG_LAST) return JS_FALSE; JSBool ret = g_ConfigDB.Reload(cfgNs); JS_SET_RVAL(cx, vp, BOOLEAN_TO_JSVAL(ret)); return JS_TRUE; } JSBool SetFile(JSContext* cx, uintN argc, jsval* vp) { EConfigNamespace cfgNs = GET_NS_PRIVATE(cx, JS_THIS_OBJECT(cx, vp)); if (cfgNs < 0 || cfgNs >= CFG_LAST) return JS_FALSE; if (argc != 1) return JS_FALSE; VfsPath path; if (!ScriptInterface::FromJSVal(cx, JS_ARGV(cx, vp)[0], path)) return JS_FALSE; g_ConfigDB.SetConfigFile(cfgNs, path); JS_SET_RVAL(cx, vp, JSVAL_VOID); return JS_TRUE; } JSFunctionSpec Funcs[] = { { "writeFile", WriteFile, 2, 0 }, { "reload", Reload, 0, 0 }, { "setFile", SetFile, 2, 0 }, {0} }; }; namespace ConfigDB_JS { JSClass Class = { "ConfigDB", 0, JS_PropertyStub, JS_PropertyStub, JS_PropertyStub, JS_PropertyStub, JS_EnumerateStub, JS_ResolveStub, JS_ConvertStub, JS_FinalizeStub }; JSPropertySpec Props[] = { {0} }; JSFunctionSpec Funcs[] = { {0} }; JSBool Construct(JSContext* cx, uintN argc, jsval* vp) { UNUSED2(argc); JSObject *newObj = JS_NewObject(cx, &Class, NULL, NULL); JS_SET_RVAL(cx, vp, OBJECT_TO_JSVAL(newObj)); int flags=JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_PERMANENT; #define cfg_ns(_propname, _enum) STMT (\ JSObject *nsobj=g_ScriptingHost.CreateCustomObject("ConfigNamespace"); \ debug_assert(nsobj); \ 
ConfigNamespace_JS::SetNamespace(cx, nsobj, _enum); \ debug_assert(JS_DefineProperty(cx, newObj, _propname, OBJECT_TO_JSVAL(nsobj), NULL, NULL, flags)); ) cfg_ns("default", CFG_DEFAULT); cfg_ns("system", CFG_SYSTEM); cfg_ns("user", CFG_USER); cfg_ns("mod", CFG_MOD); #undef cfg_ns return JS_TRUE; } }; CConfigDB::CConfigDB() { g_ScriptingHost.DefineCustomObjectType(&ConfigDB_JS::Class, ConfigDB_JS::Construct, 0, ConfigDB_JS::Props, ConfigDB_JS::Funcs, NULL, NULL); g_ScriptingHost.DefineCustomObjectType(&ConfigNamespace_JS::Class, ConfigNamespace_JS::Construct, 0, NULL, ConfigNamespace_JS::Funcs, NULL, NULL); JSObject *js_ConfigDB = g_ScriptingHost.CreateCustomObject("ConfigDB"); g_ScriptingHost.SetGlobal("g_ConfigDB", OBJECT_TO_JSVAL(js_ConfigDB)); } CConfigValue *CConfigDB::GetValue(EConfigNamespace ns, const CStr& name) { CConfigValueSet* values = GetValues(ns, name); if (!values) return (NULL); return &((*values)[0]); } CConfigValueSet *CConfigDB::GetValues(EConfigNamespace ns, const CStr& name) { if (ns < 0 || ns >= CFG_LAST) { debug_warn(L"CConfigDB: Invalid ns value"); return NULL; } TConfigMap::iterator it = m_Map[CFG_COMMAND].find(name); if (it != m_Map[CFG_COMMAND].end()) return &(it->second); for (int search_ns = ns; search_ns >= 0; search_ns--) { TConfigMap::iterator it = m_Map[search_ns].find(name); if (it != m_Map[search_ns].end()) return &(it->second); } return NULL; } EConfigNamespace CConfigDB::GetValueNamespace(EConfigNamespace ns, const CStr& name) { if (ns < 0 || ns >= CFG_LAST) { debug_warn(L"CConfigDB: Invalid ns value"); return CFG_LAST; } TConfigMap::iterator it = m_Map[CFG_COMMAND].find(name); if (it != m_Map[CFG_COMMAND].end()) return CFG_COMMAND; for (int search_ns = ns; search_ns >= 0; search_ns--) { TConfigMap::iterator it = m_Map[search_ns].find(name); if (it != m_Map[search_ns].end()) return (EConfigNamespace)search_ns; } return CFG_LAST; } std::vector > CConfigDB::GetValuesWithPrefix(EConfigNamespace ns, const CStr& prefix) { 
std::vector > ret; if (ns < 0 || ns >= CFG_LAST) { debug_warn(L"CConfigDB: Invalid ns value"); return ret; } for (TConfigMap::iterator it = m_Map[CFG_COMMAND].begin(); it != m_Map[CFG_COMMAND].end(); ++it) { if (boost::algorithm::starts_with(it->first, prefix)) ret.push_back(std::make_pair(it->first, it->second)); } for (int search_ns = ns; search_ns >= 0; search_ns--) { for (TConfigMap::iterator it = m_Map[search_ns].begin(); it != m_Map[search_ns].end(); ++it) { if (boost::algorithm::starts_with(it->first, prefix)) ret.push_back(std::make_pair(it->first, it->second)); } } return ret; } CConfigValue *CConfigDB::CreateValue(EConfigNamespace ns, const CStr& name) { if (ns < 0 || ns >= CFG_LAST) { debug_warn(L"CConfigDB: Invalid ns value"); return NULL; } CConfigValue *ret=GetValue(ns, name); if (ret) return ret; TConfigMap::iterator it=m_Map[ns].insert(m_Map[ns].begin(), make_pair(name, CConfigValueSet( 1 ))); return &(it->second[0]); } void CConfigDB::SetConfigFile(EConfigNamespace ns, const VfsPath& path) { if (ns < 0 || ns >= CFG_LAST) { debug_warn(L"CConfigDB: Invalid ns value"); return; } m_ConfigFile[ns]=path; } bool CConfigDB::Reload(EConfigNamespace ns) { if (ns < 0 || ns >= CFG_LAST) { debug_warn(L"CConfigDB: Invalid ns value"); return false; } // Set up CParser CParser parser; CParserLine parserLine; parser.InputTaskType("Assignment", "_$ident_=<_[-$arg(_minus)]_$value_,>_[-$arg(_minus)]_$value[[;]$rest]"); parser.InputTaskType("CommentOrBlank", "_[;[$rest]]"); // Open file with VFS shared_ptr buffer; size_t buflen; { // Handle missing files quietly if (g_VFS->GetFileInfo(m_ConfigFile[ns], NULL) < 0) { LOGMESSAGE(L"Cannot find config file \"%ls\" - ignoring", m_ConfigFile[ns].string().c_str()); return false; } else { LOGMESSAGE(L"Loading config file \"%ls\"", m_ConfigFile[ns].string().c_str()); LibError ret = g_VFS->LoadFile(m_ConfigFile[ns], buffer, buflen); if (ret != INFO::OK) { LOGERROR(L"CConfigDB::Reload(): vfs_load for \"%ls\" failed: return was 
%ld", m_ConfigFile[ns].string().c_str(), ret); return false; } } } TConfigMap newMap; char *filebuf=(char *)buffer.get(); char *filebufend=filebuf+buflen; // Read file line by line char *next=filebuf-1; do { char *pos=next+1; next=(char *)memchr(pos, '\n', filebufend-pos); if (!next) next=filebufend; char *lend=next; if (lend > filebuf && *(lend-1) == '\r') lend--; // Send line to parser bool parseOk=parserLine.ParseString(parser, std::string(pos, lend)); // Get name and value from parser std::string name; std::string value; if (parseOk && parserLine.GetArgCount()>=2 && parserLine.GetArgString(0, name) && parserLine.GetArgString(1, value)) { // Add name and value to the map size_t argCount = parserLine.GetArgCount(); newMap[name].clear(); for( size_t t = 0; t < argCount; t++ ) { if( !parserLine.GetArgString( (int)t + 1, value ) ) continue; CConfigValue argument; argument.m_String = value; newMap[name].push_back( argument ); LOGMESSAGE(L"Loaded config string \"%hs\" = \"%hs\"", name.c_str(), value.c_str()); } } } while (next < filebufend); m_Map[ns].swap(newMap); return true; } bool CConfigDB::WriteFile(EConfigNamespace ns) { if (ns < 0 || ns >= CFG_LAST) { debug_warn(L"CConfigDB: Invalid ns value"); return false; } return WriteFile(ns, m_ConfigFile[ns]); } bool CConfigDB::WriteFile(EConfigNamespace ns, const VfsPath& path) { if (ns < 0 || ns >= CFG_LAST) { debug_warn(L"CConfigDB: Invalid ns value"); return false; } - shared_ptr buf = io_Allocate(1*MiB); + shared_ptr buf; + AllocateAligned(buf, 1*MiB, maxSectorSize); char* pos = (char*)buf.get(); TConfigMap &map=m_Map[ns]; for(TConfigMap::const_iterator it = map.begin(); it != map.end(); ++it) { pos += sprintf(pos, "%s = \"%s\"\n", it->first.c_str(), it->second[0].m_String.c_str()); } const size_t len = pos - (char*)buf.get(); LibError ret = g_VFS->CreateFile(path, buf, len); if(ret < 0) { LOGERROR(L"CConfigDB::WriteFile(): CreateFile \"%ls\" failed (error: %d)", path.string().c_str(), (int)ret); return false; } 
return true; } Index: ps/trunk/source/lib/sysdep/os/win/wutil.h =================================================================== --- ps/trunk/source/lib/sysdep/os/win/wutil.h (revision 9349) +++ ps/trunk/source/lib/sysdep/os/win/wutil.h (revision 9350) @@ -1,212 +1,213 @@ /* Copyright (c) 2010 Wildfire Games * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ /* * various Windows-specific utilities */ #ifndef INCLUDED_WUTIL #define INCLUDED_WUTIL #include "lib/os_path.h" #include "lib/sysdep/os/win/win.h" template bool wutil_IsValidHandle(H h) { return h != 0 && h != INVALID_HANDLE_VALUE; } //----------------------------------------------------------------------------- // dynamic linking // define a function pointer (optionally prepend 'static') #define WUTIL_FUNC(varName, ret, params)\ ret (WINAPI* varName) params // rationale: // - splitting up WUTIL_FUNC and WUTIL_IMPORT is a bit verbose in // the common case of a local function pointer definition, // but allows one-time initialization of static variables. // - differentiating between procName and varName allows searching // for the actual definition of the function pointer in the code. // - a cast would require passing in ret/params. // - writing a type-punned pointer breaks strict-aliasing rules. #define WUTIL_IMPORT(hModule, procName, varName)\ STMT(\ const FARPROC f = GetProcAddress(hModule, #procName);\ memcpy(&varName, &f, sizeof(FARPROC));\ ) // note: Kernel32 is guaranteed to be loaded, so we don't // need to LoadLibrary and FreeLibrary. #define WUTIL_IMPORT_KERNEL32(procName, varName)\ WUTIL_IMPORT(GetModuleHandleW(L"kernel32.dll"), procName, varName) //----------------------------------------------------------------------------- // safe allocator extern void* wutil_Allocate(size_t size); extern void wutil_Free(void* p); //----------------------------------------------------------------------------- // locks // critical sections used by win-specific code enum WinLockId { - WAIO_CS, WDBG_SYM_CS, // protects (non-reentrant) dbghelp.dll WDIR_WATCH_CS, NUM_CS }; extern void wutil_Lock(WinLockId id); extern void wutil_Unlock(WinLockId id); // used in a desperate attempt to avoid deadlock in wseh. 
extern bool wutil_IsLocked(WinLockId id); class WinScopedLock { public: WinScopedLock(WinLockId id) : m_id(id) { wutil_Lock(m_id); } ~WinScopedLock() { wutil_Unlock(m_id); } private: WinLockId m_id; }; //----------------------------------------------------------------------------- // errors /** * some WinAPI functions SetLastError(0) on success, which is bad because * it can hide previous errors. this class takes care of restoring the * previous value. **/ class WinScopedPreserveLastError { public: WinScopedPreserveLastError() : m_lastError(GetLastError()) { SetLastError(0); } ~WinScopedPreserveLastError() { if(m_lastError != 0 && GetLastError() == 0) SetLastError(m_lastError); } private: DWORD m_lastError; }; /** * @return the LibError equivalent of GetLastError(), or ERR::FAIL if * there's no equivalent. * you should SetLastError(0) before calling whatever will set ret * to make sure we do not report any stale errors. * * @param warn_if_failed if set, raises an error dialog that reports * the LibError. **/ LibError LibError_from_GLE(bool warn_if_failed = true); #define WARN_WIN32_ERR (void)LibError_from_GLE(true) /** * @return INFO::OK if ret != FALSE, else LibError_from_GLE(). 
**/ extern LibError LibError_from_win32(DWORD ret, bool warn_if_failed = true); //----------------------------------------------------------------------------- // command line extern int wutil_argc(); extern wchar_t** wutil_argv(); extern bool wutil_HasCommandLineArgument(const wchar_t* arg); //----------------------------------------------------------------------------- // directories extern const OsPath& wutil_SystemPath(); extern const OsPath& wutil_ExecutablePath(); extern const OsPath& wutil_AppdataPath(); //----------------------------------------------------------------------------- // Wow64 extern bool wutil_IsWow64(); class WinScopedDisableWow64Redirection { public: WinScopedDisableWow64Redirection(); ~WinScopedDisableWow64Redirection(); private: void* m_wasRedirectionEnabled; }; //----------------------------------------------------------------------------- +LIB_API LibError wutil_SetPrivilege(const wchar_t* privilege, bool enable); + /** * @return module handle of lib code (that of the main EXE if * linked statically, otherwise the DLL). * this is necessary for the error dialog. **/ extern HMODULE wutil_LibModuleHandle(); /** * @return handle to the first window owned by the current process, or * 0 if none exist (e.g. it hasn't yet created one). * * enumerates all top-level windows and stops if PID matches. * once this function returns a non-NULL handle, it will always * return that cached value. 
**/ extern HWND wutil_AppWindow(); #endif // #ifndef INCLUDED_WUTIL Index: ps/trunk/source/lib/sysdep/os/win/wsysdep.cpp =================================================================== --- ps/trunk/source/lib/sysdep/os/win/wsysdep.cpp (revision 9349) +++ ps/trunk/source/lib/sysdep/os/win/wsysdep.cpp (revision 9350) @@ -1,606 +1,606 @@ /* Copyright (c) 2011 Wildfire Games * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ /* * Windows backend of the sysdep interface */ #include "precompiled.h" #include "lib/sysdep/sysdep.h" #include "lib/sysdep/os/win/win.h" // includes windows.h; must come before shlobj #include // pick_dir #include // open_url #include #include // message crackers #include #include "lib/sysdep/clipboard.h" #include "lib/sysdep/os/win/error_dialog.h" #include "lib/sysdep/os/win/wutil.h" #if CONFIG_ENABLE_BOOST # include #endif #if MSC_VERSION #pragma comment(lib, "shell32.lib") // for sys_pick_directory SH* calls #pragma comment(lib, "winhttp.lib") #endif bool sys_IsDebuggerPresent() { return (IsDebuggerPresent() != 0); } std::wstring sys_WideFromArgv(const char* argv_i) { // NB: despite http://cbloomrants.blogspot.com/2008/06/06-14-08-1.html, // WinXP x64 EN cmd.exe (chcp reports 437) encodes argv u-umlaut // (entered manually or via auto-complete) via cp1252. the same applies // to WinXP SP2 DE (where chcp reports 850). const UINT cp = CP_ACP; const DWORD flags = MB_PRECOMPOSED|MB_ERR_INVALID_CHARS; const int inputSize = -1; // null-terminated std::vector buf(strlen(argv_i)+1); // (upper bound on number of characters) // NB: avoid mbstowcs because it may specify another locale - const int ret = MultiByteToWideChar(cp, flags, argv_i, inputSize, &buf[0], buf.size()); + const int ret = MultiByteToWideChar(cp, flags, argv_i, (int)inputSize, &buf[0], (int)buf.size()); debug_assert(ret != 0); return std::wstring(&buf[0]); } void sys_display_msg(const wchar_t* caption, const wchar_t* msg) { MessageBoxW(0, msg, caption, MB_ICONEXCLAMATION|MB_TASKMODAL|MB_SETFOREGROUND); } //----------------------------------------------------------------------------- // "program error" dialog (triggered by debug_assert and exception) //----------------------------------------------------------------------------- // support for resizing the dialog / its controls (must be done manually) static POINTS dlg_clientOrigin; static POINTS dlg_prevClientSize; static void dlg_OnMove(HWND 
UNUSED(hDlg), int x, int y) { dlg_clientOrigin.x = (short)x; dlg_clientOrigin.y = (short)y; } static const size_t ANCHOR_LEFT = 0x01; static const size_t ANCHOR_RIGHT = 0x02; static const size_t ANCHOR_TOP = 0x04; static const size_t ANCHOR_BOTTOM = 0x08; static const size_t ANCHOR_ALL = 0x0F; static void dlg_ResizeControl(HWND hDlg, int dlgItem, int dx, int dy, size_t anchors) { HWND hControl = GetDlgItem(hDlg, dlgItem); RECT r; GetWindowRect(hControl, &r); int w = r.right - r.left, h = r.bottom - r.top; int x = r.left - dlg_clientOrigin.x, y = r.top - dlg_clientOrigin.y; if(anchors & ANCHOR_RIGHT) { // right only if(!(anchors & ANCHOR_LEFT)) x += dx; // horizontal (stretch width) else w += dx; } if(anchors & ANCHOR_BOTTOM) { // bottom only if(!(anchors & ANCHOR_TOP)) y += dy; // vertical (stretch height) else h += dy; } SetWindowPos(hControl, 0, x,y, w,h, SWP_NOZORDER); } static void dlg_OnSize(HWND hDlg, UINT state, int clientSizeX, int clientSizeY) { // 'minimize' was clicked. we need to ignore this, otherwise // dx/dy would reduce some control positions to less than 0. // since Windows clips them, we wouldn't later be able to // reconstruct the previous values when 'restoring'. if(state == SIZE_MINIMIZED) return; // NB: origin might legitimately be 0, but we know it is invalid // on the first call to this function, where dlg_prevClientSize is 0. 
const bool isOriginValid = (dlg_prevClientSize.y != 0); const int dx = clientSizeX - dlg_prevClientSize.x; const int dy = clientSizeY - dlg_prevClientSize.y; dlg_prevClientSize.x = (short)clientSizeX; dlg_prevClientSize.y = (short)clientSizeY; if(!isOriginValid) // must not call dlg_ResizeControl return; dlg_ResizeControl(hDlg, IDC_CONTINUE, dx,dy, ANCHOR_LEFT|ANCHOR_BOTTOM); dlg_ResizeControl(hDlg, IDC_SUPPRESS, dx,dy, ANCHOR_LEFT|ANCHOR_BOTTOM); dlg_ResizeControl(hDlg, IDC_BREAK , dx,dy, ANCHOR_LEFT|ANCHOR_BOTTOM); dlg_ResizeControl(hDlg, IDC_EXIT , dx,dy, ANCHOR_LEFT|ANCHOR_BOTTOM); dlg_ResizeControl(hDlg, IDC_COPY , dx,dy, ANCHOR_RIGHT|ANCHOR_BOTTOM); dlg_ResizeControl(hDlg, IDC_EDIT1 , dx,dy, ANCHOR_ALL); } static void dlg_OnGetMinMaxInfo(HWND UNUSED(hDlg), LPMINMAXINFO mmi) { // we must make sure resize_control will never set negative coords - // Windows would clip them, and its real position would be lost. // restrict to a reasonable and good looking minimum size [pixels]. mmi->ptMinTrackSize.x = 407; mmi->ptMinTrackSize.y = 159; // determined experimentally } struct DialogParams { const wchar_t* text; size_t flags; }; static BOOL dlg_OnInitDialog(HWND hDlg, HWND UNUSED(hWndFocus), LPARAM lParam) { const DialogParams* params = (const DialogParams*)lParam; HWND hWnd; // need to reset for new instance of dialog dlg_clientOrigin.x = dlg_clientOrigin.y = 0; dlg_prevClientSize.x = dlg_prevClientSize.y = 0; if(!(params->flags & DE_ALLOW_SUPPRESS)) { hWnd = GetDlgItem(hDlg, IDC_SUPPRESS); EnableWindow(hWnd, FALSE); } // set fixed font for readability hWnd = GetDlgItem(hDlg, IDC_EDIT1); HGDIOBJ hObj = (HGDIOBJ)GetStockObject(SYSTEM_FIXED_FONT); LPARAM redraw = FALSE; SendMessage(hWnd, WM_SETFONT, (WPARAM)hObj, redraw); SetDlgItemTextW(hDlg, IDC_EDIT1, params->text); return TRUE; // set default keyboard focus } static void dlg_OnCommand(HWND hDlg, int id, HWND UNUSED(hWndCtl), UINT UNUSED(codeNotify)) { switch(id) { case IDC_COPY: { std::vector buf(128*KiB); // (too 
big for stack) GetDlgItemTextW(hDlg, IDC_EDIT1, &buf[0], (int)buf.size()); sys_clipboard_set(&buf[0]); break; } case IDC_CONTINUE: EndDialog(hDlg, ERI_CONTINUE); break; case IDC_SUPPRESS: EndDialog(hDlg, ERI_SUPPRESS); break; case IDC_BREAK: EndDialog(hDlg, ERI_BREAK); break; case IDC_EXIT: EndDialog(hDlg, ERI_EXIT); break; default: break; } } static void dlg_OnSysCommand(HWND hDlg, UINT cmd, int UNUSED(x), int UNUSED(y)) { switch(cmd & 0xFFF0) // NB: lower 4 bits are reserved { // [X] clicked -> close dialog (doesn't happen automatically) case SC_CLOSE: EndDialog(hDlg, 0); break; default: break; } } static INT_PTR CALLBACK dlg_OnMessage(HWND hDlg, unsigned int msg, WPARAM wParam, LPARAM lParam) { switch(msg) { case WM_INITDIALOG: return HANDLE_WM_INITDIALOG(hDlg, wParam, lParam, dlg_OnInitDialog); case WM_SYSCOMMAND: return HANDLE_WM_SYSCOMMAND(hDlg, wParam, lParam, dlg_OnSysCommand); case WM_COMMAND: return HANDLE_WM_COMMAND(hDlg, wParam, lParam, dlg_OnCommand); case WM_MOVE: return HANDLE_WM_MOVE(hDlg, wParam, lParam, dlg_OnMove); case WM_GETMINMAXINFO: return HANDLE_WM_GETMINMAXINFO(hDlg, wParam, lParam, dlg_OnGetMinMaxInfo); case WM_SIZE: return HANDLE_WM_SIZE(hDlg, wParam, lParam, dlg_OnSize); default: // we didn't process the message; caller will perform default action. return FALSE; } } ErrorReactionInternal sys_display_error(const wchar_t* text, size_t flags) { // note: other threads might still be running, crash and take down the // process before we have a chance to display this error message. // ideally we would suspend them all and resume when finished; however, // they may be holding system-wide locks (e.g. heap or loader) that // are potentially needed by DialogBoxParam. in that case, deadlock // would result; this is much worse than a crash because no error // at all is displayed to the end-user. therefore, do nothing here. 
// temporarily remove any pending quit message from the queue because // it would prevent the dialog from being displayed (DialogBoxParam // returns IDOK without doing anything). will be restored below. // notes: // - this isn't only relevant at exit - Windows also posts one if // window init fails. therefore, it is important that errors can be // displayed regardless. // - by passing hWnd=0, we check all windows belonging to the current // thread. there is no reason to use hWndParent below. MSG msg; const BOOL isQuitPending = PeekMessage(&msg, 0, WM_QUIT, WM_QUIT, PM_REMOVE); const HINSTANCE hInstance = wutil_LibModuleHandle(); LPCWSTR lpTemplateName = MAKEINTRESOURCEW(IDD_DIALOG1); const DialogParams params = { text, flags }; // get the enclosing app's window handle. we can't just pass 0 or // the desktop window because the dialog must be modal (if the app // continues running, it may crash and take down the process before // we've managed to show the dialog). const HWND hWndParent = wutil_AppWindow(); INT_PTR ret = DialogBoxParamW(hInstance, lpTemplateName, hWndParent, dlg_OnMessage, (LPARAM)¶ms); if(isQuitPending) PostQuitMessage((int)msg.wParam); // failed; warn user and make sure we return an ErrorReactionInternal. if(ret == 0 || ret == -1) { debug_DisplayMessage(L"Error", L"Unable to display detailed error dialog."); return ERI_CONTINUE; } return (ErrorReactionInternal)ret; } //----------------------------------------------------------------------------- // misc //----------------------------------------------------------------------------- LibError sys_error_description_r(int user_err, wchar_t* buf, size_t max_chars) { // validate user_err - Win32 doesn't have negative error numbers if(user_err < 0) return ERR::FAIL; // NOWARN const DWORD err = user_err? (DWORD)user_err : GetLastError(); // no one likes to see "The operation completed successfully" in // error messages, so return more descriptive text instead. 
if(err == 0) { wcscpy_s(buf, max_chars, L"0 (no error code was set)"); return INFO::OK; } wchar_t message[200]; { const LPCVOID source = 0; // ignored (we're not using FROM_HMODULE etc.) const DWORD lang_id = 0; // look for neutral, then current locale va_list* args = 0; // we don't care about "inserts" const DWORD charsWritten = FormatMessageW(FORMAT_MESSAGE_FROM_SYSTEM, source, err, lang_id, message, (DWORD)ARRAY_SIZE(message), args); if(!charsWritten) WARN_RETURN(ERR::FAIL); debug_assert(charsWritten < max_chars); } const int charsWritten = swprintf_s(buf, max_chars, L"%d (%ls)", err, message); debug_assert(charsWritten != -1); return INFO::OK; } static LibError GetModulePathname(HMODULE hModule, OsPath& pathname) { wchar_t pathnameBuf[32768]; // NTFS limit const DWORD length = (DWORD)ARRAY_SIZE(pathnameBuf); const DWORD charsWritten = GetModuleFileNameW(hModule, pathnameBuf, length); if(charsWritten == 0) // failed return LibError_from_GLE(); debug_assert(charsWritten < length); // why would the above buffer ever be exceeded? pathname = pathnameBuf; return INFO::OK; } LibError sys_get_module_filename(void* addr, OsPath& pathname) { MEMORY_BASIC_INFORMATION mbi; const SIZE_T bytesWritten = VirtualQuery(addr, &mbi, sizeof(mbi)); if(!bytesWritten) return LibError_from_GLE(); debug_assert(bytesWritten >= sizeof(mbi)); return GetModulePathname((HMODULE)mbi.AllocationBase, pathname); } OsPath sys_ExecutablePathname() { OsPath pathname; debug_assert(GetModulePathname(0, pathname) == INFO::OK); return pathname; } std::wstring sys_get_user_name() { wchar_t usernameBuf[256]; DWORD size = ARRAY_SIZE(usernameBuf); if(!GetUserNameW(usernameBuf, &size)) return L""; return usernameBuf; } // callback for shell directory picker: used to set starting directory // (for user convenience). 
static int CALLBACK BrowseCallback(HWND hWnd, unsigned int msg, LPARAM UNUSED(lParam), LPARAM lpData) { if(msg == BFFM_INITIALIZED) { const WPARAM wParam = TRUE; // lpData is a Unicode string, not PIDL. // (MSDN: the return values for both of these BFFM_ notifications are ignored) (void)SendMessage(hWnd, BFFM_SETSELECTIONW, wParam, lpData); } return 0; } LibError sys_pick_directory(OsPath& path) { // (must not use multi-threaded apartment due to BIF_NEWDIALOGSTYLE) const HRESULT hr = CoInitialize(0); debug_assert(hr == S_OK || hr == S_FALSE); // S_FALSE == already initialized // note: bi.pszDisplayName isn't the full path, so it isn't of any use. BROWSEINFOW bi; memset(&bi, 0, sizeof(bi)); bi.ulFlags = BIF_RETURNONLYFSDIRS|BIF_NEWDIALOGSTYLE|BIF_NONEWFOLDERBUTTON; // for setting starting directory: bi.lpfn = (BFFCALLBACK)BrowseCallback; const Path::String initialPath = OsString(path); // NB: BFFM_SETSELECTIONW can't deal with '/' separators bi.lParam = (LPARAM)initialPath.c_str(); const LPITEMIDLIST pidl = SHBrowseForFolderW(&bi); if(!pidl) // user canceled return INFO::SKIPPED; // translate ITEMIDLIST to string wchar_t pathBuf[MAX_PATH]; // mandated by SHGetPathFromIDListW const BOOL ok = SHGetPathFromIDListW(pidl, pathBuf); // free the ITEMIDLIST IMalloc* p_malloc; SHGetMalloc(&p_malloc); p_malloc->Free(pidl); p_malloc->Release(); if(ok == TRUE) { path = pathBuf; return INFO::OK; } return LibError_from_GLE(); } LibError sys_open_url(const std::string& url) { HINSTANCE r = ShellExecuteA(NULL, "open", url.c_str(), NULL, NULL, SW_SHOWNORMAL); if ((int)(intptr_t)r > 32) return INFO::OK; WARN_RETURN(ERR::FAIL); } LibError sys_generate_random_bytes(u8* buffer, size_t size) { HCRYPTPROV hCryptProv = 0; if(!CryptAcquireContext(&hCryptProv, 0, 0, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT)) return LibError_from_GLE(); memset(buffer, 0, size); if(!CryptGenRandom(hCryptProv, (DWORD)size, (BYTE*)buffer)) return LibError_from_GLE(); if(!CryptReleaseContext(hCryptProv, 0)) return 
LibError_from_GLE(); return INFO::OK; } #if CONFIG_ENABLE_BOOST /* * Given a string of the form * "example.com:80" * or * "ftp=ftp.example.com:80;http=example.com:80;https=example.com:80" * separated by semicolons or whitespace, * return the string "example.com:80". */ static std::wstring parse_proxy(const std::wstring& input) { if(input.find('=') == input.npos) return input; std::vector parts; split(parts, input, boost::algorithm::is_any_of("; \t\r\n"), boost::algorithm::token_compress_on); for(size_t i = 0; i < parts.size(); ++i) if(boost::algorithm::starts_with(parts[i], "http=")) return parts[i].substr(5); // If we got this far, proxies were only set for non-HTTP protocols return L""; } LibError sys_get_proxy_config(const std::wstring& url, std::wstring& proxy) { WINHTTP_AUTOPROXY_OPTIONS autoProxyOptions; memset(&autoProxyOptions, 0, sizeof(autoProxyOptions)); autoProxyOptions.dwFlags = WINHTTP_AUTOPROXY_AUTO_DETECT; autoProxyOptions.dwAutoDetectFlags = WINHTTP_AUTO_DETECT_TYPE_DHCP | WINHTTP_AUTO_DETECT_TYPE_DNS_A; autoProxyOptions.fAutoLogonIfChallenged = TRUE; WINHTTP_PROXY_INFO proxyInfo; memset(&proxyInfo, 0, sizeof(proxyInfo)); WINHTTP_CURRENT_USER_IE_PROXY_CONFIG ieConfig; memset(&ieConfig, 0, sizeof(ieConfig)); HINTERNET hSession = NULL; LibError err = INFO::SKIPPED; bool useAutoDetect; if(WinHttpGetIEProxyConfigForCurrentUser(&ieConfig)) { if(ieConfig.lpszAutoConfigUrl) { // Use explicit auto-config script if specified useAutoDetect = true; autoProxyOptions.dwFlags |= WINHTTP_AUTOPROXY_CONFIG_URL; autoProxyOptions.lpszAutoConfigUrl = ieConfig.lpszAutoConfigUrl; } else { // Use auto-discovery if enabled useAutoDetect = (ieConfig.fAutoDetect == TRUE); } } else { // Can't find IE config settings - fall back to auto-discovery useAutoDetect = true; } if(useAutoDetect) { hSession = WinHttpOpen(NULL, WINHTTP_ACCESS_TYPE_DEFAULT_PROXY, WINHTTP_NO_PROXY_NAME, WINHTTP_NO_PROXY_BYPASS, 0); if(hSession && WinHttpGetProxyForUrl(hSession, url.c_str(), 
&autoProxyOptions, &proxyInfo) && proxyInfo.lpszProxy) { proxy = parse_proxy(proxyInfo.lpszProxy); if(!proxy.empty()) { err = INFO::OK; goto done; } } } // No valid auto-config; try explicit proxy instead if(ieConfig.lpszProxy) { proxy = parse_proxy(ieConfig.lpszProxy); if(!proxy.empty()) { err = INFO::OK; goto done; } } done: if(ieConfig.lpszProxy) GlobalFree(ieConfig.lpszProxy); if(ieConfig.lpszProxyBypass) GlobalFree(ieConfig.lpszProxyBypass); if(ieConfig.lpszAutoConfigUrl) GlobalFree(ieConfig.lpszAutoConfigUrl); if(proxyInfo.lpszProxy) GlobalFree(proxyInfo.lpszProxy); if(proxyInfo.lpszProxyBypass) GlobalFree(proxyInfo.lpszProxyBypass); if(hSession) WinHttpCloseHandle(hSession); return err; } #endif FILE* sys_OpenFile(const OsPath& pathname, const char* mode) { FILE* f = 0; const std::wstring wmode(mode, mode+strlen(mode)); (void)_wfopen_s(&f, OsString(pathname).c_str(), wmode.c_str()); return f; } Index: ps/trunk/source/lib/sysdep/os/win/whrt/whrt.cpp =================================================================== --- ps/trunk/source/lib/sysdep/os/win/whrt/whrt.cpp (revision 9349) +++ ps/trunk/source/lib/sysdep/os/win/whrt/whrt.cpp (revision 9350) @@ -1,321 +1,321 @@ /* Copyright (c) 2010 Wildfire Games * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* * Windows High Resolution Timer */ #include "precompiled.h" #include "lib/sysdep/os/win/whrt/whrt.h" #include // _beginthreadex #include "lib/sysdep/cpu.h" #include "lib/sysdep/os/win/win.h" #include "lib/sysdep/os/win/winit.h" #include "lib/sysdep/acpi.h" -#include "lib/adts.h" +//#include "lib/adts.h" #include "lib/bits.h" #include "lib/sysdep/os/win/whrt/counter.h" WINIT_REGISTER_EARLY_INIT2(whrt_Init); // wutil -> whrt -> wtime WINIT_REGISTER_LATE_SHUTDOWN(whrt_Shutdown); namespace ERR { const LibError WHRT_COUNTER_UNSAFE = 140000; } //----------------------------------------------------------------------------- // choose best available safe counter // (moved into a separate function to simplify error handling) static inline LibError ActivateCounter(ICounter* counter) { RETURN_ERR(counter->Activate()); if(!counter->IsSafe()) return ERR::WHRT_COUNTER_UNSAFE; // NOWARN (happens often) return INFO::OK; } /** * @return the newly created and unique instance of the next best counter * that is deemed safe, or 0 if all have already been created. 
**/ static ICounter* GetNextBestSafeCounter() { for(;;) { static size_t nextCounterId = 0; ICounter* counter = CreateCounter(nextCounterId++); if(!counter) return 0; // tried all, none were safe LibError err = ActivateCounter(counter); if(err == INFO::OK) { debug_printf(L"HRT| using name=%ls freq=%f\n", counter->Name(), counter->NominalFrequency()); return counter; // found a safe counter } else { wchar_t buf[100]; debug_printf(L"HRT| activating %ls failed: %ls\n", counter->Name(), error_description_r(err, buf, ARRAY_SIZE(buf))); DestroyCounter(counter); } } } //----------------------------------------------------------------------------- // counter that drives the timer static ICounter* counter; // (these counter properties are cached for efficiency and convenience:) static double nominalFrequency; static double resolution; static size_t counterBits; static u64 counterMask; static void InitCounter() { // we used to support switching counters at runtime, but that's // unnecessarily complex. it need and should only be done once. debug_assert(counter == 0); counter = GetNextBestSafeCounter(); debug_assert(counter != 0); nominalFrequency = counter->NominalFrequency(); resolution = counter->Resolution(); counterBits = counter->CounterBits(); debug_printf(L"HRT| counter=%ls freq=%g res=%g bits=%d\n", counter->Name(), nominalFrequency, resolution, counterBits); // sanity checks debug_assert(nominalFrequency >= 500.0-DBL_EPSILON); debug_assert(resolution <= 2e-3); debug_assert(8 <= counterBits && counterBits <= 64); counterMask = bit_mask(counterBits); } static void ShutdownCounter() { DestroyCounter(counter); } static inline u64 Counter() { return counter->Counter(); } /** * @return difference [ticks], taking rollover into account. * (time-critical, so it's not called through ICounter.) 
**/ static inline u64 CounterDelta(u64 oldCounter, u64 newCounter) { return (newCounter - oldCounter) & counterMask; } double whrt_Resolution() { debug_assert(resolution != 0.0); return resolution; } //----------------------------------------------------------------------------- // timer state // we're not going to bother calibrating the counter (i.e. measuring its // current frequency by means of a second timer). rationale: // - all counters except the TSC are stable and run at fixed frequencies; // - it's not clear that any other HRT or the tick count would be useful // as a stable time reference (if it were, we should be using it instead); // - calibration would complicate the code (we'd have to make sure the // secondary counter is safe and can co-exist with the primary). /** * stores all timer state shared between readers and the update thread. * (must be POD because it's used before static ctors run.) **/ struct TimerState { // value of the counter at last update. u64 counter; // total elapsed time [seconds] since first update. // converted from tick deltas with the *then current* frequency // (this enables calibration, which is currently not implemented, // but leaving open the possibility costs nothing) double time; u8 padding[48]; }; // how do we detect when the old TimerState is no longer in use and can be // freed? we use two static instances (avoids dynamic allocation headaches) // and swap between them ('double-buffering'). it is assumed that all // entered critical sections (the latching of TimerState fields) will have // been exited before the next update comes around; if not, TimerState.time // changes, the critical section notices and re-reads the new values. static __declspec(align(64)) TimerState timerStates[2]; // note: exchanging pointers is easier than XORing an index. 
static volatile TimerState* volatile ts = &timerStates[0]; static volatile TimerState* volatile ts2 = &timerStates[1]; static void UpdateTimerState() { // how can we synchronize readers and the update thread? locks are // preferably avoided since they're dangerous and can be slow. what we // need to ensure is that TimerState doesn't change while another thread is // accessing it. the first step is to linearize the update, i.e. have it // appear to happen in an instant (done by building a new TimerState and // having it go live by switching pointers). all that remains is to make // reads of the state variables consistent, done by latching them all and // retrying if an update came in the middle of this. const u64 counter = Counter(); const u64 deltaTicks = CounterDelta(ts->counter, counter); ts2->counter = counter; ts2->time = ts->time + deltaTicks/nominalFrequency; ts = (volatile TimerState*)InterlockedExchangePointer((volatile PVOID*)&ts2, (PVOID)ts); } double whrt_Time() { retry: // latch timer state (counter and time must be from the same update) const double time = ts->time; COMPILER_FENCE; const u64 counter = ts->counter; // ts changed after reading time. note: don't compare counter because // it _might_ have the same value after two updates. if(time != ts->time) goto retry; const u64 deltaTicks = CounterDelta(counter, Counter()); return (time + deltaTicks/nominalFrequency); } //----------------------------------------------------------------------------- // update thread // note: we used to discipline the HRT timestamp to the system time, so it // was advantageous to trigger updates via WinMM event (thus reducing // instances where we're called in the middle of a scheduler tick). // since that's no longer relevant, we prefer using a thread, because that // avoids the dependency on WinMM and its lengthy startup time. 
// rationale: (+ and - are reasons for longer and shorter lengths) // + minimize CPU usage // + ensure all threads currently using TimerState return from those // functions before the next interval // - avoid more than 1 counter rollover per interval (InitUpdateThread makes // sure our interval is shorter than the current counter's rollover rate) static const DWORD UPDATE_INTERVAL_MS = 1000; static HANDLE hExitEvent; static HANDLE hUpdateThread; static unsigned __stdcall UpdateThread(void* UNUSED(data)) { debug_SetThreadName("whrt_UpdateThread"); for(;;) { const DWORD ret = WaitForSingleObject(hExitEvent, UPDATE_INTERVAL_MS); // owner terminated or wait failed or exit event signaled - exit thread if(ret != WAIT_TIMEOUT) break; UpdateTimerState(); } return 0; } static inline LibError InitUpdateThread() { // make sure our interval isn't too long // (counterBits can be 64 => Bit() would overflow => calculate period/2) const double period_2 = Bit(counterBits-1) / nominalFrequency; const size_t rolloversPerInterval = size_t(UPDATE_INTERVAL_MS / i64(period_2*2.0*1000.0)); debug_assert(rolloversPerInterval <= 1); hExitEvent = CreateEvent(0, TRUE, FALSE, 0); // manual reset, initially false if(hExitEvent == INVALID_HANDLE_VALUE) WARN_RETURN(ERR::LIMIT); hUpdateThread = (HANDLE)_beginthreadex(0, 0, UpdateThread, 0, 0, 0); if(!hUpdateThread) WARN_RETURN(ERR::LIMIT); return INFO::OK; } static inline void ShutdownUpdateThread() { // signal thread BOOL ok = SetEvent(hExitEvent); WARN_IF_FALSE(ok); // the nice way is to wait for it to exit if(WaitForSingleObject(hUpdateThread, 100) != WAIT_OBJECT_0) TerminateThread(hUpdateThread, 0); // forcibly exit (dangerous) CloseHandle(hExitEvent); CloseHandle(hUpdateThread); } //----------------------------------------------------------------------------- static LibError whrt_Init() { InitCounter(); // latch initial counter value so that timer starts at 0 ts->counter = Counter(); // must come before UpdateTimerState UpdateTimerState(); // 
must come before InitUpdateThread to avoid race RETURN_ERR(InitUpdateThread()); return INFO::OK; } static LibError whrt_Shutdown() { ShutdownUpdateThread(); ShutdownCounter(); acpi_Shutdown(); return INFO::OK; } Index: ps/trunk/source/lib/sysdep/os/win/wutil.cpp =================================================================== --- ps/trunk/source/lib/sysdep/os/win/wutil.cpp (revision 9349) +++ ps/trunk/source/lib/sysdep/os/win/wutil.cpp (revision 9350) @@ -1,542 +1,568 @@ /* Copyright (c) 2010 Wildfire Games * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ /* * various Windows-specific utilities */ #include "precompiled.h" #include "lib/sysdep/os/win/wutil.h" #include #include // __argc #include "lib/file/file.h" #include "lib/file/vfs/vfs.h" #include "lib/posix/posix.h" #include "lib/sysdep/sysdep.h" #include "lib/sysdep/os/win/win.h" #include "lib/sysdep/os/win/wdbg.h" // wdbg_assert #include "lib/sysdep/os/win/winit.h" #include // SHGetFolderPath WINIT_REGISTER_EARLY_INIT(wutil_Init); WINIT_REGISTER_LATE_SHUTDOWN(wutil_Shutdown); //----------------------------------------------------------------------------- // safe allocator // may be used independently of libc malloc // (in particular, before _cinit and while calling static dtors). // used by wpthread critical section code. void* wutil_Allocate(size_t size) { const DWORD flags = HEAP_ZERO_MEMORY; return HeapAlloc(GetProcessHeap(), flags, size); } void wutil_Free(void* p) { const DWORD flags = 0; HeapFree(GetProcessHeap(), flags, p); } //----------------------------------------------------------------------------- // locks // several init functions are before called before _cinit. // POSIX static mutex init may not have been done by then, // so we need our own lightweight functions. 
static CRITICAL_SECTION cs[NUM_CS]; static bool cs_valid; void wutil_Lock(WinLockId id) { if(!cs_valid) return; EnterCriticalSection(&cs[id]); } void wutil_Unlock(WinLockId id) { if(!cs_valid) return; LeaveCriticalSection(&cs[id]); } bool wutil_IsLocked(WinLockId id) { if(!cs_valid) return false; const BOOL successfullyEntered = TryEnterCriticalSection(&cs[id]); if(!successfullyEntered) return true; // still locked LeaveCriticalSection(&cs[id]); return false; // probably not locked } static void InitLocks() { for(int i = 0; i < NUM_CS; i++) InitializeCriticalSection(&cs[i]); cs_valid = true; } static void ShutdownLocks() { cs_valid = false; for(int i = 0; i < NUM_CS; i++) DeleteCriticalSection(&cs[i]); memset(cs, 0, sizeof(cs)); } //----------------------------------------------------------------------------- // error codes // only call after a Win32 function indicates failure. LibError LibError_from_GLE(bool warn_if_failed) { LibError err = ERR::FAIL; switch(GetLastError()) { case ERROR_OUTOFMEMORY: case ERROR_NOT_ENOUGH_MEMORY: err = ERR::NO_MEM; break; case ERROR_INVALID_HANDLE: case ERROR_INVALID_PARAMETER: case ERROR_BAD_ARGUMENTS: err = ERR::INVALID_PARAM; break; case ERROR_INSUFFICIENT_BUFFER: err = ERR::BUF_SIZE; break; case ERROR_ACCESS_DENIED: err = ERR::FILE_ACCESS; break; case ERROR_NOT_SUPPORTED: err = ERR::NOT_SUPPORTED; break; case ERROR_CALL_NOT_IMPLEMENTED: err = ERR::NOT_IMPLEMENTED; break; case ERROR_PROC_NOT_FOUND: err = ERR::NO_SYS; break; case ERROR_BUSY: err = ERR::AGAIN; break; case ERROR_FILE_NOT_FOUND: err = ERR::VFS_FILE_NOT_FOUND; break; case ERROR_PATH_NOT_FOUND: err = ERR::VFS_DIR_NOT_FOUND; break; default: break; // err already set above } if(warn_if_failed) DEBUG_WARN_ERR(err); return err; } LibError LibError_from_win32(DWORD ret, bool warn_if_failed) { if(ret != FALSE) return INFO::OK; return LibError_from_GLE(warn_if_failed); } //----------------------------------------------------------------------------- // command line // copy 
of GetCommandLine string. will be tokenized and then referenced by // the argv pointers. static wchar_t* argvContents; int s_argc = 0; wchar_t** s_argv = 0; static void ReadCommandLine() { const wchar_t* commandLine = GetCommandLineW(); // (this changes as quotation marks are removed) size_t numChars = wcslen(commandLine); argvContents = (wchar_t*)HeapAlloc(GetProcessHeap(), HEAP_GENERATE_EXCEPTIONS, (numChars+1)*sizeof(wchar_t)); wcscpy_s(argvContents, numChars+1, commandLine); // first pass: tokenize string and count number of arguments bool ignoreSpace = false; for(size_t i = 0; i < numChars; i++) { switch(argvContents[i]) { case '"': ignoreSpace = !ignoreSpace; // strip the " character memmove(argvContents+i, argvContents+i+1, (numChars-i)*sizeof(wchar_t)); numChars--; i--; break; case ' ': if(!ignoreSpace) { argvContents[i] = '\0'; s_argc++; } break; } } s_argc++; // have argv entries point into the tokenized string s_argv = (wchar_t**)HeapAlloc(GetProcessHeap(), HEAP_GENERATE_EXCEPTIONS, s_argc*sizeof(wchar_t*)); wchar_t* nextArg = argvContents; for(int i = 0; i < s_argc; i++) { s_argv[i] = nextArg; nextArg += wcslen(nextArg)+1; } } int wutil_argc() { return s_argc; } wchar_t** wutil_argv() { debug_assert(s_argv); return s_argv; } static void FreeCommandLine() { HeapFree(GetProcessHeap(), 0, s_argv); HeapFree(GetProcessHeap(), 0, argvContents); } bool wutil_HasCommandLineArgument(const wchar_t* arg) { for(int i = 0; i < s_argc; i++) { if(!wcscmp(s_argv[i], arg)) return true; } return false; } //----------------------------------------------------------------------------- // directories // (NB: wutil_Init is called before static ctors => use placement new) static OsPath* systemPath; static OsPath* executablePath; static OsPath* appdataPath; const OsPath& wutil_SystemPath() { return *systemPath; } const OsPath& wutil_ExecutablePath() { return *executablePath; } const OsPath& wutil_AppdataPath() { return *appdataPath; } static void GetDirectories() { 
WinScopedPreserveLastError s; // system directory { const UINT length = GetSystemDirectoryW(0, 0); debug_assert(length != 0); std::wstring path(length, '\0'); const UINT charsWritten = GetSystemDirectoryW(&path[0], length); debug_assert(charsWritten == length-1); systemPath = new(wutil_Allocate(sizeof(OsPath))) OsPath(path); } // executable's directory executablePath = new(wutil_Allocate(sizeof(OsPath))) OsPath(sys_ExecutablePathname().Parent()); // application data { HWND hwnd = 0; // ignored unless a dial-up connection is needed to access the folder HANDLE token = 0; wchar_t path[MAX_PATH]; // mandated by SHGetFolderPathW const HRESULT ret = SHGetFolderPathW(hwnd, CSIDL_APPDATA, token, 0, path); debug_assert(SUCCEEDED(ret)); appdataPath = new(wutil_Allocate(sizeof(OsPath))) OsPath(path); } } static void FreeDirectories() { systemPath->~OsPath(); wutil_Free(systemPath); executablePath->~OsPath(); wutil_Free(executablePath); appdataPath->~OsPath(); wutil_Free(appdataPath); } //----------------------------------------------------------------------------- // user32 fix // HACK: make sure a reference to user32 is held, even if someone // decides to delay-load it. this fixes bug #66, which was the // Win32 mouse cursor (set via user32!SetCursor) appearing as a // black 32x32(?) rectangle. the underlying cause was as follows: // powrprof.dll was the first client of user32, causing it to be // loaded. after we were finished with powrprof, we freed it, in turn // causing user32 to unload. later code would then reload user32, // which apparently terminally confused the cursor implementation. // // since we hold a reference here, user32 will never unload. // of course, the benefits of delay-loading are lost for this DLL, // but that is unavoidable. it is safer to force loading it, rather // than documenting the problem and asking it not be delay-loaded. 
static HMODULE hUser32Dll; static void ForciblyLoadUser32Dll() { hUser32Dll = LoadLibraryW(L"user32.dll"); } // avoids Boundschecker warning static void FreeUser32Dll() { FreeLibrary(hUser32Dll); } //----------------------------------------------------------------------------- // memory static void EnableLowFragmentationHeap() { #if WINVER >= 0x0501 WUTIL_FUNC(pHeapSetInformation, BOOL, (HANDLE, HEAP_INFORMATION_CLASS, void*, size_t)); WUTIL_IMPORT_KERNEL32(HeapSetInformation, pHeapSetInformation); if(pHeapSetInformation) { ULONG flags = 2; // enable LFH pHeapSetInformation(GetProcessHeap(), HeapCompatibilityInformation, &flags, sizeof(flags)); } #endif // #if WINVER >= 0x0501 } //----------------------------------------------------------------------------- // Wow64 // Wow64 'helpfully' redirects all 32-bit apps' accesses of // %windir%\\system32\\drivers to %windir%\\system32\\drivers\\SysWOW64. // that's bad, because the actual drivers are not in the subdirectory. to // work around this, provide for temporarily disabling redirection. static WUTIL_FUNC(pIsWow64Process, BOOL, (HANDLE, PBOOL)); static WUTIL_FUNC(pWow64DisableWow64FsRedirection, BOOL, (PVOID*)); static WUTIL_FUNC(pWow64RevertWow64FsRedirection, BOOL, (PVOID)); static bool isWow64; static void ImportWow64Functions() { WUTIL_IMPORT_KERNEL32(IsWow64Process, pIsWow64Process); WUTIL_IMPORT_KERNEL32(Wow64DisableWow64FsRedirection, pWow64DisableWow64FsRedirection); WUTIL_IMPORT_KERNEL32(Wow64RevertWow64FsRedirection, pWow64RevertWow64FsRedirection); } static void DetectWow64() { // function not found => running on 32-bit Windows if(!pIsWow64Process) { isWow64 = false; return; } BOOL isWow64Process = FALSE; const BOOL ok = pIsWow64Process(GetCurrentProcess(), &isWow64Process); WARN_IF_FALSE(ok); isWow64 = (isWow64Process == TRUE); } bool wutil_IsWow64() { return isWow64; } WinScopedDisableWow64Redirection::WinScopedDisableWow64Redirection() { // note: don't just check if the function pointers are valid. 
32-bit // Vista includes them but isn't running Wow64, so calling the functions // would fail. since we have to check if actually on Wow64, there's no // more need to verify the pointers (their existence is implied). if(!wutil_IsWow64()) return; const BOOL ok = pWow64DisableWow64FsRedirection(&m_wasRedirectionEnabled); WARN_IF_FALSE(ok); } WinScopedDisableWow64Redirection::~WinScopedDisableWow64Redirection() { if(!wutil_IsWow64()) return; const BOOL ok = pWow64RevertWow64FsRedirection(m_wasRedirectionEnabled); WARN_IF_FALSE(ok); } //----------------------------------------------------------------------------- + +LibError wutil_SetPrivilege(const wchar_t* privilege, bool enable) +{ + WinScopedPreserveLastError s; + + HANDLE hToken; + if(!OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES|TOKEN_QUERY, &hToken)) + return ERR::_1; + + TOKEN_PRIVILEGES tp; + if (!LookupPrivilegeValueW(NULL, privilege, &tp.Privileges[0].Luid)) + return ERR::_2; + tp.PrivilegeCount = 1; + tp.Privileges[0].Attributes = enable? SE_PRIVILEGE_ENABLED : 0; + + SetLastError(0); + const BOOL ok = AdjustTokenPrivileges(hToken, FALSE, &tp, 0, 0, 0); + if(!ok || GetLastError() != 0) + return ERR::_3; + + WARN_IF_FALSE(CloseHandle(hToken)); + return INFO::OK; +} + + +//----------------------------------------------------------------------------- // module handle #ifndef LIB_STATIC_LINK #include "lib/sysdep/os/win/wdll_main.h" HMODULE wutil_LibModuleHandle() { HMODULE hModule; const DWORD flags = GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT; const BOOL ok = GetModuleHandleEx(flags, (LPCWSTR)&wutil_LibModuleHandle, &hModule); // (avoid debug_assert etc. 
because we're called from debug_DisplayError) wdbg_assert(ok); return hModule; } #else HMODULE wutil_LibModuleHandle() { return GetModuleHandle(0); } #endif //----------------------------------------------------------------------------- // find main window // this is required by the error dialog and clipboard code. // note that calling from wutil_Init won't work, because the app will not // have created its window by then. static HWND hAppWindow; static BOOL CALLBACK FindAppWindowByPid(HWND hWnd, LPARAM UNUSED(lParam)) { DWORD pid; (void)GetWindowThreadProcessId(hWnd, &pid); // (function always succeeds) if(pid == GetCurrentProcessId()) { hAppWindow = hWnd; return FALSE; // done } return TRUE; // keep calling } HWND wutil_AppWindow() { if(!hAppWindow) { // to avoid wasting time, FindAppWindowByPid returns FALSE after // finding the desired window, which causes EnumWindows to 'fail'. // we detect actual errors by checking GetLastError. WinScopedPreserveLastError s; SetLastError(0); (void)EnumWindows(FindAppWindowByPid, 0); // (see above) debug_assert(GetLastError() == 0); } return hAppWindow; } //----------------------------------------------------------------------------- static LibError wutil_Init() { InitLocks(); ForciblyLoadUser32Dll(); EnableLowFragmentationHeap(); ReadCommandLine(); GetDirectories(); ImportWow64Functions(); DetectWow64(); return INFO::OK; } static LibError wutil_Shutdown() { FreeCommandLine(); FreeUser32Dll(); ShutdownLocks(); FreeDirectories(); return INFO::OK; } Index: ps/trunk/source/lib/sysdep/os/win/wnuma.cpp =================================================================== --- ps/trunk/source/lib/sysdep/os/win/wnuma.cpp (revision 9349) +++ ps/trunk/source/lib/sysdep/os/win/wnuma.cpp (revision 9350) @@ -1,568 +1,568 @@ /* Copyright (c) 2010 Wildfire Games * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software 
without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "precompiled.h" #include "lib/sysdep/numa.h" -#include "lib/bits.h" // round_up, PopulationCount +#include "lib/bits.h" // PopulationCount #include "lib/timer.h" #include "lib/module_init.h" #include "lib/allocators/allocators.h" // page_aligned_alloc #include "lib/sysdep/os_cpu.h" #include "lib/sysdep/acpi.h" #include "lib/sysdep/os/win/win.h" #include "lib/sysdep/os/win/wutil.h" #include "lib/sysdep/os/win/wcpu.h" #include #if ARCH_X86_X64 #include "lib/sysdep/arch/x86_x64/topology.h" // ApicIds #endif //----------------------------------------------------------------------------- // nodes struct Node // POD { // (Windows doesn't guarantee node numbers are contiguous, so // we associate them with contiguous indices in nodes[]) UCHAR nodeNumber; u32 proximityDomainNumber; uintptr_t processorMask; }; static Node nodes[os_cpu_MaxProcessors]; static size_t numNodes; static Node* AddNode() { debug_assert(numNodes < ARRAY_SIZE(nodes)); return &nodes[numNodes++]; } static Node* FindNodeWithProcessorMask(uintptr_t processorMask) { for(size_t node = 0; node < numNodes; node++) { 
if(nodes[node].processorMask == processorMask) return &nodes[node]; } return 0; } static Node* FindNodeWithProcessor(size_t processor) { for(size_t node = 0; node < numNodes; node++) { if(IsBitSet(nodes[node].processorMask, processor)) return &nodes[node]; } return 0; } // cached results of FindNodeWithProcessor for each processor static size_t processorsNode[os_cpu_MaxProcessors]; static void FillProcessorsNode() { for(size_t processor = 0; processor < os_cpu_NumProcessors(); processor++) { Node* node = FindNodeWithProcessor(processor); if(node) processorsNode[processor] = node-nodes; else debug_assert(0); } } //----------------------------------------------------------------------------- // Windows topology static UCHAR HighestNodeNumber() { WUTIL_FUNC(pGetNumaHighestNodeNumber, BOOL, (PULONG)); WUTIL_IMPORT_KERNEL32(GetNumaHighestNodeNumber, pGetNumaHighestNodeNumber); if(!pGetNumaHighestNodeNumber) return 0; // NUMA not supported => only one node ULONG highestNodeNumber; const BOOL ok = pGetNumaHighestNodeNumber(&highestNodeNumber); WARN_IF_FALSE(ok); return (UCHAR)highestNodeNumber; } static void PopulateNodes() { WUTIL_FUNC(pGetNumaNodeProcessorMask, BOOL, (UCHAR, PULONGLONG)); WUTIL_IMPORT_KERNEL32(GetNumaNodeProcessorMask, pGetNumaNodeProcessorMask); if(!pGetNumaNodeProcessorMask) return; DWORD_PTR processAffinity, systemAffinity; { const BOOL ok = GetProcessAffinityMask(GetCurrentProcess(), &processAffinity, &systemAffinity); WARN_IF_FALSE(ok); } debug_assert(PopulationCount(processAffinity) <= PopulationCount(systemAffinity)); for(UCHAR nodeNumber = 0; nodeNumber <= HighestNodeNumber(); nodeNumber++) { ULONGLONG affinity; { const BOOL ok = pGetNumaNodeProcessorMask(nodeNumber, &affinity); WARN_IF_FALSE(ok); } if(!affinity) continue; // empty node, skip Node* node = AddNode(); node->nodeNumber = nodeNumber; node->processorMask = wcpu_ProcessorMaskFromAffinity(processAffinity, (DWORD_PTR)affinity); } } 
//----------------------------------------------------------------------------- // ACPI SRAT topology #if ARCH_X86_X64 #pragma pack(push, 1) // fields common to Affinity* structures struct AffinityHeader { u8 type; u8 length; // size [bytes], including this header }; struct AffinityAPIC { static const u8 type = 0; AffinityHeader header; u8 proximityDomainNumber0; u8 apicId; u32 flags; u8 sapicId; u8 proximityDomainNumber123[3]; u32 clockDomain; u32 ProximityDomainNumber() const { // (this is the apparent result of backwards compatibility, ugh.) u32 proximityDomainNumber; memcpy(&proximityDomainNumber, &proximityDomainNumber123[0]-1, sizeof(proximityDomainNumber)); proximityDomainNumber &= ~0xFF; proximityDomainNumber |= proximityDomainNumber0; return proximityDomainNumber; } }; struct AffinityMemory { static const u8 type = 1; AffinityHeader header; u32 proximityDomainNumber; u16 reserved1; u64 baseAddress; u64 length; u32 reserved2; u32 flags; u64 reserved3; }; // AffinityX2APIC omitted, since the APIC ID is sufficient for our purposes // Static Resource Affinity Table struct SRAT { AcpiTable header; u32 reserved1; u8 reserved2[8]; AffinityHeader affinities[1]; }; #pragma pack(pop) template static const Affinity* DynamicCastFromHeader(const AffinityHeader* header) { if(header->type != Affinity::type) return 0; // sanity check: ensure no padding was inserted debug_assert(header->length == sizeof(Affinity)); const Affinity* affinity = (const Affinity*)header; if(!IsBitSet(affinity->flags, 0)) // not enabled return 0; return affinity; } static void PopulateProcessorMaskFromApicId(u32 apicId, uintptr_t& processorMask) { const u8* apicIds = ApicIds(); for(size_t processor = 0; processor < os_cpu_NumProcessors(); processor++) { if(apicIds[processor] == apicId) { processorMask |= Bit(processor); return; } } debug_assert(0); // APIC ID not found } struct ProximityDomain { uintptr_t processorMask; // (AffinityMemory's fields are not currently needed) }; typedef std::map 
ProximityDomains; static ProximityDomains ExtractProximityDomainsFromSRAT(const SRAT* srat) { ProximityDomains proximityDomains; for(const AffinityHeader* header = srat->affinities; header < (const AffinityHeader*)(uintptr_t(srat)+srat->header.size); header = (const AffinityHeader*)(uintptr_t(header) + header->length)) { const AffinityAPIC* affinityAPIC = DynamicCastFromHeader(header); if(affinityAPIC) { const u32 proximityDomainNumber = affinityAPIC->ProximityDomainNumber(); ProximityDomain& proximityDomain = proximityDomains[proximityDomainNumber]; PopulateProcessorMaskFromApicId(affinityAPIC->apicId, proximityDomain.processorMask); } } return proximityDomains; } static void PopulateNodesFromProximityDomains(const ProximityDomains& proximityDomains) { for(ProximityDomains::const_iterator it = proximityDomains.begin(); it != proximityDomains.end(); ++it) { const u32 proximityDomainNumber = it->first; const ProximityDomain& proximityDomain = it->second; Node* node = FindNodeWithProcessorMask(proximityDomain.processorMask); if(!node) node = AddNode(); node->proximityDomainNumber = proximityDomainNumber; node->processorMask = proximityDomain.processorMask; } } #endif // #if ARCH_X86_X64 //----------------------------------------------------------------------------- static ModuleInitState initState; static LibError InitTopology() { PopulateNodes(); #if ARCH_X86_X64 const SRAT* srat = (const SRAT*)acpi_GetTable("SRAT"); if(srat) { const ProximityDomains proximityDomains = ExtractProximityDomainsFromSRAT(srat); PopulateNodesFromProximityDomains(proximityDomains); } #endif // neither OS nor ACPI information is available if(numNodes == 0) { // add dummy node that contains all system processors Node* node = AddNode(); node->nodeNumber = 0; node->proximityDomainNumber = 0; node->processorMask = os_cpu_ProcessorMask(); } FillProcessorsNode(); return INFO::OK; } size_t numa_NumNodes() { (void)ModuleInit(&initState, InitTopology); return numNodes; } size_t 
numa_NodeFromProcessor(size_t processor) { (void)ModuleInit(&initState, InitTopology); debug_assert(processor < os_cpu_NumProcessors()); return processorsNode[processor]; } uintptr_t numa_ProcessorMaskFromNode(size_t node) { (void)ModuleInit(&initState, InitTopology); debug_assert(node < numNodes); return nodes[node].processorMask; } static UCHAR NodeNumberFromNode(size_t node) { (void)ModuleInit(&initState, InitTopology); debug_assert(node < numa_NumNodes()); return nodes[node].nodeNumber; } //----------------------------------------------------------------------------- // memory info size_t numa_AvailableMemory(size_t node) { // note: it is said that GetNumaAvailableMemoryNode sometimes incorrectly // reports zero bytes. the actual cause may however be unexpected // RAM configuration, e.g. not all slots filled. WUTIL_FUNC(pGetNumaAvailableMemoryNode, BOOL, (UCHAR, PULONGLONG)); WUTIL_IMPORT_KERNEL32(GetNumaAvailableMemoryNode, pGetNumaAvailableMemoryNode); if(pGetNumaAvailableMemoryNode) { const UCHAR nodeNumber = NodeNumberFromNode(node); ULONGLONG availableBytes; const BOOL ok = pGetNumaAvailableMemoryNode(nodeNumber, &availableBytes); WARN_IF_FALSE(ok); const size_t availableMiB = size_t(availableBytes / MiB); return availableMiB; } // NUMA not supported - return available system memory else return os_cpu_MemoryAvailable(); } #pragma pack(push, 1) // ACPI System Locality Information Table // (System Locality == Proximity Domain) struct SLIT { AcpiTable header; u64 numSystemLocalities; u8 entries[1]; // numSystemLocalities*numSystemLocalities entries }; #pragma pack(pop) static double ReadRelativeDistanceFromSLIT(const SLIT* slit) { const size_t n = slit->numSystemLocalities; debug_assert(slit->header.size == sizeof(SLIT)-sizeof(slit->entries)+n*n); // diagonals are specified to be 10 for(size_t i = 0; i < n; i++) debug_assert(slit->entries[i*n+i] == 10); // entries = relativeDistance * 10 return *std::max_element(slit->entries, slit->entries+n*n) / 10.0; } // 
@return ratio between max/min time required to access one node's // memory from each processor. static double MeasureRelativeDistance() { const size_t size = 16*MiB; void* mem = page_aligned_alloc(size); const uintptr_t previousProcessorMask = os_cpu_SetThreadAffinityMask(os_cpu_ProcessorMask()); double minTime = 1e10, maxTime = 0.0; for(size_t node = 0; node < numa_NumNodes(); node++) { const uintptr_t processorMask = numa_ProcessorMaskFromNode(node); os_cpu_SetThreadAffinityMask(processorMask); const double startTime = timer_Time(); memset(mem, 0, size); const double elapsedTime = timer_Time() - startTime; minTime = std::min(minTime, elapsedTime); maxTime = std::max(maxTime, elapsedTime); } (void)os_cpu_SetThreadAffinityMask(previousProcessorMask); page_aligned_free(mem, size); return maxTime / minTime; } static double relativeDistance; static LibError InitRelativeDistance() { // early-out for non-NUMA systems (saves some time) if(numa_NumNodes() == 1) { relativeDistance = 1.0; return INFO::OK; } // trust values reported by the BIOS, if available const SLIT* slit = (const SLIT*)acpi_GetTable("SLIT"); if(slit) relativeDistance = ReadRelativeDistanceFromSLIT(slit); else relativeDistance = MeasureRelativeDistance(); debug_assert(relativeDistance >= 1.0); debug_assert(relativeDistance <= 3.0); // (Microsoft guideline for NUMA systems) return INFO::OK; } double numa_Factor() { static ModuleInitState initState; (void)ModuleInit(&initState, InitRelativeDistance); return relativeDistance; } static bool IsMemoryInterleaved() { if(numa_NumNodes() == 1) return false; if(!acpi_GetTable("FACP")) // no ACPI tables available return false; // indeterminate, assume not interleaved if(acpi_GetTable("SRAT")) // present iff not interleaved return false; return true; } static bool isMemoryInterleaved; static LibError InitMemoryInterleaved() { isMemoryInterleaved = IsMemoryInterleaved(); return INFO::OK; } bool numa_IsMemoryInterleaved() { static ModuleInitState initState; 
(void)ModuleInit(&initState, InitMemoryInterleaved); return isMemoryInterleaved; } //----------------------------------------------------------------------------- // allocator // //static bool VerifyPages(void* mem, size_t size, size_t pageSize, size_t node) //{ // WUTIL_FUNC(pQueryWorkingSetEx, BOOL, (HANDLE, PVOID, DWORD)); // WUTIL_IMPORT_KERNEL32(QueryWorkingSetEx, pQueryWorkingSetEx); // if(!pQueryWorkingSetEx) // return true; // can't do anything // //#if WINVER >= 0x600 // size_t largePageSize = os_cpu_LargePageSize(); // debug_assert(largePageSize != 0); // this value is needed for later // // // retrieve attributes of all pages constituting mem // const size_t numPages = (size + pageSize-1) / pageSize; // PSAPI_WORKING_SET_EX_INFORMATION* wsi = new PSAPI_WORKING_SET_EX_INFORMATION[numPages]; // for(size_t i = 0; i < numPages; i++) // wsi[i].VirtualAddress = (u8*)mem + i*pageSize; // pQueryWorkingSetEx(GetCurrentProcess(), wsi, DWORD(sizeof(PSAPI_WORKING_SET_EX_INFORMATION)*numPages)); // // // ensure each is valid and allocated on the correct node // for(size_t i = 0; i < numPages; i++) // { // const PSAPI_WORKING_SET_EX_BLOCK& attributes = wsi[i].VirtualAttributes; // if(!attributes.Valid) // return false; // if((attributes.LargePage != 0) != (pageSize == largePageSize)) // { // debug_printf(L"NUMA: is not a large page\n"); // return false; // } // if(attributes.Node != node) // { // debug_printf(L"NUMA: allocated from remote node\n"); // return false; // } // } // // delete[] wsi; //#else // UNUSED2(mem); // UNUSED2(size); // UNUSED2(pageSize); // UNUSED2(node); //#endif // // return true; //} // // //void* numa_AllocateOnNode(size_t node, size_t size, LargePageDisposition largePageDisposition, size_t* ppageSize) //{ // debug_assert(node < numa_NumNodes()); // // // see if there will be enough memory (non-authoritative, for debug purposes only) // { // const size_t sizeMiB = size/MiB; // const size_t availableMiB = numa_AvailableMemory(node); // 
if(availableMiB < sizeMiB) // debug_printf(L"NUMA: warning: node reports insufficient memory (%d vs %d MB)\n", availableMiB, sizeMiB); // } // // size_t pageSize; // (used below even if ppageSize is zero) // void* const mem = numa_Allocate(size, largePageDisposition, &pageSize); // if(ppageSize) // *ppageSize = pageSize; // // // we can't use VirtualAllocExNuma - it's only available in Vista and Server 2008. // // workaround: fault in all pages now to ensure they are allocated from the // // current node, then verify page attributes. // const uintptr_t previousProcessorMask = os_cpu_SetThreadAffinityMask(numa_ProcessorMaskFromNode(node)); // memset(mem, 0, size); // (void)os_cpu_SetThreadAffinityMask(previousProcessorMask); // // VerifyPages(mem, size, pageSize, node); // // return mem; //} Index: ps/trunk/source/lib/sysdep/filesystem.h =================================================================== --- ps/trunk/source/lib/sysdep/filesystem.h (revision 9349) +++ ps/trunk/source/lib/sysdep/filesystem.h (revision 9350) @@ -1,118 +1,123 @@ /* Copyright (c) 2010 Wildfire Games * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* * wchar_t versions of POSIX filesystem functions */ #ifndef INCLUDED_FILESYSTEM #define INCLUDED_FILESYSTEM #include "lib/os_path.h" #include "lib/posix/posix_filesystem.h" // mode_t // // dirent.h // struct WDIR; struct wdirent { // note: SUSv3 describes this as a "char array" but of unspecified size. // since that precludes using sizeof(), we may as well declare as a // pointer to avoid copying in the implementation. wchar_t* d_name; }; extern WDIR* wopendir(const OsPath& path); extern struct wdirent* wreaddir(WDIR*); // return status for the file returned by the last successful // wreaddir call from the given directory stream. // currently sets st_size, st_mode, and st_mtime; the rest are zeroed. // non-portable, but considerably faster than stat(). used by dir_ForEachSortedEntry. extern int wreaddir_stat_np(WDIR*, struct stat*); extern int wclosedir(WDIR*); // // fcntl.h // // Win32 _wsopen_s flags not specified by POSIX: #define O_TEXT_NP 0x4000 // file mode is text (translated) #define O_BINARY_NP 0x8000 // file mode is binary (untranslated) // waio flags not specified by POSIX nor implemented by Win32 _wsopen_s: // do not open a separate AIO-capable handle. // (this can be used for small files where AIO overhead isn't worthwhile, // thus speeding up loading and reducing resource usage.) #define O_NO_AIO_NP 0x20000 // POSIX flags not supported by the underlying Win32 _wsopen_s: #if OS_WIN #define O_NONBLOCK 0x1000000 #endif extern int wopen(const OsPath& pathname, int oflag); extern int wopen(const OsPath& pathname, int oflag, mode_t mode); extern int wclose(int fd); // // unistd.h // +// waio requires offsets and sizes to be multiples of the sector size. 
+// to allow arbitrarily sized files, we truncate them after I/O. +// however, ftruncate cannot be used since it is also subject to the +// sector-alignment requirement. instead, the file must be closed and +// this function called. LIB_API int wtruncate(const OsPath& pathname, off_t length); LIB_API int wunlink(const OsPath& pathname); LIB_API int wrmdir(const OsPath& path); // // stdio.h // LIB_API int wrename(const OsPath& pathnameOld, const OsPath& pathnameNew); // // stdlib.h // LIB_API OsPath wrealpath(const OsPath& pathname); // // sys/stat.h // LIB_API int wstat(const OsPath& pathname, struct stat* buf); LIB_API int wmkdir(const OsPath& path, mode_t mode); #endif // #ifndef INCLUDED_FILESYSTEM Index: ps/trunk/source/lib/alignment.h =================================================================== --- ps/trunk/source/lib/alignment.h (nonexistent) +++ ps/trunk/source/lib/alignment.h (revision 9350) @@ -0,0 +1,105 @@ +#ifndef INCLUDED_ALIGNMENT +#define INCLUDED_ALIGNMENT + +#include "lib/sysdep/compiler.h" // MSC_VERSION + +template +inline bool IsAligned(T t, uintptr_t multiple) +{ + return (uintptr_t(t) % multiple) == 0; +} + +template +inline size_t Align(size_t n) +{ + cassert(multiple != 0 && ((multiple & (multiple-1)) == 0)); // is power of 2 + return (n + multiple-1) & ~(multiple-1); +} + + +// +// SIMD vector +// + +static const size_t vectorSize = 16; + +#define VERIFY_VECTOR_MULTIPLE(size)\ + VERIFY(IsAligned(size, vectorSize)) + +#define VERIFY_VECTOR_ALIGNED(pointer)\ + VERIFY_VECTOR_MULTIPLE(pointer);\ + ASSUME_ALIGNED(pointer, vectorSize) + + +// +// CPU cache +// + +static const size_t cacheLineSize = 64; // (L2) + +#if MSC_VERSION +#define CACHE_ALIGNED __declspec(align(64)) // align() requires a literal; keep in sync with cacheLineSize +#endif + + +// +// MMU pages +// + +static const size_t pageSize = 0x1000; // 4 KB +static const size_t largePageSize = 0x200000; // 2 MB + + +// waio opens files with FILE_FLAG_NO_BUFFERING, so Windows 
requires +// file offsets / buffers and sizes to be sector-aligned. querying the +// actual sector size via GetDiskFreeSpace is inconvenient and slow. +// we always request large blocks anyway, so just check whether inputs +// are aligned to a `maximum' sector size. this catches common mistakes +// before they cause scary "IO failed" errors. if the value turns out +// to be too low, the Windows APIs will still complain. +static const uintptr_t maxSectorSize = 0x1000; + +#endif // #ifndef INCLUDED_ALIGNMENT +#ifndef INCLUDED_ALIGNMENT +#define INCLUDED_ALIGNMENT + +template +inline bool IsAligned(T t, uintptr_t multiple) +{ + return (uintptr_t(t) % multiple) == 0; +} + + +// +// SIMD vector +// + +static const size_t vectorSize = 16; + +#define VERIFY_VECTOR_MULTIPLE(size)\ + VERIFY(IsAligned(size, vectorSize)) + +#define VERIFY_VECTOR_ALIGNED(pointer)\ + VERIFY_VECTOR_MULTIPLE(pointer);\ + ASSUME_ALIGNED(pointer, vectorSize) + + +// +// CPU cache +// + +static const size_t cacheLineSize = 64; // (L2) + +#if MSC_VERSION +#define CACHE_ALIGNED __declspec(align(64)) // align() requires a literal; keep in sync with cacheLineSize +#endif + + +// +// MMU pages +// + +static const size_t pageSize = 0x1000; // 4 KB +static const size_t largePageSize = 0x200000; // 2 MB + +#endif // #ifndef INCLUDED_ALIGNMENT Index: ps/trunk/source/lib/tex/tex_jpg.cpp =================================================================== --- ps/trunk/source/lib/tex/tex_jpg.cpp (revision 9349) +++ ps/trunk/source/lib/tex/tex_jpg.cpp (revision 9350) @@ -1,628 +1,631 @@ /* Copyright (c) 2010 Wildfire Games * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is 
furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* * JPEG codec using IJG jpeglib. */ #include "precompiled.h" +#include + #include "lib/external_libraries/libjpeg.h" +#include "lib/allocators/shared_ptr.h" #include "tex_codec.h" -#include // squelch "dtor / setjmp interaction" warnings. // all attempts to resolve the underlying problem failed; apparently // the warning is generated if setjmp is used at all in C++ mode. // (jpg_*code have no code that would trigger ctors/dtors, nor are any // called in their prolog/epilog code). #if MSC_VERSION # pragma warning(disable: 4611) #endif /* IMPORTANT: we assume that JOCTET is 8 bits. */ cassert(sizeof(JOCTET) == 1 && CHAR_BIT == 8); //----------------------------------------------------------------------------- // mem source manager //----------------------------------------------------------------------------- /* Expanded data source object for memory input */ typedef struct { struct jpeg_source_mgr pub; /* public fields */ DynArray* da; } SrcMgr; typedef SrcMgr* SrcPtr; /* * Initialize source --- called by jpeg_read_header * before any data is actually read. */ METHODDEF(void) src_init(j_decompress_ptr UNUSED(cinfo)) { } /* * Fill the input buffer --- called whenever buffer is emptied. 
* * In typical applications, this should read fresh data into the buffer * (ignoring the current state of next_input_byte & bytes_in_buffer), * reset the pointer & count to the start of the buffer, and return TRUE * indicating that the buffer has been reloaded. It is not necessary to * fill the buffer entirely, only to obtain at least one more byte. * * There is no such thing as an EOF return. If the end of the file has been * reached, the routine has a choice of ERREXIT() or inserting fake data into * the buffer. In most cases, generating a warning message and inserting a * fake EOI marker is the best course of action --- this will allow the * decompressor to output however much of the image is there. However, * the resulting error message is misleading if the real problem is an empty * input file, so we handle that case specially. */ METHODDEF(boolean) src_fill_buffer(j_decompress_ptr cinfo) { SrcPtr src = (SrcPtr)cinfo->src; static const JOCTET eoi[2] = { 0xFF, JPEG_EOI }; /* * since jpeg_mem_src fills the buffer with everything we've got, * jpeg is trying to read beyond end of buffer. return a fake EOI marker. * note: don't modify input buffer: it might be read-only. */ WARNMS(cinfo, JWRN_JPEG_EOF); src->pub.next_input_byte = eoi; src->pub.bytes_in_buffer = 2; return TRUE; } /* * Skip data --- used to skip over a potentially large amount of * uninteresting data (such as an APPn marker). */ METHODDEF(void) src_skip_data(j_decompress_ptr cinfo, long num_bytes) { SrcPtr src = (SrcPtr)cinfo->src; size_t skip_count = (size_t)num_bytes; /* docs say non-positive num_byte skips should be ignored */ if(num_bytes <= 0) return; /* * just subtract bytes available in buffer, * making sure we don't underflow the size_t. * note: if we skip to or beyond end of buffer, * bytes_in_buffer = 0 => fill_input_buffer called => abort. 
*/ if(skip_count > src->pub.bytes_in_buffer) skip_count = src->pub.bytes_in_buffer; src->pub.bytes_in_buffer -= skip_count; src->pub.next_input_byte += skip_count; } /* * An additional method that can be provided by data source modules is the * resync_to_restart method for error recovery in the presence of RST markers. * For the moment, this source module just uses the default resync method * provided by the JPEG library. That method assumes that no backtracking * is possible. */ /* * Terminate source --- called by jpeg_finish_decompress * after all data has been read. Often a no-op. * * NB: *not* called by jpeg_abort or jpeg_destroy; surrounding * application must deal with any cleanup that should happen even * for error exit. */ METHODDEF(void) src_term(j_decompress_ptr UNUSED(cinfo)) { /* * no-op (we don't own the buffer and shouldn't, * to make possible multiple images in a source). */ } /* * Prepare for input from a buffer. * The caller is responsible for freeing it after finishing decompression. */ GLOBAL(void) src_prepare(j_decompress_ptr cinfo, DynArray* da) { SrcPtr src; const u8* p = da->base; const size_t size = da->cur_size; /* Treat 0-length buffer as fatal error */ if(size == 0) ERREXIT(cinfo, JERR_INPUT_EMPTY); /* * The source object is made permanent so that * a series of JPEG images can be read from the same file * by calling jpeg_mem_src only before the first one. * This makes it unsafe to use this manager and a different source * manager serially with the same JPEG object. Caveat programmer. */ /* first time for this JPEG object? 
*/ if(!cinfo->src) cinfo->src = (struct jpeg_source_mgr*) (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_PERMANENT, sizeof(SrcMgr)); /* (takes care of raising error if out of memory) */ src = (SrcPtr)cinfo->src; src->pub.init_source = src_init; src->pub.fill_input_buffer = src_fill_buffer; src->pub.skip_input_data = src_skip_data; src->pub.resync_to_restart = jpeg_resync_to_restart; /* default */ src->pub.term_source = src_term; /* * fill buffer with everything we have. * if fill_input_buffer is called, the buffer was overrun. */ src->pub.bytes_in_buffer = size; src->pub.next_input_byte = (JOCTET*)p; } //----------------------------------------------------------------------------- // mem destination manager //----------------------------------------------------------------------------- /* Expanded data destination object for memory output */ typedef struct { struct jpeg_destination_mgr pub; /* public fields */ DynArray* da; } DstMgr; typedef DstMgr* DstPtr; // this affects how often dst_empty_output_buffer is called (which // efficiently expands the DynArray) and how much tail memory we waste // (not an issue because it is freed immediately after compression). #define OUTPUT_BUF_SIZE 64*KiB /* choose an efficiently writeable size */ // note: can't call dst_empty_output_buffer from dst_init or vice versa // because only the former must advance da->pos. static void make_room_in_buffer(j_compress_ptr cinfo) { DstPtr dst = (DstPtr)cinfo->dest; DynArray* da = dst->da; void* start = da->base + da->cur_size; if(da_set_size(da, da->cur_size+OUTPUT_BUF_SIZE) != 0) ERREXIT(cinfo, JERR_FILE_WRITE); dst->pub.next_output_byte = (JOCTET*)start; dst->pub.free_in_buffer = OUTPUT_BUF_SIZE; } /* * Initialize destination --- called by jpeg_start_compress * before any data is actually written. */ METHODDEF(void) dst_init(j_compress_ptr cinfo) { make_room_in_buffer(cinfo); } /* * Empty the output buffer --- called whenever buffer fills up. 
* * In typical applications, this should write the entire output buffer * (ignoring the current state of next_output_byte & free_in_buffer), * reset the pointer & count to the start of the buffer, and return TRUE * indicating that the buffer has been dumped. * * */ METHODDEF(boolean) dst_empty_output_buffer(j_compress_ptr cinfo) { DstPtr dst = (DstPtr)cinfo->dest; DynArray* da = dst->da; // writing out OUTPUT_BUF_SIZE-dst->pub.free_in_buffer bytes // sounds reasonable, but makes for broken output. da->pos += OUTPUT_BUF_SIZE; make_room_in_buffer(cinfo); return TRUE; // not suspended } /* * Terminate destination --- called by jpeg_finish_compress * after all data has been written. Usually needs to flush buffer. * * NB: *not* called by jpeg_abort or jpeg_destroy; surrounding * application must deal with any cleanup that should happen even * for error exit. */ METHODDEF(void) dst_term(j_compress_ptr cinfo) { DstPtr dst = (DstPtr)cinfo->dest; DynArray* da = dst->da; // account for nbytes left in buffer da->pos += OUTPUT_BUF_SIZE - dst->pub.free_in_buffer; } /* * Prepare for output to a buffer. * The caller is responsible for allocating and writing out to disk after * compression is complete. */ GLOBAL(void) dst_prepare(j_compress_ptr cinfo, DynArray* da) { /* The destination object is made permanent so that multiple JPEG images * can be written to the same file without re-executing dst_prepare. * This makes it dangerous to use this manager and a different destination * manager serially with the same JPEG object, because their private object * sizes may be different. Caveat programmer. */ if (cinfo->dest == NULL) { /* first time for this JPEG object? 
*/ cinfo->dest = (struct jpeg_destination_mgr*)(*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_PERMANENT, sizeof(DstMgr)); } DstPtr dst = (DstPtr)cinfo->dest; dst->pub.init_destination = dst_init; dst->pub.empty_output_buffer = dst_empty_output_buffer; dst->pub.term_destination = dst_term; dst->da = da; } //----------------------------------------------------------------------------- // error handler, shared by jpg_(en|de)code //----------------------------------------------------------------------------- // the JPEG library's standard error handler (jerror.c) is divided into // several "methods" which we can override individually. This allows // adjusting the behavior without duplicating a lot of code, which may // have to be updated with each future release. // // we here override error_exit to return control to the library's caller // (i.e. jpg_(de|en)code) when a fatal error occurs, rather than calling exit. // // the replacement error_exit does a longjmp back to the caller's // setjmp return point. it needs access to the jmp_buf, // so we store it in a "subclass" of jpeg_error_mgr. struct JpgErrorMgr { struct jpeg_error_mgr pub; // "public" fields // jump here (back to JPEG lib caller) on error jmp_buf call_site; // description of first error encountered; must store in JPEG context // for thread safety. initialized in setup_err_mgr. char msg[JMSG_LENGTH_MAX]; JpgErrorMgr(jpeg_compress_struct& cinfo); JpgErrorMgr(jpeg_decompress_struct& cinfo); private: void init(); }; METHODDEF(void) err_error_exit(j_common_ptr cinfo) { // get subclass JpgErrorMgr* err_mgr = (JpgErrorMgr*)cinfo->err; // "output" error message (i.e. store in JpgErrorMgr; // call_site is responsible for displaying it via debug_printf) (*cinfo->err->output_message)(cinfo); // jump back to call site, i.e. jpg_(de|en)code longjmp(err_mgr->call_site, 1); } // stores message in JpgErrorMgr for later output by jpg_(de|en)code. 
// note: don't display message here, so the caller can // add some context (whether encoding or decoding, and filename). METHODDEF(void) err_output_message(j_common_ptr cinfo) { // get subclass JpgErrorMgr* err_mgr = (JpgErrorMgr*)cinfo->err; // this context already had an error message; don't overwrite it. // (subsequent errors probably aren't related to the real problem). // note: was set to '\0' by ctor. if(err_mgr->msg[0] != '\0') return; // generate the message and store it (*cinfo->err->format_message)(cinfo, err_mgr->msg); } void JpgErrorMgr::init() { // fill in pub fields jpeg_std_error(&pub); // .. and override some methods: pub.error_exit = err_error_exit; pub.output_message = err_output_message; // required for "already have message" check in err_output_message msg[0] = '\0'; } JpgErrorMgr::JpgErrorMgr(jpeg_compress_struct& cinfo) { init(); // hack: register this error manager with cinfo. // must be done before jpeg_create_* in case that fails // (unlikely, but possible if out of memory). cinfo.err = &pub; } JpgErrorMgr::JpgErrorMgr(jpeg_decompress_struct& cinfo) { init(); // hack: register this error manager with cinfo. // must be done before jpeg_create_* in case that fails // (unlikely, but possible if out of memory). cinfo.err = &pub; } //----------------------------------------------------------------------------- static LibError jpg_transform(Tex* UNUSED(t), size_t UNUSED(transforms)) { return INFO::TEX_CODEC_CANNOT_HANDLE; } // note: jpg_encode and jpg_decode cannot be combined due to // libjpg interface differences. // we do split them up into interface and impl to simplify // resource cleanup and avoid "dtor / setjmp interaction" warnings. // // rationale for row array: jpeg won't output more than a few // scanlines at a time, so we need an output loop anyway. however, // passing at least 2..4 rows is more efficient in low-quality modes // due to less copying. 
static LibError jpg_decode_impl(DynArray* da, jpeg_decompress_struct* cinfo, Tex* t) { src_prepare(cinfo, da); // ignore return value since: // - suspension is not possible with the mem data source // - we passed TRUE to raise an error if table-only JPEG file (void)jpeg_read_header(cinfo, TRUE); // set libjpg output format. we cannot go with the default because // Photoshop writes non-standard CMYK files that must be converted to RGB. size_t flags = 0; cinfo->out_color_space = JCS_RGB; if(cinfo->num_components == 1) { flags |= TEX_GREY; cinfo->out_color_space = JCS_GRAYSCALE; } // lower quality, but faster cinfo->dct_method = JDCT_IFAST; cinfo->do_fancy_upsampling = FALSE; // ignore return value since suspension is not possible with the // mem data source. // note: since we've set out_color_space, JPEG will always // return an acceptable image format; no need to check. (void)jpeg_start_decompress(cinfo); // scaled output image dimensions and final bpp are now available. int w = cinfo->output_width; int h = cinfo->output_height; int bpp = cinfo->output_components * 8; // alloc destination buffer const size_t pitch = w * bpp / 8; const size_t img_size = pitch * h; // for allow_rows - shared_ptr data = io_Allocate(img_size); + shared_ptr data; + AllocateAligned(data, img_size, pageSize); // read rows shared_ptr rows = tex_codec_alloc_rows(data.get(), h, pitch, TEX_TOP_DOWN, 0); // could use cinfo->output_scanline to keep track of progress, // but we need to count lines_left anyway (paranoia). JSAMPARRAY row = (JSAMPARRAY)rows.get(); JDIMENSION lines_left = h; while(lines_left != 0) { JDIMENSION lines_read = jpeg_read_scanlines(cinfo, row, lines_left); row += lines_read; lines_left -= lines_read; // we've decoded in-place; no need to further process } // ignore return value since suspension is not possible with the // mem data source. 
(void)jpeg_finish_decompress(cinfo); LibError ret = INFO::OK; if(cinfo->err->num_warnings != 0) ret = WARN::TEX_INVALID_DATA; // store image info t->data = data; t->dataSize = img_size; t->ofs = 0; t->w = w; t->h = h; t->bpp = bpp; t->flags = flags; return ret; } static LibError jpg_encode_impl(Tex* t, jpeg_compress_struct* cinfo, DynArray* da) { dst_prepare(cinfo, da); // describe image format // required: cinfo->image_width = (JDIMENSION)t->w; cinfo->image_height = (JDIMENSION)t->h; cinfo->input_components = (int)t->bpp / 8; cinfo->in_color_space = (t->bpp == 8)? JCS_GRAYSCALE : JCS_RGB; // defaults depend on cinfo->in_color_space already having been set! jpeg_set_defaults(cinfo); // (add optional settings, e.g. quality, here) // TRUE ensures that we will write a complete interchange-JPEG file. // don't change unless you are very sure of what you're doing. jpeg_start_compress(cinfo, TRUE); // if BGR, convert to RGB. WARN_ERR(tex_transform_to(t, t->flags & ~TEX_BGR)); const size_t pitch = t->w * t->bpp / 8; u8* data = tex_get_data(t); shared_ptr rows = tex_codec_alloc_rows(data, t->h, pitch, t->flags, TEX_TOP_DOWN); // could use cinfo->output_scanline to keep track of progress, // but we need to count lines_left anyway (paranoia). JSAMPARRAY row = (JSAMPARRAY)rows.get(); JDIMENSION lines_left = (JDIMENSION)t->h; while(lines_left != 0) { JDIMENSION lines_read = jpeg_write_scanlines(cinfo, row, lines_left); row += lines_read; lines_left -= lines_read; // we've decoded in-place; no need to further process } jpeg_finish_compress(cinfo); LibError ret = INFO::OK; if(cinfo->err->num_warnings != 0) ret = WARN::TEX_INVALID_DATA; return ret; } static bool jpg_is_hdr(const u8* file) { // JFIF requires SOI marker at start of stream. // we compare single bytes to be endian-safe. 
return (file[0] == 0xff && file[1] == 0xd8); } static bool jpg_is_ext(const OsPath& extension) { return extension == L".jpg" || extension == L".jpeg"; } static size_t jpg_hdr_size(const u8* UNUSED(file)) { return 0; // libjpg returns decoded image data; no header } static LibError jpg_decode(DynArray* RESTRICT da, Tex* RESTRICT t) { // contains the JPEG decompression parameters and pointers to // working space (allocated as needed by the JPEG library). struct jpeg_decompress_struct cinfo; JpgErrorMgr jerr(cinfo); if(setjmp(jerr.call_site)) return ERR::FAIL; jpeg_create_decompress(&cinfo); LibError ret = jpg_decode_impl(da, &cinfo, t); jpeg_destroy_decompress(&cinfo); // releases a "good deal" of memory return ret; } // limitation: palette images aren't supported static LibError jpg_encode(Tex* RESTRICT t, DynArray* RESTRICT da) { // contains the JPEG compression parameters and pointers to // working space (allocated as needed by the JPEG library). struct jpeg_compress_struct cinfo; JpgErrorMgr jerr(cinfo); if(setjmp(jerr.call_site)) WARN_RETURN(ERR::FAIL); jpeg_create_compress(&cinfo); LibError ret = jpg_encode_impl(t, &cinfo, da); jpeg_destroy_compress(&cinfo); // releases a "good deal" of memory return ret; } TEX_CODEC_REGISTER(jpg); Index: ps/trunk/source/lib/tex/tex_dds.cpp =================================================================== --- ps/trunk/source/lib/tex/tex_dds.cpp (revision 9349) +++ ps/trunk/source/lib/tex/tex_dds.cpp (revision 9350) @@ -1,637 +1,641 @@ /* Copyright (c) 2010 Wildfire Games * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above 
copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* * DDS (DirectDraw Surface) codec. */ #include "precompiled.h" #include "lib/byte_order.h" -#include "tex_codec.h" #include "lib/bits.h" #include "lib/timer.h" +#include "lib/allocators/shared_ptr.h" +#include "tex_codec.h" + // NOTE: the convention is bottom-up for DDS, but there's no way to tell. + //----------------------------------------------------------------------------- // S3TC decompression //----------------------------------------------------------------------------- // note: this code may not be terribly efficient. it's only used to // emulate hardware S3TC support - if that isn't available, performance // will suffer anyway due to increased video memory usage. // for efficiency, we precalculate as much as possible about a block // and store it here. class S3tcBlock { public: S3tcBlock(size_t dxt, const u8* RESTRICT block) : dxt(dxt) { // (careful, 'dxt != 1' doesn't work - there's also DXT1a) const u8* a_block = block; const u8* c_block = (dxt == 3 || dxt == 5)? 
block+8 : block; PrecalculateAlpha(dxt, a_block); PrecalculateColor(dxt, c_block); } void WritePixel(size_t pixel_idx, u8* RESTRICT out) const { debug_assert(pixel_idx < 16); // pixel index -> color selector (2 bit) -> color const size_t c_selector = access_bit_tbl(c_selectors, pixel_idx, 2); for(int i = 0; i < 3; i++) out[i] = (u8)c[c_selector][i]; // if no alpha, done if(dxt == 1) return; size_t a; if(dxt == 3) { // table of 4-bit alpha entries a = access_bit_tbl(a_bits, pixel_idx, 4); a |= a << 4; // expand to 8 bits (replicate high into low!) } else if(dxt == 5) { // pixel index -> alpha selector (3 bit) -> alpha const size_t a_selector = access_bit_tbl(a_bits, pixel_idx, 3); a = dxt5_a_tbl[a_selector]; } // (dxt == DXT1A) else a = c[c_selector][A]; out[A] = (u8)a; } private: // pixel colors are stored as size_t[4]. size_t rather than u8 protects from // overflow during calculations, and padding to an even size is a bit // more efficient (even though we don't need the alpha component). enum RGBA { R, G, B, A }; static inline void mix_2_3(size_t dst[4], size_t c0[4], size_t c1[4]) { for(int i = 0; i < 3; i++) dst[i] = (c0[i]*2 + c1[i] + 1)/3; } static inline void mix_avg(size_t dst[4], size_t c0[4], size_t c1[4]) { for(int i = 0; i < 3; i++) dst[i] = (c0[i]+c1[i])/2; } template static inline size_t access_bit_tbl(T tbl, size_t idx, size_t bit_width) { size_t val = (tbl >> (idx*bit_width)) & bit_mask(bit_width); return val; } // extract a range of bits and expand to 8 bits (by replicating // MS bits - see http://www.mindcontrol.org/~hplus/graphics/expand-bits.html ; // this is also the algorithm used by graphics cards when decompressing S3TC). // used to convert 565 to 32bpp RGB. 
static inline size_t unpack_to_8(u16 c, size_t bits_below, size_t num_bits) { const size_t num_filler_bits = 8-num_bits; const size_t field = (size_t)bits(c, bits_below, bits_below+num_bits-1); const size_t filler = field >> (num_bits-num_filler_bits); return (field << num_filler_bits) | filler; } void PrecalculateAlpha(size_t dxt, const u8* RESTRICT a_block) { // read block contents const u8 a0 = a_block[0], a1 = a_block[1]; a_bits = read_le64(a_block); // see below if(dxt == 5) { // skip a0,a1 bytes (data is little endian) a_bits >>= 16; const bool is_dxt5_special_combination = (a0 <= a1); u8* a = dxt5_a_tbl; // shorthand if(is_dxt5_special_combination) { a[0] = a0; a[1] = a1; a[2] = (4*a0 + 1*a1 + 2)/5; a[3] = (3*a0 + 2*a1 + 2)/5; a[4] = (2*a0 + 3*a1 + 2)/5; a[5] = (1*a0 + 4*a1 + 2)/5; a[6] = 0; a[7] = 255; } else { a[0] = a0; a[1] = a1; a[2] = (6*a0 + 1*a1 + 3)/7; a[3] = (5*a0 + 2*a1 + 3)/7; a[4] = (4*a0 + 3*a1 + 3)/7; a[5] = (3*a0 + 4*a1 + 3)/7; a[6] = (2*a0 + 5*a1 + 3)/7; a[7] = (1*a0 + 6*a1 + 3)/7; } } } void PrecalculateColor(size_t dxt, const u8* RESTRICT c_block) { // read block contents // .. S3TC reference colors (565 format). the color table is generated // from some combination of these, depending on their ordering. u16 rc[2]; for(int i = 0; i < 2; i++) rc[i] = read_le16(c_block + 2*i); // .. table of 2-bit color selectors c_selectors = read_le32(c_block+4); const bool is_dxt1_special_combination = (dxt == 1 || dxt == DXT1A) && rc[0] <= rc[1]; // c0 and c1 are the values of rc[], converted to 32bpp for(int i = 0; i < 2; i++) { c[i][R] = unpack_to_8(rc[i], 11, 5); c[i][G] = unpack_to_8(rc[i], 5, 6); c[i][B] = unpack_to_8(rc[i], 0, 5); } // c2 and c3 are combinations of c0 and c1: if(is_dxt1_special_combination) { mix_avg(c[2], c[0], c[1]); // c2 = (c0+c1)/2 for(int i = 0; i < 3; i++) c[3][i] = 0; // c3 = black c[3][A] = (dxt == DXT1A)? 
0 : 255; // (transparent iff DXT1a) } else { mix_2_3(c[2], c[0], c[1]); // c2 = 2/3*c0 + 1/3*c1 mix_2_3(c[3], c[1], c[0]); // c3 = 1/3*c0 + 2/3*c1 } } // the 4 color choices for each pixel (RGBA) size_t c[4][4]; // c[i][RGBA_component] // (DXT5 only) the 8 alpha choices u8 dxt5_a_tbl[8]; // alpha block; interpretation depends on dxt. u64 a_bits; // table of 2-bit color selectors u32 c_selectors; size_t dxt; }; struct S3tcDecompressInfo { size_t dxt; size_t s3tc_block_size; size_t out_Bpp; u8* out; }; static void s3tc_decompress_level(size_t UNUSED(level), size_t level_w, size_t level_h, const u8* RESTRICT level_data, size_t level_data_size, void* RESTRICT cbData) { S3tcDecompressInfo* di = (S3tcDecompressInfo*)cbData; const size_t dxt = di->dxt; const size_t s3tc_block_size = di->s3tc_block_size; // note: 1x1 images are legitimate (e.g. in mipmaps). they report their // width as such for glTexImage, but the S3TC data is padded to // 4x4 pixel block boundaries. const size_t blocks_w = round_up(level_w, size_t(4)) / 4u; const size_t blocks_h = round_up(level_h, size_t(4)) / 4u; const u8* s3tc_data = level_data; debug_assert(level_data_size % s3tc_block_size == 0); for(size_t block_y = 0; block_y < blocks_h; block_y++) { for(size_t block_x = 0; block_x < blocks_w; block_x++) { S3tcBlock block(dxt, s3tc_data); s3tc_data += s3tc_block_size; size_t pixel_idx = 0; for(int y = 0; y < 4; y++) { // this is ugly, but advancing after x, y and block_y loops // is no better. u8* out = (u8*)di->out + ((block_y*4+y)*blocks_w*4 + block_x*4) * di->out_Bpp; for(int x = 0; x < 4; x++) { block.WritePixel(pixel_idx, out); out += di->out_Bpp; pixel_idx++; } } } } debug_assert(s3tc_data == level_data + level_data_size); di->out += blocks_w*blocks_h * 16 * di->out_Bpp; } // decompress the given image (which is known to be stored as DXTn) // effectively in-place. updates Tex fields. 
static LibError s3tc_decompress(Tex* t) { // alloc new image memory // notes: // - dxt == 1 is the only non-alpha case. // - adding or stripping alpha channels during transform is not // our job; we merely output the same pixel format as given // (tex.cpp's plain transform could cover it, if ever needed). const size_t dxt = t->flags & TEX_DXT; const size_t out_bpp = (dxt != 1)? 32 : 24; const size_t out_size = tex_img_size(t) * out_bpp / t->bpp; - shared_ptr decompressedData = io_Allocate(out_size); + shared_ptr decompressedData; + AllocateAligned(decompressedData, out_size, pageSize); const size_t s3tc_block_size = (dxt == 3 || dxt == 5)? 16 : 8; S3tcDecompressInfo di = { dxt, s3tc_block_size, out_bpp/8, decompressedData.get() }; const u8* s3tc_data = tex_get_data(t); const int levels_to_skip = (t->flags & TEX_MIPMAPS)? 0 : TEX_BASE_LEVEL_ONLY; tex_util_foreach_mipmap(t->w, t->h, t->bpp, s3tc_data, levels_to_skip, 4, s3tc_decompress_level, &di); t->data = decompressedData; t->dataSize = out_size; t->ofs = 0; t->bpp = out_bpp; t->flags &= ~TEX_DXT; return INFO::OK; } //----------------------------------------------------------------------------- // DDS file format //----------------------------------------------------------------------------- // bit values and structure definitions taken from // http://msdn.microsoft.com/en-us/library/ee417785(VS.85).aspx #pragma pack(push, 1) // DDS_PIXELFORMAT.dwFlags // we've seen some DXT3 files that don't have this set (which is nonsense; // any image lacking alpha should be stored as DXT1). it's authoritative // if fourcc is DXT1 (there's no other way to tell DXT1 and DXT1a apart) // and ignored otherwise. 
#define DDPF_ALPHAPIXELS 0x00000001 #define DDPF_FOURCC 0x00000004 #define DDPF_RGB 0x00000040 struct DDS_PIXELFORMAT { u32 dwSize; // size of structure (32) u32 dwFlags; // indicates which fields are valid u32 dwFourCC; // (DDPF_FOURCC) FOURCC code, "DXTn" u32 dwRGBBitCount; // (DDPF_RGB) bits per pixel u32 dwRBitMask; u32 dwGBitMask; u32 dwBBitMask; u32 dwABitMask; // (DDPF_ALPHAPIXELS) }; // DDS_HEADER.dwFlags (none are optional) #define DDSD_CAPS 0x00000001 #define DDSD_HEIGHT 0x00000002 #define DDSD_WIDTH 0x00000004 #define DDSD_PITCH 0x00000008 // used when texture is uncompressed #define DDSD_PIXELFORMAT 0x00001000 #define DDSD_MIPMAPCOUNT 0x00020000 #define DDSD_LINEARSIZE 0x00080000 // used when texture is compressed #define DDSD_DEPTH 0x00800000 // DDS_HEADER.dwCaps #define DDSCAPS_MIPMAP 0x00400000 // optional #define DDSCAPS_TEXTURE 0x00001000 // required struct DDS_HEADER { // (preceded by the FOURCC "DDS ") u32 dwSize; // size of structure (124) u32 dwFlags; // indicates which fields are valid u32 dwHeight; // (DDSD_HEIGHT) height of main image (pixels) u32 dwWidth; // (DDSD_WIDTH ) width of main image (pixels) u32 dwPitchOrLinearSize; // (DDSD_LINEARSIZE) size [bytes] of top level // (DDSD_PITCH) bytes per row (%4 = 0) u32 dwDepth; // (DDSD_DEPTH) vol. textures: vol. depth u32 dwMipMapCount; // (DDSD_MIPMAPCOUNT) total # levels u32 dwReserved1[11]; // reserved DDS_PIXELFORMAT ddpf; // (DDSD_PIXELFORMAT) surface description u32 dwCaps; // (DDSD_CAPS) misc. surface flags u32 dwCaps2; u32 dwCaps3; u32 dwCaps4; u32 dwReserved2; // reserved }; #pragma pack(pop) static bool is_valid_dxt(size_t dxt) { switch(dxt) { case 0: case 1: case DXT1A: case 3: case 5: return true; default: return false; } } // extract all information from DDS pixel format and store in bpp, flags. // pf points to the DDS file's header; all fields must be endian-converted // before use. // output parameters invalid on failure. 
static LibError decode_pf(const DDS_PIXELFORMAT* pf, size_t& bpp, size_t& flags) { bpp = 0; flags = 0; // check struct size if(read_le32(&pf->dwSize) != sizeof(DDS_PIXELFORMAT)) WARN_RETURN(ERR::TEX_INVALID_SIZE); // determine type const size_t pf_flags = (size_t)read_le32(&pf->dwFlags); // .. uncompressed if(pf_flags & DDPF_RGB) { const size_t pf_bpp = (size_t)read_le32(&pf->dwRGBBitCount); const size_t pf_r_mask = (size_t)read_le32(&pf->dwRBitMask); const size_t pf_g_mask = (size_t)read_le32(&pf->dwGBitMask); const size_t pf_b_mask = (size_t)read_le32(&pf->dwBBitMask); const size_t pf_a_mask = (size_t)read_le32(&pf->dwABitMask); // (checked below; must be set in case below warning is to be // skipped) bpp = pf_bpp; if(pf_flags & DDPF_ALPHAPIXELS) { // something weird other than RGBA or BGRA if(pf_a_mask != 0xFF000000) goto unsupported_component_ordering; flags |= TEX_ALPHA; } // make sure component ordering is 0xBBGGRR = RGB (see below) if(pf_r_mask != 0xFF || pf_g_mask != 0xFF00 || pf_b_mask != 0xFF0000) { // DDS_PIXELFORMAT in theory supports any ordering of R,G,B,A. // we need to upload to OpenGL, which can only receive BGR(A) or // RGB(A). the former still requires conversion (done by driver), // so it's slower. since the very purpose of supporting uncompressed // DDS is storing images in a format that requires no processing, // we do not allow any weird orderings that require runtime work. // instead, the artists must export with the correct settings. unsupported_component_ordering: WARN_RETURN(ERR::TEX_FMT_INVALID); } RETURN_ERR(tex_validate_plain_format(bpp, (int)flags)); } // .. compressed else if(pf_flags & DDPF_FOURCC) { // set effective bpp and store DXT format in flags & TEX_DXT. // no endian conversion necessary - FOURCC() takes care of that. 
switch(pf->dwFourCC) { case FOURCC('D','X','T','1'): bpp = 4; if(pf_flags & DDPF_ALPHAPIXELS) flags |= DXT1A | TEX_ALPHA; else flags |= 1; break; case FOURCC('D','X','T','3'): bpp = 8; flags |= 3; flags |= TEX_ALPHA; // see DDPF_ALPHAPIXELS decl break; case FOURCC('D','X','T','5'): bpp = 8; flags |= 5; flags |= TEX_ALPHA; // see DDPF_ALPHAPIXELS decl break; default: WARN_RETURN(ERR::TEX_FMT_INVALID); } } // .. neither uncompressed nor compressed - invalid else WARN_RETURN(ERR::TEX_FMT_INVALID); return INFO::OK; } // extract all information from DDS header and store in w, h, bpp, flags. // sd points to the DDS file's header; all fields must be endian-converted // before use. // output parameters invalid on failure. static LibError decode_sd(const DDS_HEADER* sd, size_t& w, size_t& h, size_t& bpp, size_t& flags) { // check header size if(read_le32(&sd->dwSize) != sizeof(*sd)) WARN_RETURN(ERR::CORRUPTED); // flags (indicate which fields are valid) const size_t sd_flags = (size_t)read_le32(&sd->dwFlags); // .. not all required fields are present // note: we can't guess dimensions - the image may not be square. 
const size_t sd_req_flags = DDSD_CAPS|DDSD_HEIGHT|DDSD_WIDTH|DDSD_PIXELFORMAT; if((sd_flags & sd_req_flags) != sd_req_flags) WARN_RETURN(ERR::TEX_INCOMPLETE_HEADER); // image dimensions h = (size_t)read_le32(&sd->dwHeight); w = (size_t)read_le32(&sd->dwWidth); // pixel format RETURN_ERR(decode_pf(&sd->ddpf, bpp, flags)); // if the image is not aligned with the S3TC block size, it is stored // with extra pixels on the bottom left to fill up the space, so we need // to account for those when calculating how big it should be size_t stored_h, stored_w; if(flags & TEX_DXT) { stored_h = round_up(h, size_t(4)); stored_w = round_up(w, size_t(4)); } else { stored_h = h; stored_w = w; } // verify pitch or linear size, if given const size_t pitch = stored_w*bpp/8; const size_t sd_pitch_or_size = (size_t)read_le32(&sd->dwPitchOrLinearSize); if(sd_flags & DDSD_PITCH) { if(sd_pitch_or_size != round_up(pitch, size_t(4))) DEBUG_WARN_ERR(ERR::CORRUPTED); } if(sd_flags & DDSD_LINEARSIZE) { // some DDS tools mistakenly store the total size of all levels, // so allow values close to that as well const ssize_t totalSize = ssize_t(pitch*stored_h*1.333333f); if(sd_pitch_or_size != pitch*stored_h && abs(ssize_t(sd_pitch_or_size)-totalSize) > 64) DEBUG_WARN_ERR(ERR::CORRUPTED); } // note: both flags set would be invalid; no need to check for that, // though, since one of the above tests would fail. // mipmaps if(sd_flags & DDSD_MIPMAPCOUNT) { const size_t mipmap_count = (size_t)read_le32(&sd->dwMipMapCount); if(mipmap_count) { // mipmap chain is incomplete // note: DDS includes the base level in its count, hence +1. if(mipmap_count != ceil_log2(std::max(w,h))+1) WARN_RETURN(ERR::TEX_FMT_INVALID); flags |= TEX_MIPMAPS; } } // check for volume textures if(sd_flags & DDSD_DEPTH) { const size_t depth = (size_t)read_le32(&sd->dwDepth); if(depth) WARN_RETURN(ERR::NOT_IMPLEMENTED); } // check caps // .. 
this is supposed to be set, but don't bail if not (pointless) debug_assert(sd->dwCaps & DDSCAPS_TEXTURE); // .. sanity check: warn if mipmap flag not set (don't bail if not // because we've already made the decision). const bool mipmap_cap = (sd->dwCaps & DDSCAPS_MIPMAP) != 0; const bool mipmap_flag = (flags & TEX_MIPMAPS) != 0; debug_assert(mipmap_cap == mipmap_flag); // note: we do not check for cubemaps and volume textures (not supported) // because the file may still have useful data we can read. return INFO::OK; } //----------------------------------------------------------------------------- static bool dds_is_hdr(const u8* file) { return *(u32*)file == FOURCC('D','D','S',' '); } static bool dds_is_ext(const OsPath& extension) { return extension == L".dds"; } static size_t dds_hdr_size(const u8* UNUSED(file)) { return 4+sizeof(DDS_HEADER); } static LibError dds_decode(DynArray* RESTRICT da, Tex* RESTRICT t) { u8* file = da->base; const DDS_HEADER* sd = (const DDS_HEADER*)(file+4); RETURN_ERR(decode_sd(sd, t->w, t->h, t->bpp, t->flags)); return INFO::OK; } static LibError dds_encode(Tex* RESTRICT UNUSED(t), DynArray* RESTRICT UNUSED(da)) { // note: do not return ERR::NOT_IMPLEMENTED et al. because that would // break tex_write (which assumes either this, 0 or errors are returned). 
return INFO::TEX_CODEC_CANNOT_HANDLE; } TIMER_ADD_CLIENT(tc_dds_transform); static LibError dds_transform(Tex* t, size_t transforms) { TIMER_ACCRUE(tc_dds_transform); size_t mipmaps = t->flags & TEX_MIPMAPS; size_t dxt = t->flags & TEX_DXT; debug_assert(is_valid_dxt(dxt)); const size_t transform_mipmaps = transforms & TEX_MIPMAPS; const size_t transform_dxt = transforms & TEX_DXT; // requesting removal of mipmaps if(mipmaps && transform_mipmaps) { // we don't need to actually change anything except the flag - the // mipmap levels will just be treated as trailing junk t->flags &= ~TEX_MIPMAPS; return INFO::OK; } // requesting decompression if(dxt && transform_dxt) { RETURN_ERR(s3tc_decompress(t)); return INFO::OK; } // both are DXT (unsupported; there are no flags we can change while // compressed) or requesting compression (not implemented) or // both not DXT (nothing we can do) - bail. return INFO::TEX_CODEC_CANNOT_HANDLE; } TEX_CODEC_REGISTER(dds); Index: ps/trunk/source/lib/allocators/pool.cpp =================================================================== --- ps/trunk/source/lib/allocators/pool.cpp (revision 9349) +++ ps/trunk/source/lib/allocators/pool.cpp (revision 9350) @@ -1,126 +1,131 @@ /* Copyright (c) 2010 Wildfire Games * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* * pool allocator */ #include "precompiled.h" #include "lib/allocators/pool.h" #include "lib/allocators/mem_util.h" +#include "lib/timer.h" + +TIMER_ADD_CLIENT(tc_pool_alloc); + LibError pool_create(Pool* p, size_t max_size, size_t el_size) { if(el_size == POOL_VARIABLE_ALLOCS) p->el_size = 0; else p->el_size = mem_RoundUpToAlignment(el_size); p->freelist = mem_freelist_Sentinel(); RETURN_ERR(da_alloc(&p->da, max_size)); return INFO::OK; } LibError pool_destroy(Pool* p) { // don't be picky and complain if the freelist isn't empty; // we don't care since it's all part of the da anyway. // however, zero it to prevent further allocs from succeeding. p->freelist = mem_freelist_Sentinel(); return da_free(&p->da); } bool pool_contains(const Pool* p, void* el) { // outside of our range if(!(p->da.base <= el && el < p->da.base+p->da.pos)) return false; // sanity check: it should be aligned (if pool has fixed-size elements) if(p->el_size) debug_assert((uintptr_t)((u8*)el - p->da.base) % p->el_size == 0); return true; } void* pool_alloc(Pool* p, size_t size) { + TIMER_ACCRUE(tc_pool_alloc); // if pool allows variable sizes, go with the size parameter, // otherwise the pool el_size setting. const size_t el_size = p->el_size? p->el_size : mem_RoundUpToAlignment(size); // note: this can never happen in pools with variable-sized elements // because they disallow pool_free. 
void* el = mem_freelist_Detach(p->freelist); if(el) goto have_el; // alloc a new entry { // expand, if necessary if(da_reserve(&p->da, el_size) < 0) return 0; el = p->da.base + p->da.pos; p->da.pos += el_size; } have_el: debug_assert(pool_contains(p, el)); // paranoia return el; } void pool_free(Pool* p, void* el) { // only allowed to free items if we were initialized with // fixed el_size. (this avoids having to pass el_size here and // check if requested_size matches that when allocating) if(p->el_size == 0) { debug_assert(0); // cannot free variable-size items return; } if(pool_contains(p, el)) mem_freelist_AddToFront(p->freelist, el); else debug_assert(0); // invalid pointer (not in pool) } void pool_free_all(Pool* p) { p->freelist = mem_freelist_Sentinel(); // must be reset before da_set_size or CHECK_DA will complain. p->da.pos = 0; da_set_size(&p->da, 0); } size_t pool_committed(Pool* p) { return p->da.cur_size; } Index: ps/trunk/source/lib/sysdep/smbios.cpp =================================================================== --- ps/trunk/source/lib/sysdep/smbios.cpp (revision 9349) +++ ps/trunk/source/lib/sysdep/smbios.cpp (revision 9350) @@ -1,694 +1,694 @@ /* Copyright (c) 2011 Wildfire Games * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* * provide access to System Management BIOS information */ #include "precompiled.h" #include "lib/sysdep/smbios.h" #include "lib/bits.h" #include "lib/byte_order.h" // FOURCC_BE #include "lib/module_init.h" #if OS_WIN # include "lib/sysdep/os/win/wutil.h" # include "lib/sysdep/os/win/wfirmware.h" #endif namespace SMBIOS { //----------------------------------------------------------------------------- // GetTable #if OS_WIN static LibError GetTable(wfirmware::Table& table) { // (MSDN mentions 'RSMB', but multi-character literals are implementation-defined.) const DWORD provider = FOURCC_BE('R','S','M','B'); // (MSDN says this will be 0, but we'll retrieve it for 100% correctness.) 
wfirmware::TableIds tableIds = wfirmware::GetTableIDs(provider); if(tableIds.empty()) return ERR::_1; // NOWARN (happens on 32-bit XP) table = wfirmware::GetTable(provider, tableIds[0]); if(table.empty()) WARN_RETURN(ERR::_2); // strip the WmiHeader struct WmiHeader { u8 used20CallingMethod; u8 majorVersion; u8 minorVersion; u8 dmiRevision; u32 length; }; const WmiHeader* wmiHeader = (const WmiHeader*)&table[0]; debug_assert(table.size() == sizeof(WmiHeader) + wmiHeader->length); memmove(&table[0], &table[sizeof(WmiHeader)], table.size()-sizeof(WmiHeader)); return INFO::OK; } #endif // OS_WIN //----------------------------------------------------------------------------- // strings // pointers to the strings (if any) at the end of an SMBIOS structure typedef std::vector Strings; static Strings ExtractStrings(const Header* header, const char* end, const Header*& next) { Strings strings; const char* pos = ((const char*)header) + header->length; while(pos <= end-2) { if(*pos == '\0') { pos++; if(*pos == 0) { pos++; break; } } strings.push_back(pos); pos += strlen(pos); } next = (const Header*)pos; return strings; } // storage for all structures' strings (must be copied from the original // wfirmware table since its std::vector container cannot be stored in a // static variable because we may be called before _cinit) static char* stringStorage; static char* stringStoragePos; // pointers to dynamically allocated structures static Structures structures; static void Cleanup() // called via atexit { SAFE_FREE(stringStorage); stringStoragePos = 0; // free each allocated structure #define STRUCTURE(name, id) SAFE_FREE(structures.name##_); STRUCTURES #undef STRUCTURE } //----------------------------------------------------------------------------- // FieldInitializer // define function templates that invoke a Visitor for each of a structure's fields #define FIELD(flags, type, name, units) visitor(flags, p.name, #name, units); #define STRUCTURE(name, id) template void 
VisitFields(name& p, Visitor& visitor) { name##_FIELDS } STRUCTURES #undef STRUCTURE #undef FIELD // initialize each of a structure's fields by copying from the SMBIOS data class FieldInitializer { NONCOPYABLE(FieldInitializer); // reference member public: FieldInitializer(const Header* header, const Strings& strings) : data((const u8*)(header+1)) , end((const u8*)header + header->length) , strings(strings) { } template void operator()(size_t flags, Field& field, const char* UNUSED(name), const char* UNUSED(units)) { if((flags & F_DERIVED) || data >= end) { field = Field(); return; } Read(field, 0); // SFINAE } private: template T ReadValue() { T value; memcpy(&value, data, sizeof(value)); data += sizeof(value); return value; } // construct from SMBIOS representations that don't match the // actual type (e.g. enum) template void Read(Field& field, typename Field::T*) { field = Field(ReadValue()); } template void Read(Field& field, ...) { field = ReadValue(); } const u8* data; const u8* end; const Strings& strings; }; // C++03 14.7.3(2): "An explicit specialization shall be declared [..] in the // namespace of which the enclosing class [..] is a member. // (this specialization avoids a "forcing value to bool true or false" warning) template<> void FieldInitializer::operator()(size_t flags, bool& UNUSED(t), const char* UNUSED(name), const char* UNUSED(units)) { // SMBIOS doesn't specify any individual booleans, so we're only called for // derived fields and don't need to do anything. 
debug_assert(flags & F_DERIVED); } template<> void FieldInitializer::operator()(size_t flags, const char*& t, const char* UNUSED(name), const char* UNUSED(units)) { t = 0; // (allow immediate `return' when the string is found to be invalid) u8 number; operator()(flags, number, 0, 0); if(number == 0) // no string given return; if(number > strings.size()) { debug_printf(L"SMBIOS: invalid string number %d (count=%d)\n", number, strings.size()); return; } // copy to stringStorage strcpy(stringStoragePos, strings[number-1]); t = stringStoragePos; stringStoragePos += strlen(t)+1; } //----------------------------------------------------------------------------- // Fixup (e.g. compute derived fields) template void Fixup(Structure& UNUSED(structure)) { // primary template: do nothing } template<> void Fixup(Bios& p) { p.size = size_t(p.encodedSize+1) * 64*KiB; } template<> void Fixup(Processor& p) { p.populated = (p.status & 0x40) != 0; p.status = (ProcessorStatus)bits(p.status, 0, 2); if(p.voltage & 0x80) p.voltage &= ~0x80; else { // (arbitrarily) report the lowest supported value if(IsBitSet(p.voltage, 0)) p.voltage = 50; if(IsBitSet(p.voltage, 1)) p.voltage = 33; if(IsBitSet(p.voltage, 2)) p.voltage = 29; } } template<> void Fixup(Cache& p) { struct DecodeSize { u64 operator()(u16 size) const { const size_t granularity = IsBitSet(size, 15)? 
64*KiB : 1*KiB; return u64(bits(size, 0, 14)) * granularity; } }; p.maxSize = DecodeSize()(p.maxSize16); p.installedSize = DecodeSize()(p.installedSize16); p.level = bits(p.configuration, 0, 2)+1; p.location = (CacheLocation)bits(p.configuration, 5, 6); p.mode = (CacheMode)bits(p.configuration, 8, 9); p.configuration = (CacheConfigurationFlags)(p.configuration & ~0x367); } template<> void Fixup(SystemSlot& p) { // (only initialize function and device numbers if functionAndDeviceNumber is valid) if(p.functionAndDeviceNumber != 0xFF) { p.functionNumber = bits(p.functionAndDeviceNumber, 0, 2); p.deviceNumber = bits(p.functionAndDeviceNumber, 3, 7); } } template<> void Fixup(OnBoardDevices& p) { p.enabled = (p.type.value & 0x80) != 0; p.type = (OnBoardDeviceType)(p.type & ~0x80); } template<> void Fixup(MemoryArray& p) { if(p.maxCapacity32 != (u32)INT32_MIN) p.maxCapacity = u64(p.maxCapacity32) * KiB; } template<> void Fixup(MemoryDevice& p) { if(p.size16 != INT16_MAX) p.size = u64(bits(p.size16, 0, 14)) * (IsBitSet(p.size16, 15)? 
1*KiB : 1*MiB); else p.size = u64(bits(p.size32, 0, 30)) * MiB; p.rank = bits(p.attributes, 0, 3); } template<> void Fixup(MemoryArrayMappedAddress& p) { if(p.startAddress32 != UINT32_MAX) p.startAddress = u64(p.startAddress32) * KiB; if(p.endAddress32 != UINT32_MAX) p.endAddress = u64(p.endAddress32) * KiB; } template<> void Fixup(MemoryDeviceMappedAddress& p) { if(p.startAddress32 != UINT32_MAX) p.startAddress = u64(p.startAddress32) * KiB; if(p.endAddress32 != UINT32_MAX) p.endAddress = u64(p.endAddress32) * KiB; } template<> void Fixup(VoltageProbe& p) { p.location = (VoltageProbeLocation)bits(p.locationAndStatus, 0, 4); p.status = (Status)bits(p.locationAndStatus, 5, 7); } template<> void Fixup(CoolingDevice& p) { p.type = (CoolingDeviceType)bits(p.typeAndStatus, 0, 4); p.status = (Status)bits(p.typeAndStatus, 5, 7); } template<> void Fixup(TemperatureProbe& p) { p.location = (TemperatureProbeLocation)bits(p.locationAndStatus, 0, 4); p.status = (Status)bits(p.locationAndStatus, 5, 7); } //----------------------------------------------------------------------------- // InitStructures template void AddStructure(const Header* header, const Strings& strings, Structure*& listHead) { Structure* const p = (Structure*)calloc(1, sizeof(Structure)); // freed in Cleanup p->header = *header; if(listHead) { // insert at end of list to preserve order of caches/slots Structure* last = listHead; while(last->next) last = last->next; last->next = p; } else listHead = p; FieldInitializer fieldInitializer(header, strings); VisitFields(*p, fieldInitializer); Fixup(*p); } static LibError InitStructures() { #if OS_WIN wfirmware::Table table; RETURN_ERR(GetTable(table)); #else std::vector table; return ERR::NOT_IMPLEMENTED; #endif // (instead of counting the total string size, just use the // SMBIOS size - typically 1-2 KB - as an upper bound.) 
stringStoragePos = stringStorage = (char*)calloc(table.size(), sizeof(char)); // freed in Cleanup if(!stringStorage) WARN_RETURN(ERR::NO_MEM); atexit(Cleanup); const Header* header = (const Header*)&table[0]; const Header* const end = (const Header*)(&table[0] + table.size()); for(;;) { if(header+1 > end) { debug_printf(L"SMBIOS: table not terminated\n"); break; } if(header->id == 127) // end break; if(header->length < sizeof(Header)) WARN_RETURN(ERR::_3); const Header* next; const Strings strings = ExtractStrings(header, (const char*)end, next); switch(header->id) { #define STRUCTURE(name, id) case id: AddStructure(header, strings, structures.name##_); break; STRUCTURES #undef STRUCTURE default: if(32 < header->id && header->id < 126) // only mention non-proprietary structures of which we are not aware debug_printf(L"SMBIOS: unknown structure type %d\n", header->id); break; } header = next; } return INFO::OK; } //----------------------------------------------------------------------------- // StringFromEnum template std::string StringFromEnum(Enum UNUSED(field)) { return "(unknown enumeration)"; } #define ENUM(enumerator, VALUE)\ if(field.value == VALUE) /* single bit flag or matching enumerator */\ return #enumerator;\ if(!is_pow2(VALUE)) /* these aren't bit flags */\ {\ allowFlags = false;\ string.clear();\ }\ if(allowFlags && (field.value & (VALUE)))\ {\ if(!string.empty())\ string += "|";\ string += #enumerator;\ } #define ENUMERATION(name, type)\ template<>\ std::string StringFromEnum(name field)\ {\ std::string string;\ bool allowFlags = true;\ name##_ENUMERATORS\ /* (don't warn about the value 0, e.g. 
optional fields) */\ if(string.empty() && field != 0)\ {\ std::stringstream ss;\ ss << "(unknown " << #name << " " << field.value << ")";\ return ss.str();\ }\ return string;\ } ENUMERATIONS #undef ENUMERATION #undef ENUM //----------------------------------------------------------------------------- // FieldStringizer class FieldStringizer { NONCOPYABLE(FieldStringizer); // reference member public: FieldStringizer(std::stringstream& ss) : ss(ss) { } template void operator()(size_t flags, Field& field, const char* name, const char* units) { if(flags & F_INTERNAL) return; Write(flags, field, name, units, 0); // SFINAE } // special case for sizes [bytes] template void operator()(size_t flags, Size& size, const char* name, const char* units) { if(flags & F_INTERNAL) return; const u64 value = (u64)size.value; if(value == 0) return; u64 divisor; if(value > GiB) { divisor = GiB; units = " GiB"; } else if(value > MiB) { divisor = MiB; units = " MiB"; } else if(value > KiB) { divisor = KiB; units = " KiB"; } else { divisor = 1; units = " bytes"; } WriteName(name); // (avoid floating-point output unless division would truncate the value) if(value % divisor == 0) ss << (value/divisor); else ss << (double(value)/divisor); WriteUnits(units); } private: void WriteName(const char* name) { ss << " "; // indent ss << name << ": "; } void WriteUnits(const char* units) { ss << units; ss << "\n"; } // enumerations and bit flags template void Write(size_t UNUSED(flags), Field& field, const char* name, const char* units, typename Field::Enum*) { // 0 usually means "not included in structure", but some packed // enumerations actually use that value. therefore, only skip this // field if it is zero AND no matching enumerator is found. 
const std::string string = StringFromEnum(field); if(string.empty()) return; WriteName(name); ss << string; /* reuse the text computed above instead of stringizing the field twice */ WriteUnits(units); } // all other field types template<class Field> void Write(size_t flags, Field& field, const char* name, const char* units, ...) { // SMBIOS uses the smallest and sometimes also largest representable // signed/unsigned value to indicate `unknown' (except enumerators - // but those are handled in the other function overload), so skip them. if(field == std::numeric_limits<Field>::min() || field == std::numeric_limits<Field>::max()) return; WriteName(name); if(flags & F_HEX) ss << std::hex << std::uppercase; if(sizeof(field) == 1) // avoid printing as a character ss << unsigned(field); else ss << field; if(flags & F_HEX) ss << std::dec; // (revert to decimal, e.g. for displaying sizes) WriteUnits(units); } std::stringstream& ss; }; template<> void FieldStringizer::operator()(size_t flags, bool& value, const char* name, const char* units) { if(flags & F_INTERNAL) return; WriteName(name); ss << (value? 
"true" : "false"); WriteUnits(units); } template<> void FieldStringizer::operator()(size_t flags, Handle& handle, const char* name, const char* units) { if(flags & F_INTERNAL) return; // don't display useless handles if(handle.value == 0 || handle.value == 0xFFFE || handle.value == 0xFFFF) return; WriteName(name); ss << handle.value; WriteUnits(units); } template<> void FieldStringizer::operator()(size_t flags, const char*& value, const char* name, const char* units) { if(flags & F_INTERNAL) return; // don't display useless strings if(value == 0) return; std::string string(value); const size_t lastChar = string.find_last_not_of(' '); if(lastChar == std::string::npos) // nothing but spaces return; string.resize(lastChar+1); // strip trailing spaces - if(string == "To Be Filled By O.E.M.") + /* compare the trimmed copy (not the raw value) so trailing padding cannot defeat the placeholder filter */ + if(!stricmp(string.c_str(), "To Be Filled By O.E.M.")) return; WriteName(name); ss << "\"" << string << "\""; WriteUnits(units); } //----------------------------------------------------------------------------- // public interface const Structures* GetStructures() { static ModuleInitState initState; LibError ret = ModuleInit(&initState, InitStructures); // (callers have to check if member pointers are nonzero anyway, so // we always return a valid pointer to simplify most use cases.) 
UNUSED2(ret); return &structures; } template void StringizeStructure(const char* name, Structure* p, std::stringstream& ss) { for(; p; p = p->next) { ss << "\n[" << name << "]\n"; FieldStringizer fieldStringizer(ss); VisitFields(*p, fieldStringizer); } } std::string StringizeStructures(const Structures* structures) { std::stringstream ss; #define STRUCTURE(name, id) StringizeStructure(#name, structures->name##_, ss); STRUCTURES #undef STRUCTURE return ss.str(); } } // namespace SMBIOS Index: ps/trunk/source/lib/res/sound/ogg.cpp =================================================================== --- ps/trunk/source/lib/res/sound/ogg.cpp (revision 9349) +++ ps/trunk/source/lib/res/sound/ogg.cpp (revision 9350) @@ -1,282 +1,283 @@ #include "precompiled.h" #include "ogg.h" #include "lib/external_libraries/openal.h" #include "lib/external_libraries/vorbis.h" #include "lib/byte_order.h" -#include "lib/file/file.h" +#include "lib/file/io/io.h" #include "lib/file/file_system_util.h" static LibError LibErrorFromVorbis(int err) { switch(err) { case 0: return INFO::OK; case OV_HOLE: return ERR::AGAIN; case OV_EREAD: return ERR::IO; case OV_EFAULT: return ERR::LOGIC; case OV_EIMPL: return ERR::NOT_IMPLEMENTED; case OV_EINVAL: return ERR::INVALID_PARAM; case OV_ENOTVORBIS: return ERR::NOT_SUPPORTED; case OV_EBADHEADER: return ERR::CORRUPTED; case OV_EVERSION: return ERR::VERSION; case OV_ENOTAUDIO: return ERR::_1; case OV_EBADPACKET: return ERR::_2; case OV_EBADLINK: return ERR::_3; case OV_ENOSEEK: return ERR::_4; default: return ERR::FAIL; } } //----------------------------------------------------------------------------- class VorbisFileAdapter { public: VorbisFileAdapter(const PFile& openedFile) : file(openedFile) , size(fs_util::FileSize(openedFile->Pathname())) , offset(0) { } static size_t Read(void* bufferToFill, size_t itemSize, size_t numItems, void* context) { VorbisFileAdapter* adapter = (VorbisFileAdapter*)context; const off_t sizeRequested = numItems*itemSize; 
const off_t sizeRemaining = adapter->size - adapter->offset; const size_t sizeToRead = (size_t)std::min(sizeRequested, sizeRemaining); - if(adapter->file->Read(adapter->offset, (u8*)bufferToFill, sizeToRead) == INFO::OK) + io::Operation op(*adapter->file.get(), bufferToFill, sizeToRead, adapter->offset); + if(io::Run(op) == INFO::OK) { adapter->offset += sizeToRead; return sizeToRead; } errno = EIO; return 0; } static int Seek(void* context, ogg_int64_t offset, int whence) { VorbisFileAdapter* adapter = (VorbisFileAdapter*)context; off_t origin = 0; switch(whence) { case SEEK_SET: origin = 0; break; case SEEK_CUR: origin = adapter->offset; break; case SEEK_END: origin = adapter->size; /* fseek semantics: end-relative offsets start at the end itself, not one byte past it */ break; NODEFAULT; } adapter->offset = Clamp(off_t(origin+offset), off_t(0), adapter->size); return 0; } static int Close(void* context) { VorbisFileAdapter* adapter = (VorbisFileAdapter*)context; adapter->file.reset(); return 0; // return value is ignored } static long Tell(void* context) { VorbisFileAdapter* adapter = (VorbisFileAdapter*)context; return adapter->offset; } private: PFile file; off_t size; off_t offset; }; //----------------------------------------------------------------------------- class VorbisBufferAdapter { public: VorbisBufferAdapter(const shared_ptr<u8>& buffer, size_t size) : buffer(buffer) , size(size) , offset(0) { } static size_t Read(void* bufferToFill, size_t itemSize, size_t numItems, void* context) { VorbisBufferAdapter* adapter = (VorbisBufferAdapter*)context; const off_t sizeRequested = numItems*itemSize; const off_t sizeRemaining = adapter->size - adapter->offset; const size_t sizeToRead = (size_t)std::min(sizeRequested, sizeRemaining); memcpy(bufferToFill, adapter->buffer.get() + adapter->offset, sizeToRead); adapter->offset += sizeToRead; return sizeToRead; } static int Seek(void* context, ogg_int64_t offset, int whence) { VorbisBufferAdapter* adapter = (VorbisBufferAdapter*)context; off_t origin = 0; switch(whence) { case SEEK_SET: origin = 0; break; 
case SEEK_CUR: origin = adapter->offset; break; case SEEK_END: origin = adapter->size; /* fseek semantics: end-relative offsets start at the end itself, not one byte past it */ break; NODEFAULT; } adapter->offset = Clamp(off_t(origin+offset), off_t(0), adapter->size); return 0; } static int Close(void* context) { VorbisBufferAdapter* adapter = (VorbisBufferAdapter*)context; adapter->buffer.reset(); return 0; // return value is ignored } static long Tell(void* context) { VorbisBufferAdapter* adapter = (VorbisBufferAdapter*)context; return adapter->offset; } private: shared_ptr<u8> buffer; off_t size; off_t offset; }; //----------------------------------------------------------------------------- template<typename Adapter> class OggStreamImpl : public OggStream { public: OggStreamImpl(const Adapter& adapter) : adapter(adapter) { } LibError Open() { ov_callbacks callbacks; callbacks.read_func = Adapter::Read; callbacks.close_func = Adapter::Close; callbacks.seek_func = Adapter::Seek; callbacks.tell_func = Adapter::Tell; const int ret = ov_open_callbacks(&adapter, &vf, 0, 0, callbacks); if(ret != 0) WARN_RETURN(LibErrorFromVorbis(ret)); const int link = -1; // retrieve info for current bitstream info = ov_info(&vf, link); if(!info) WARN_RETURN(ERR::INVALID_HANDLE); return INFO::OK; } virtual ALenum Format() { return (info->channels == 1)? 
AL_FORMAT_MONO16 : AL_FORMAT_STEREO16; } virtual ALsizei SamplingRate() { return info->rate; } virtual LibError GetNextChunk(u8* buffer, size_t size) { // we may have to call ov_read multiple times because it // treats the buffer size "as a limit and not a request" size_t bytesRead = 0; for(;;) { const int isBigEndian = (BYTE_ORDER == BIG_ENDIAN); const int wordSize = sizeof(i16); const int isSigned = 1; int bitstream; // unused const int ret = ov_read(&vf, (char*)buffer+bytesRead, int(size-bytesRead), isBigEndian, wordSize, isSigned, &bitstream); if(ret == 0) // EOF return (LibError)bytesRead; else if(ret < 0) WARN_RETURN(LibErrorFromVorbis(ret)); else // success { bytesRead += ret; if(bytesRead == size) return (LibError)bytesRead; } } } private: Adapter adapter; OggVorbis_File vf; vorbis_info* info; }; //----------------------------------------------------------------------------- LibError OpenOggStream(const OsPath& pathname, OggStreamPtr& stream) { PFile file(new File); RETURN_ERR(file->Open(pathname, L'r')); shared_ptr > tmp(new OggStreamImpl(VorbisFileAdapter(file))); RETURN_ERR(tmp->Open()); stream = tmp; return INFO::OK; } LibError OpenOggNonstream(const PIVFS& vfs, const VfsPath& pathname, OggStreamPtr& stream) { shared_ptr contents; size_t size; RETURN_ERR(vfs->LoadFile(pathname, contents, size)); shared_ptr > tmp(new OggStreamImpl(VorbisBufferAdapter(contents, size))); RETURN_ERR(tmp->Open()); stream = tmp; return INFO::OK; } Index: ps/trunk/source/lib/tex/tex_internal.h =================================================================== --- ps/trunk/source/lib/tex/tex_internal.h (revision 9349) +++ ps/trunk/source/lib/tex/tex_internal.h (revision 9350) @@ -1,60 +1,60 @@ /* Copyright (c) 2010 Wildfire Games * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, 
copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* * private texture loader helper functions */ #ifndef INCLUDED_TEX_INTERNAL #define INCLUDED_TEX_INTERNAL #include "lib/allocators/dynarray.h" -#include "lib/file/io/io.h" // io_Allocate +#include "lib/file/io/io.h" // io::Allocate /** * check if the given texture format is acceptable: 8bpp grey, * 24bpp color or 32bpp color+alpha (BGR / upside down are permitted). * basically, this is the "plain" format understood by all codecs and * tex_codec_plain_transform. * @param bpp bits per pixel * @param flags TexFlags * @return LibError **/ extern LibError tex_validate_plain_format(size_t bpp, size_t flags); /** * indicate if the two vertical orientations match. * * used by tex_codec. * * @param src_flags TexFlags, used to extract the orientation. * we ask for this instead of src_orientation so callers don't have to * mask off TEX_ORIENTATION. * @param dst_orientation orientation to compare against. * can be one of TEX_BOTTOM_UP, TEX_TOP_DOWN, or 0 for the * "global orientation". 
* @return bool **/ extern bool tex_orientations_match(size_t src_flags, size_t dst_orientation); #endif // #ifndef INCLUDED_TEX_INTERNAL Index: ps/trunk/source/lib/allocators/shared_ptr.h =================================================================== --- ps/trunk/source/lib/allocators/shared_ptr.h (revision 9349) +++ ps/trunk/source/lib/allocators/shared_ptr.h (revision 9350) @@ -1,79 +1,84 @@ /* Copyright (c) 2010 Wildfire Games * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #ifndef INCLUDED_SHARED_PTR #define INCLUDED_SHARED_PTR -#include "lib/sysdep/arch/x86_x64/cache.h" +#include "lib/alignment.h" #include "lib/sysdep/rtl.h" // rtl_AllocateAligned /* deleter that does nothing: for wrapping memory owned elsewhere */ struct DummyDeleter { template<class T> void operator()(T*) { } }; template<class T> inline shared_ptr<T> DummySharedPtr(T* ptr) { return shared_ptr<T>(ptr, DummyDeleter()); } struct ArrayDeleter { template<class T> void operator()(T* p) { delete[] p; } }; struct FreeDeleter { template<class T> void operator()(T* p) { free(p); } }; // (note: uses CheckedArrayDeleter) LIB_API shared_ptr<u8> Allocate(size_t size); + struct AlignedDeleter { template<class T> void operator()(T* t) { rtl_FreeAligned(t); } }; /* AllocateAligned: stores `size` bytes of `alignment`-aligned memory in p (released via rtl_FreeAligned); returns ERR::NO_MEM and leaves p untouched if the allocation fails, so callers must check the result */ template<class T> -inline shared_ptr<T> AllocateAligned(size_t size) +static inline LibError AllocateAligned(shared_ptr<T>& p, size_t size, size_t alignment = cacheLineSize) { - return shared_ptr<T>((T*)rtl_AllocateAligned(size, x86_x64_Caches(L2D)->entrySize), AlignedDeleter()); + void* mem = rtl_AllocateAligned(size, alignment); + if(!mem) + WARN_RETURN(ERR::NO_MEM); + p.reset((T*)mem, AlignedDeleter()); + return INFO::OK; } #endif // #ifndef INCLUDED_SHARED_PTR Index: ps/trunk/source/ps/Util.cpp =================================================================== --- ps/trunk/source/ps/Util.cpp (revision 9349) +++ ps/trunk/source/ps/Util.cpp (revision 9350) @@ -1,401 +1,401 @@ /* Copyright (C) 2009 Wildfire Games. * This file is part of 0 A.D. * * 0 A.D. is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 2 of the License, or * (at your option) any later version. * * 0 A.D. is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with 0 A.D. If not, see <http://www.gnu.org/licenses/>. 
*/ #include "precompiled.h" #include "ps/Util.h" #include "lib/posix/posix_utsname.h" #include "lib/posix/posix_sock.h" #include "lib/ogl.h" #include "lib/timer.h" #include "lib/bits.h" // round_up #include "lib/allocators/shared_ptr.h" #include "lib/sysdep/sysdep.h" // sys_OpenFile #include "lib/sysdep/gfx.h" #include "lib/sysdep/snd.h" #include "lib/sysdep/cpu.h" #include "lib/sysdep/os_cpu.h" #include "lib/sysdep/arch/x86_x64/topology.h" #include "lib/sysdep/smbios.h" #include "lib/tex/tex.h" -#include "lib/file/io/io_align.h" // BLOCK_SIZE #include "ps/GameSetup/Config.h" #include "ps/GameSetup/GameSetup.h" #include "ps/Game.h" #include "ps/CLogger.h" #include "ps/Filesystem.h" #include "ps/VideoMode.h" #include "renderer/Renderer.h" #include "maths/MathUtil.h" #include "graphics/GameView.h" extern CStrW g_CursorName; /* SplitExts: returns the space-separated list `exts` with each separating space replaced by a newline (one entry per line) */ static std::string SplitExts(const char *exts) { std::string str = exts; std::string ret = ""; size_t idx = str.find_first_of(" "); while(idx != std::string::npos) { if(idx >= str.length() - 1) { ret += str; break; } ret += str.substr(0, idx); ret += "\n"; str = str.substr(idx + 1); idx = str.find_first_of(" "); } /* a final entry without a trailing space never reaches the branch above; append it instead of dropping it */ if(idx == std::string::npos) ret += str; return ret; } void WriteSystemInfo() { TIMER(L"write_sys_info"); // get_cpu_info and gfx_detect already called during init - see call site snd_detect(); struct utsname un; uname(&un); OsPath pathname = psLogDir()/"system_info.txt"; FILE* f = sys_OpenFile(pathname, "w"); if(!f) return; // current timestamp (redundant WRT OS timestamp, but that is not // visible when people are posting this file's contents online) { wchar_t timestampBuf[100] = {'\0'}; time_t seconds; time(&seconds); struct tm* t = gmtime(&seconds); const size_t charsWritten = wcsftime(timestampBuf, ARRAY_SIZE(timestampBuf), L"(generated %Y-%m-%d %H:%M:%S UTC)", t); debug_assert(charsWritten != 0); fprintf(f, "%ls\n\n", timestampBuf); } // OS fprintf(f, "OS : %s %s (%s)\n", un.sysname, un.release, un.version); // CPU fprintf(f, "CPU : %s, %s (%dx%dx%d)", un.machine, 
cpu_IdentifierString(), (int)cpu_topology_NumPackages(), (int)cpu_topology_CoresPerPackage(), (int)cpu_topology_LogicalPerCore()); const double cpu_freq = os_cpu_ClockFrequency(); if(cpu_freq != 0.0f) { if(cpu_freq < 1e9) fprintf(f, ", %.2f MHz\n", cpu_freq*1e-6); else fprintf(f, ", %.2f GHz\n", cpu_freq*1e-9); } else fprintf(f, "\n"); // memory fprintf(f, "Memory : %u MiB; %u MiB free\n", (unsigned)os_cpu_MemorySize(), (unsigned)os_cpu_MemoryAvailable()); // graphics fprintf(f, "Graphics Card : %ls\n", gfx_card); fprintf(f, "OpenGL Drivers : %s; %ls\n", glGetString(GL_VERSION), gfx_drv_ver); fprintf(f, "Video Mode : %dx%d:%d\n", g_VideoMode.GetXRes(), g_VideoMode.GetYRes(), g_VideoMode.GetBPP()); // sound fprintf(f, "Sound Card : %ls\n", snd_card); fprintf(f, "Sound Drivers : %ls\n", snd_drv_ver); // // network name / ips // // note: can't use un.nodename because it is for an // "implementation-defined communications network". char hostname[128] = "(unknown)"; (void)gethostname(hostname, sizeof(hostname)-1); // -1 makes sure it's 0-terminated. if the function fails, // we display "(unknown)" and will skip IP output below. 
fprintf(f, "Network Name : %s", hostname); { // ignore exception here - see https://connect.microsoft.com/VisualStudio/feedback/ViewFeedback.aspx?FeedbackID=114032 hostent* host = gethostbyname(hostname); if(!host) goto no_ip; struct in_addr** ips = (struct in_addr**)host->h_addr_list; if(!ips) goto no_ip; // output all IPs (> 1 if using VMware or dual ethernet) fprintf(f, " ("); for(size_t i = 0; i < 256 && ips[i]; i++) // safety { // separate entries but avoid trailing comma if(i != 0) fprintf(f, ", "); fprintf(f, "%s", inet_ntoa(*ips[i])); } fprintf(f, ")"); } no_ip: fprintf(f, "\n"); // OpenGL extensions (write them last, since it's a lot of text) const char* exts = ogl_ExtensionString(); if (!exts) exts = "{unknown}"; fprintf(f, "\nOpenGL Extensions: \n%s\n", SplitExts(exts).c_str()); // System Management BIOS (even more text than OpenGL extensions) std::string smbios = SMBIOS::StringizeStructures(SMBIOS::GetStructures()); fprintf(f, "\nSMBIOS: \n%s\n", smbios.c_str()); fclose(f); f = 0; } // not thread-safe! static const wchar_t* HardcodedErrorString(int err) { static wchar_t description[200]; error_description_r((LibError)err, description, ARRAY_SIZE(description)); return description; } // not thread-safe! const wchar_t* ErrorString(int err) { // language file not available (yet) return HardcodedErrorString(err); // TODO: load from language file } // write the specified texture to disk. // note: cannot be made const because the image may have to be // transformed to write it out in the format determined by 's extension. 
LibError tex_write(Tex* t, const VfsPath& filename) { DynArray da; RETURN_ERR(tex_encode(t, filename.Extension(), &da)); // write to disk LibError ret = INFO::OK; { - (void)da_set_size(&da, round_up(da.cur_size, BLOCK_SIZE)); shared_ptr file = DummySharedPtr(da.base); const ssize_t bytes_written = g_VFS->CreateFile(filename, file, da.pos); if(bytes_written > 0) debug_assert(bytes_written == (ssize_t)da.pos); else ret = (LibError)bytes_written; } (void)da_free(&da); return ret; } static size_t s_nextScreenshotNumber; // identifies the file format that is to be written // (case-insensitive). examples: "bmp", "png", "jpg". // BMP is good for quick output at the expense of large files. void WriteScreenshot(const VfsPath& extension) { // get next available numbered filename // note: %04d -> always 4 digits, so sorting by filename works correctly. const VfsPath basenameFormat(L"screenshots/screenshot%04d"); const VfsPath filenameFormat = basenameFormat.ChangeExtension(extension); VfsPath filename; fs_util::NextNumberedFilename(g_VFS, filenameFormat, s_nextScreenshotNumber, filename); const size_t w = (size_t)g_xres, h = (size_t)g_yres; const size_t bpp = 24; GLenum fmt = GL_RGB; int flags = TEX_BOTTOM_UP; // we want writing BMP to be as fast as possible, // so read data from OpenGL in BMP format to obviate conversion. 
if(extension == L".bmp") { fmt = GL_BGR; flags |= TEX_BGR; } // Hide log messages and re-render RenderLogger(false); Render(); RenderLogger(true); const size_t img_size = w * h * bpp/8; const size_t hdr_size = tex_hdr_size(filename); - shared_ptr buf = io_Allocate(hdr_size+img_size); + shared_ptr buf; + AllocateAligned(buf, hdr_size+img_size, maxSectorSize); GLvoid* img = buf.get() + hdr_size; Tex t; if(tex_wrap(w, h, bpp, flags, buf, hdr_size, &t) < 0) return; glReadPixels(0, 0, (GLsizei)w, (GLsizei)h, fmt, GL_UNSIGNED_BYTE, img); if (tex_write(&t, filename) == INFO::OK) { OsPath realPath; g_VFS->GetRealPath(filename, realPath); LOGMESSAGERENDER(L"Screenshot written to '%ls'", realPath.string().c_str()); } else LOGERROR(L"Error writing screenshot to '%ls'", filename.string().c_str()); tex_free(&t); } // Similar to WriteScreenshot, but generates an image of size 640*tiles x 480*tiles. void WriteBigScreenshot(const VfsPath& extension, int tiles) { // If the game hasn't started yet then use WriteScreenshot to generate the image. if(g_Game == NULL){ WriteScreenshot(L".bmp"); return; } // get next available numbered filename // note: %04d -> always 4 digits, so sorting by filename works correctly. const VfsPath basenameFormat(L"screenshots/screenshot%04d"); const VfsPath filenameFormat = basenameFormat.ChangeExtension(extension); VfsPath filename; fs_util::NextNumberedFilename(g_VFS, filenameFormat, s_nextScreenshotNumber, filename); // Slightly ugly and inflexible: Always draw 640*480 tiles onto the screen, and // hope the screen is actually large enough for that. const int tile_w = 640, tile_h = 480; debug_assert(g_xres >= tile_w && g_yres >= tile_h); const int img_w = tile_w*tiles, img_h = tile_h*tiles; const int bpp = 24; GLenum fmt = GL_RGB; int flags = TEX_BOTTOM_UP; // we want writing BMP to be as fast as possible, // so read data from OpenGL in BMP format to obviate conversion. 
if(extension == L".bmp") { fmt = GL_BGR; flags |= TEX_BGR; } const size_t img_size = img_w * img_h * bpp/8; const size_t tile_size = tile_w * tile_h * bpp/8; const size_t hdr_size = tex_hdr_size(filename); void* tile_data = malloc(tile_size); if(!tile_data) WARN_ERR_RETURN(ERR::NO_MEM); - shared_ptr<u8> img_buf = io_Allocate(hdr_size+img_size); + shared_ptr<u8> img_buf; + /* the image buffer can fail to allocate; release the tile buffer and stop instead of offsetting a null pointer */ + if(AllocateAligned(img_buf, hdr_size+img_size, maxSectorSize) != INFO::OK) + { + free(tile_data); + return; + } Tex t; GLvoid* img = img_buf.get() + hdr_size; if(tex_wrap(img_w, img_h, bpp, flags, img_buf, hdr_size, &t) < 0) { free(tile_data); return; } ogl_WarnIfError(); // Resize various things so that the sizes and aspect ratios are correct { g_Renderer.Resize(tile_w, tile_h); SViewPort vp = { 0, 0, tile_w, tile_h }; g_Game->GetView()->GetCamera()->SetViewPort(vp); g_Game->GetView()->GetCamera()->SetProjection(CGameView::defaultNear, CGameView::defaultFar, CGameView::defaultFOV); } // Temporarily move everything onto the front buffer, so the user can // see the exciting progress as it renders (and can tell when it's finished). // (It doesn't just use SwapBuffers, because it doesn't know whether to // call the SDL version or the Atlas version.) 
GLint oldReadBuffer, oldDrawBuffer; glGetIntegerv(GL_READ_BUFFER, &oldReadBuffer); glGetIntegerv(GL_DRAW_BUFFER, &oldDrawBuffer); glDrawBuffer(GL_FRONT); glReadBuffer(GL_FRONT); // Hide the cursor CStrW oldCursor = g_CursorName; g_CursorName = L""; // Render each tile for (int tile_y = 0; tile_y < tiles; ++tile_y) { for (int tile_x = 0; tile_x < tiles; ++tile_x) { // Adjust the camera to render the appropriate region g_Game->GetView()->GetCamera()->SetProjectionTile(tiles, tile_x, tile_y); RenderLogger(false); RenderGui(false); Render(); RenderGui(true); RenderLogger(true); // Copy the tile pixels into the main image glReadPixels(0, 0, tile_w, tile_h, fmt, GL_UNSIGNED_BYTE, tile_data); for (int y = 0; y < tile_h; ++y) { void* dest = (char*)img + ((tile_y*tile_h + y) * img_w + (tile_x*tile_w)) * bpp/8; void* src = (char*)tile_data + y * tile_w * bpp/8; memcpy(dest, src, tile_w * bpp/8); } } } // Restore the old cursor g_CursorName = oldCursor; // Restore the buffer settings glDrawBuffer(oldDrawBuffer); glReadBuffer(oldReadBuffer); // Restore the viewport settings { g_Renderer.Resize(g_xres, g_yres); SViewPort vp = { 0, 0, g_xres, g_yres }; g_Game->GetView()->GetCamera()->SetViewPort(vp); g_Game->GetView()->GetCamera()->SetProjection(CGameView::defaultNear, CGameView::defaultFar, CGameView::defaultFOV); g_Game->GetView()->GetCamera()->SetProjectionTile(1, 0, 0); } if (tex_write(&t, filename) == INFO::OK) { OsPath realPath; g_VFS->GetRealPath(filename, realPath); LOGMESSAGERENDER(L"Screenshot written to '%ls'", realPath.string().c_str()); } else LOGERROR(L"Error writing screenshot to '%ls'", filename.string().c_str()); tex_free(&t); free(tile_data); } Index: ps/trunk/source/ps/XML/XMLWriter.cpp =================================================================== --- ps/trunk/source/ps/XML/XMLWriter.cpp (revision 9349) +++ ps/trunk/source/ps/XML/XMLWriter.cpp (revision 9350) @@ -1,323 +1,325 @@ /* Copyright (C) 2011 Wildfire Games. * This file is part of 0 A.D. 
* * 0 A.D. is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 2 of the License, or * (at your option) any later version. * * 0 A.D. is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with 0 A.D. If not, see . */ #include "precompiled.h" #include "XMLWriter.h" #include "ps/CLogger.h" #include "ps/Filesystem.h" #include "ps/XML/Xeromyces.h" #include "lib/utf8.h" +#include "lib/allocators/shared_ptr.h" #include "lib/sysdep/cpu.h" #include "maths/Fixed.h" // TODO (maybe): Write to the file frequently, instead of buffering // the entire file, so that large files get written faster. namespace { CStr escapeAttributeValue(const char* input) { // Spec says: // AttValue ::= '"' ([^<&"] | Reference)* '"' // so > is allowed in attribute values, so we don't bother escaping it. CStr ret = input; ret.Replace("&", "&"); ret.Replace("<", "<"); ret.Replace("\"", """); return ret; } CStr escapeCharacterData(const char* input) { // CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) CStr ret = input; ret.Replace("&", "&"); ret.Replace("<", "<"); ret.Replace("]]>", "]]>"); return ret; } CStr escapeCDATA(const char* input) { CStr ret = input; ret.Replace("]]>", "]]>]]>' // This just avoids double-hyphens, and doesn't enforce the no-hyphen-at-end // rule, since it's only used in contexts where there's already a space // between this data and the -->. 
CStr ret = input; ret.Replace("--", "\xE2\x80\x90\xE2\x80\x90"); // replace with U+2010 HYPHEN, because it's close enough and it's // probably nicer than inserting spaces or deleting hyphens or // any alternative return ret; } } enum { EL_ATTR, EL_TEXT, EL_SUBEL }; XMLWriter_File::XMLWriter_File() : m_Indent(0), m_LastElement(NULL), m_PrettyPrint(true) { // Encoding is always UTF-8 - that's one of the only two guaranteed to be // supported by XML parsers (along with UTF-16), and there's not much need // to let people choose another. m_Data = "\n"; } bool XMLWriter_File::StoreVFS(const PIVFS& vfs, const VfsPath& pathname) { if (m_LastElement) debug_warn(L"ERROR: Saving XML while an element is still open"); const size_t size = m_Data.length(); - shared_ptr data = io_Allocate(size); + shared_ptr data; + AllocateAligned(data, size, maxSectorSize); memcpy(data.get(), m_Data.data(), size); LibError ret = vfs->CreateFile(pathname, data, size); if (ret < 0) { LOGERROR(L"Error saving XML data through VFS: %ld", ret); return false; } return true; } const CStr& XMLWriter_File::GetOutput() { return m_Data; } void XMLWriter_File::XMB(const XMBFile& file) { ElementXMB(file, file.GetRoot()); } void XMLWriter_File::ElementXMB(const XMBFile& file, XMBElement el) { XMLWriter_Element writer(*this, file.GetElementString(el.GetNodeName()).c_str()); XERO_ITER_ATTR(el, attr) writer.Attribute(file.GetAttributeString(attr.Name).c_str(), attr.Value); XERO_ITER_EL(el, child) ElementXMB(file, child); } void XMLWriter_File::Comment(const char* text) { ElementStart(NULL, "!-- "); m_Data += escapeComment(text); m_Data += " -->"; --m_Indent; } CStr XMLWriter_File::Indent() { return std::string(m_Indent, '\t'); } void XMLWriter_File::ElementStart(XMLWriter_Element* element, const char* name) { if (m_LastElement) m_LastElement->Close(EL_SUBEL); m_LastElement = element; if (m_PrettyPrint) { m_Data += "\n"; m_Data += Indent(); } m_Data += "<"; m_Data += name; ++m_Indent; } void 
XMLWriter_File::ElementClose() { m_Data += ">"; } void XMLWriter_File::ElementEnd(const char* name, int type) { --m_Indent; m_LastElement = NULL; switch (type) { case EL_ATTR: m_Data += "/>"; break; case EL_TEXT: m_Data += ""; break; case EL_SUBEL: if (m_PrettyPrint) { m_Data += "\n"; m_Data += Indent(); } m_Data += ""; break; default: debug_assert(0); } } void XMLWriter_File::ElementText(const char* text, bool cdata) { if (cdata) { m_Data += ""; } else { m_Data += escapeCharacterData(text); } } XMLWriter_Element::XMLWriter_Element(XMLWriter_File& file, const char* name) : m_File(&file), m_Name(name), m_Type(EL_ATTR) { m_File->ElementStart(this, name); } XMLWriter_Element::~XMLWriter_Element() { m_File->ElementEnd(m_Name.c_str(), m_Type); } void XMLWriter_Element::Close(int type) { if (m_Type == type) return; m_File->ElementClose(); m_Type = type; } // Template specialisations for various string types: template <> void XMLWriter_Element::Text(const char* text, bool cdata) { Close(EL_TEXT); m_File->ElementText(text, cdata); } template <> void XMLWriter_Element::Text(const wchar_t* text, bool cdata) { Text( CStrW(text).ToUTF8().c_str(), cdata ); } // template <> void XMLWriter_File::ElementAttribute(const char* name, const char* const& value, bool newelement) { if (newelement) { ElementStart(NULL, name); m_Data += ">"; ElementText(value, false); ElementEnd(name, EL_TEXT); } else { debug_assert(m_LastElement && m_LastElement->m_Type == EL_ATTR); m_Data += " "; m_Data += name; m_Data += "=\""; m_Data += escapeAttributeValue(value); m_Data += "\""; } } // Attribute/setting value-to-string template specialisations. // // These only deal with basic types. Anything more complicated should // be converted into a basic type by whatever is making use of XMLWriter, // to keep game-related logic out of the not-directly-game-related code here. 
template <> void XMLWriter_File::ElementAttribute(const char* name, const CStr& value, bool newelement) { ElementAttribute(name, value.c_str(), newelement); } template <> void XMLWriter_File::ElementAttribute(const char* name, const std::string& value, bool newelement) { ElementAttribute(name, value.c_str(), newelement); } // Encode Unicode strings as UTF-8 template <> void XMLWriter_File::ElementAttribute(const char* name, const CStrW& value, bool newelement) { ElementAttribute(name, value.ToUTF8(), newelement); } template <> void XMLWriter_File::ElementAttribute(const char* name, const std::wstring& value, bool newelement) { ElementAttribute(name, utf8_from_wstring(value).c_str(), newelement); } template <> void XMLWriter_File::ElementAttribute(const char* name, const fixed& value, bool newelement) { ElementAttribute(name, value.ToString().c_str(), newelement); } template <> void XMLWriter_File::ElementAttribute(const char* name, const int& value, bool newelement) { std::stringstream ss; ss << value; ElementAttribute(name, ss.str().c_str(), newelement); } template <> void XMLWriter_File::ElementAttribute(const char* name, const unsigned int& value, bool newelement) { std::stringstream ss; ss << value; ElementAttribute(name, ss.str().c_str(), newelement); } template <> void XMLWriter_File::ElementAttribute(const char* name, const float& value, bool newelement) { std::stringstream ss; ss << value; ElementAttribute(name, ss.str().c_str(), newelement); } template <> void XMLWriter_File::ElementAttribute(const char* name, const double& value, bool newelement) { std::stringstream ss; ss << value; ElementAttribute(name, ss.str().c_str(), newelement); } Index: ps/trunk/source/graphics/TextureConverter.cpp =================================================================== --- ps/trunk/source/graphics/TextureConverter.cpp (revision 9349) +++ ps/trunk/source/graphics/TextureConverter.cpp (revision 9350) @@ -1,536 +1,537 @@ /* Copyright (C) 2010 Wildfire Games. 
* This file is part of 0 A.D. * * 0 A.D. is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 2 of the License, or * (at your option) any later version. * * 0 A.D. is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with 0 A.D. If not, see . */ #include "precompiled.h" #include "TextureConverter.h" #include "lib/regex.h" #include "lib/timer.h" #include "lib/allocators/shared_ptr.h" #include "lib/file/io/io.h" #include "lib/tex/tex.h" #include "maths/MD5.h" #include "ps/CLogger.h" #include "ps/CStr.h" #include "ps/XML/Xeromyces.h" #include "nvtt/nvtt.h" /** * Output handler to collect NVTT's output into a simplistic buffer. * WARNING: Used in the worker thread - must be thread-safe. */ struct BufferOutputHandler : public nvtt::OutputHandler { std::vector buffer; virtual void beginImage(int UNUSED(size), int UNUSED(width), int UNUSED(height), int UNUSED(depth), int UNUSED(face), int UNUSED(miplevel)) { } virtual bool writeData(const void* data, int size) { size_t off = buffer.size(); buffer.resize(off + size); memcpy(&buffer[off], data, size); return true; } }; /** * Request for worker thread to process. */ struct CTextureConverter::ConversionRequest { VfsPath dest; CTexturePtr texture; nvtt::InputOptions inputOptions; nvtt::CompressionOptions compressionOptions; nvtt::OutputOptions outputOptions; bool isDXT1a; // see comment in RunThread }; /** * Result from worker thread. 
*/ struct CTextureConverter::ConversionResult { VfsPath dest; CTexturePtr texture; BufferOutputHandler output; bool ret; // true if the conversion succeeded }; void CTextureConverter::Settings::Hash(MD5& hash) { hash.Update((const u8*)&format, sizeof(format)); hash.Update((const u8*)&mipmap, sizeof(mipmap)); hash.Update((const u8*)&normal, sizeof(normal)); hash.Update((const u8*)&alpha, sizeof(alpha)); hash.Update((const u8*)&filter, sizeof(filter)); hash.Update((const u8*)&kaiserWidth, sizeof(kaiserWidth)); hash.Update((const u8*)&kaiserAlpha, sizeof(kaiserAlpha)); hash.Update((const u8*)&kaiserStretch, sizeof(kaiserStretch)); } CTextureConverter::SettingsFile* CTextureConverter::LoadSettings(const VfsPath& path) const { CXeromyces XeroFile; if (XeroFile.Load(m_VFS, path) != PSRETURN_OK) return NULL; // Define all the elements used in the XML file #define EL(x) int el_##x = XeroFile.GetElementID(#x) #define AT(x) int at_##x = XeroFile.GetAttributeID(#x) EL(textures); EL(file); AT(pattern); AT(format); AT(mipmap); AT(normal); AT(alpha); AT(filter); AT(kaiserwidth); AT(kaiseralpha); AT(kaiserstretch); #undef AT #undef EL XMBElement root = XeroFile.GetRoot(); if (root.GetNodeName() != el_textures) { LOGERROR(L"Invalid texture settings file \"%ls\" (unrecognised root element)", path.string().c_str()); return NULL; } std::auto_ptr settings(new SettingsFile()); XERO_ITER_EL(root, child) { if (child.GetNodeName() == el_file) { Match p; XERO_ITER_ATTR(child, attr) { if (attr.Name == at_pattern) { p.pattern = attr.Value.FromUTF8(); } else if (attr.Name == at_format) { CStr v(attr.Value); if (v == "dxt1") p.settings.format = FMT_DXT1; else if (v == "dxt3") p.settings.format = FMT_DXT3; else if (v == "dxt5") p.settings.format = FMT_DXT5; else if (v == "rgba") p.settings.format = FMT_RGBA; else LOGERROR(L"Invalid attribute value ", v.c_str()); } else if (attr.Name == at_mipmap) { CStr v(attr.Value); if (v == "true") p.settings.mipmap = MIP_TRUE; else if (v == "false") 
p.settings.mipmap = MIP_FALSE; else LOGERROR(L"Invalid attribute value ", v.c_str()); } else if (attr.Name == at_normal) { CStr v(attr.Value); if (v == "true") p.settings.normal = NORMAL_TRUE; else if (v == "false") p.settings.normal = NORMAL_FALSE; else LOGERROR(L"Invalid attribute value ", v.c_str()); } else if (attr.Name == at_alpha) { CStr v(attr.Value); if (v == "none") p.settings.alpha = ALPHA_NONE; else if (v == "player") p.settings.alpha = ALPHA_PLAYER; else if (v == "transparency") p.settings.alpha = ALPHA_TRANSPARENCY; else LOGERROR(L"Invalid attribute value ", v.c_str()); } else if (attr.Name == at_filter) { CStr v(attr.Value); if (v == "box") p.settings.filter = FILTER_BOX; else if (v == "triangle") p.settings.filter = FILTER_TRIANGLE; else if (v == "kaiser") p.settings.filter = FILTER_KAISER; else LOGERROR(L"Invalid attribute value ", v.c_str()); } else if (attr.Name == at_kaiserwidth) { p.settings.kaiserWidth = CStr(attr.Value).ToFloat(); } else if (attr.Name == at_kaiseralpha) { p.settings.kaiserAlpha = CStr(attr.Value).ToFloat(); } else if (attr.Name == at_kaiserstretch) { p.settings.kaiserStretch = CStr(attr.Value).ToFloat(); } else { LOGERROR(L"Invalid attribute name ", XeroFile.GetAttributeString(attr.Name).c_str()); } } settings->patterns.push_back(p); } } return settings.release(); } CTextureConverter::Settings CTextureConverter::ComputeSettings(const std::wstring& filename, const std::vector& settingsFiles) const { // Set sensible defaults Settings settings; settings.format = FMT_DXT1; settings.mipmap = MIP_TRUE; settings.normal = NORMAL_FALSE; settings.alpha = ALPHA_NONE; settings.filter = FILTER_BOX; settings.kaiserWidth = 3.f; settings.kaiserAlpha = 4.f; settings.kaiserStretch = 1.f; for (size_t i = 0; i < settingsFiles.size(); ++i) { for (size_t j = 0; j < settingsFiles[i]->patterns.size(); ++j) { Match p = settingsFiles[i]->patterns[j]; // Check that the pattern matches the texture file if (!match_wildcard(filename.c_str(), 
p.pattern.c_str())) continue; if (p.settings.format != FMT_UNSPECIFIED) settings.format = p.settings.format; if (p.settings.mipmap != MIP_UNSPECIFIED) settings.mipmap = p.settings.mipmap; if (p.settings.normal != NORMAL_UNSPECIFIED) settings.normal = p.settings.normal; if (p.settings.alpha != ALPHA_UNSPECIFIED) settings.alpha = p.settings.alpha; if (p.settings.filter != FILTER_UNSPECIFIED) settings.filter = p.settings.filter; if (p.settings.kaiserWidth != -1.f) settings.kaiserWidth = p.settings.kaiserWidth; if (p.settings.kaiserAlpha != -1.f) settings.kaiserAlpha = p.settings.kaiserAlpha; if (p.settings.kaiserStretch != -1.f) settings.kaiserStretch = p.settings.kaiserStretch; } } return settings; } CTextureConverter::CTextureConverter(PIVFS vfs, bool highQuality) : m_VFS(vfs), m_HighQuality(highQuality), m_Shutdown(false) { // Verify that we are running with at least the version we were compiled with, // to avoid bugs caused by ABI changes debug_assert(nvtt::version() >= NVTT_VERSION); // Set up the worker thread: int ret; // Use SDL semaphores since OS X doesn't implement sem_init m_WorkerSem = SDL_CreateSemaphore(0); debug_assert(m_WorkerSem); ret = pthread_mutex_init(&m_WorkerMutex, NULL); debug_assert(ret == 0); ret = pthread_create(&m_WorkerThread, NULL, &RunThread, this); debug_assert(ret == 0); // Maybe we should share some centralised pool of worker threads? // For now we'll just stick with a single thread for this specific use. 
} CTextureConverter::~CTextureConverter() { // Tell the thread to shut down pthread_mutex_lock(&m_WorkerMutex); m_Shutdown = true; pthread_mutex_unlock(&m_WorkerMutex); // Wake it up so it sees the notification SDL_SemPost(m_WorkerSem); // Wait for it to shut down cleanly pthread_join(m_WorkerThread, NULL); // Clean up resources SDL_DestroySemaphore(m_WorkerSem); pthread_mutex_destroy(&m_WorkerMutex); } bool CTextureConverter::ConvertTexture(const CTexturePtr& texture, const VfsPath& src, const VfsPath& dest, const Settings& settings) { shared_ptr file; size_t fileSize; if (m_VFS->LoadFile(src, file, fileSize) < 0) { LOGERROR(L"Failed to load texture \"%ls\"", src.string().c_str()); return false; } Tex tex; if (tex_decode(file, fileSize, &tex) < 0) { LOGERROR(L"Failed to decode texture \"%ls\"", src.string().c_str()); return false; } // Check whether there's any alpha channel bool hasAlpha = ((tex.flags & TEX_ALPHA) != 0); // Convert to uncompressed BGRA with no mipmaps if (tex_transform_to(&tex, (tex.flags | TEX_BGR | TEX_ALPHA) & ~(TEX_DXT | TEX_MIPMAPS)) < 0) { LOGERROR(L"Failed to transform texture \"%ls\"", src.string().c_str()); tex_free(&tex); return false; } // Check if the texture has all alpha=255, so we can automatically // switch from DXT3/DXT5 to DXT1 with no loss if (hasAlpha) { hasAlpha = false; u8* data = tex_get_data(&tex); for (size_t i = 0; i < tex.w * tex.h; ++i) { if (data[i*4+3] != 0xFF) { hasAlpha = true; break; } } } shared_ptr request(new ConversionRequest); request->dest = dest; request->texture = texture; // Apply the chosen settings: request->inputOptions.setMipmapGeneration(settings.mipmap == MIP_TRUE); if (settings.alpha == ALPHA_TRANSPARENCY) request->inputOptions.setAlphaMode(nvtt::AlphaMode_Transparency); else request->inputOptions.setAlphaMode(nvtt::AlphaMode_None); request->isDXT1a = false; if (settings.format == FMT_RGBA) { request->compressionOptions.setFormat(nvtt::Format_RGBA); // Change the default component order (see 
tex_dds.cpp decode_pf) request->compressionOptions.setPixelFormat(32, 0xFF, 0xFF00, 0xFF0000, 0xFF000000u); } else if (!hasAlpha) { // if no alpha channel then there's no point using DXT3 or DXT5 request->compressionOptions.setFormat(nvtt::Format_DXT1); } else if (settings.format == FMT_DXT1) { request->compressionOptions.setFormat(nvtt::Format_DXT1a); request->isDXT1a = true; } else if (settings.format == FMT_DXT3) { request->compressionOptions.setFormat(nvtt::Format_DXT3); } else if (settings.format == FMT_DXT5) { request->compressionOptions.setFormat(nvtt::Format_DXT5); } if (settings.filter == FILTER_BOX) request->inputOptions.setMipmapFilter(nvtt::MipmapFilter_Box); else if (settings.filter == FILTER_TRIANGLE) request->inputOptions.setMipmapFilter(nvtt::MipmapFilter_Triangle); else if (settings.filter == FILTER_KAISER) request->inputOptions.setMipmapFilter(nvtt::MipmapFilter_Kaiser); if (settings.normal == NORMAL_TRUE) request->inputOptions.setNormalMap(true); request->inputOptions.setKaiserParameters(settings.kaiserWidth, settings.kaiserAlpha, settings.kaiserStretch); request->inputOptions.setWrapMode(nvtt::WrapMode_Mirror); // TODO: should this be configurable? request->compressionOptions.setQuality(m_HighQuality ? 
nvtt::Quality_Production : nvtt::Quality_Fastest); // TODO: normal maps, gamma, etc // Load the texture data request->inputOptions.setTextureLayout(nvtt::TextureType_2D, tex.w, tex.h); request->inputOptions.setMipmapData(tex_get_data(&tex), tex.w, tex.h); // NVTT copies the texture data so we can free it now tex_free(&tex); pthread_mutex_lock(&m_WorkerMutex); m_RequestQueue.push_back(request); pthread_mutex_unlock(&m_WorkerMutex); // Wake up the worker thread SDL_SemPost(m_WorkerSem); return true; } bool CTextureConverter::Poll(CTexturePtr& texture, VfsPath& dest, bool& ok) { shared_ptr result; // Grab the first result (if any) pthread_mutex_lock(&m_WorkerMutex); if (!m_ResultQueue.empty()) { result = m_ResultQueue.front(); m_ResultQueue.pop_front(); } pthread_mutex_unlock(&m_WorkerMutex); if (!result) { // no work to do return false; } if (!result->ret) { // conversion had failed ok = false; return true; } // Move output into a correctly-aligned buffer size_t size = result->output.buffer.size(); - shared_ptr file = io_Allocate(size); + shared_ptr file; + AllocateAligned(file, size, maxSectorSize); memcpy(file.get(), &result->output.buffer[0], size); if (m_VFS->CreateFile(result->dest, file, size) < 0) { // error writing file ok = false; return true; } // Succeeded in converting texture texture = result->texture; dest = result->dest; ok = true; return true; } bool CTextureConverter::IsBusy() { pthread_mutex_lock(&m_WorkerMutex); bool busy = !m_RequestQueue.empty(); pthread_mutex_unlock(&m_WorkerMutex); return busy; } void* CTextureConverter::RunThread(void* data) { debug_SetThreadName("TextureConverter"); CTextureConverter* textureConverter = static_cast(data); // Wait until the main thread wakes us up while (SDL_SemWait(textureConverter->m_WorkerSem) == 0) { pthread_mutex_lock(&textureConverter->m_WorkerMutex); if (textureConverter->m_Shutdown) { pthread_mutex_unlock(&textureConverter->m_WorkerMutex); break; } // If we weren't woken up for shutdown, we must have 
been woken up for // a new request, so grab it from the queue shared_ptr request = textureConverter->m_RequestQueue.front(); textureConverter->m_RequestQueue.pop_front(); pthread_mutex_unlock(&textureConverter->m_WorkerMutex); // Set up the result object shared_ptr result(new ConversionResult()); result->dest = request->dest; result->texture = request->texture; request->outputOptions.setOutputHandler(&result->output); // TIMER(L"TextureConverter compress"); // Perform the compression nvtt::Compressor compressor; result->ret = compressor.process(request->inputOptions, request->compressionOptions, request->outputOptions); // Ugly hack: NVTT 2.0 doesn't set DDPF_ALPHAPIXELS for DXT1a, so we can't // distinguish it from DXT1. (It's fixed in trunk by // http://code.google.com/p/nvidia-texture-tools/source/detail?r=924&path=/trunk). // Rather than using a trunk NVTT (unstable, makes packaging harder) // or patching our copy (makes packaging harder), we'll just manually // set the flag here. if (request->isDXT1a && result->ret && result->output.buffer.size() > 80) result->output.buffer[80] |= 1; // DDPF_ALPHAPIXELS in DDS_PIXELFORMAT.dwFlags // Push the result onto the queue pthread_mutex_lock(&textureConverter->m_WorkerMutex); textureConverter->m_ResultQueue.push_back(result); pthread_mutex_unlock(&textureConverter->m_WorkerMutex); } return NULL; } Index: ps/trunk/source/graphics/tests/test_MeshManager.h =================================================================== --- ps/trunk/source/graphics/tests/test_MeshManager.h (revision 9349) +++ ps/trunk/source/graphics/tests/test_MeshManager.h (revision 9350) @@ -1,252 +1,256 @@ /* Copyright (C) 2009 Wildfire Games. * This file is part of 0 A.D. * * 0 A.D. is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 2 of the License, or * (at your option) any later version. * * 0 A.D. 
is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with 0 A.D. If not, see . */ #include "lib/self_test.h" #include "lib/file/file_system_util.h" #include "lib/file/vfs/vfs.h" #include "lib/file/io/io.h" +#include "lib/allocators/shared_ptr.h" #include "graphics/ColladaManager.h" #include "graphics/MeshManager.h" #include "graphics/ModelDef.h" #include "ps/CLogger.h" #include "ps/XML/RelaxNG.h" static OsPath MOD_PATH(DataDir()/"mods/_test.mesh"); static OsPath CACHE_PATH(DataDir()/"_testcache"); const OsPath srcDAE(L"collada/sphere.dae"); const OsPath srcPMD(L"collada/sphere.pmd"); const OsPath testDAE(L"art/skeletons/test.dae"); const OsPath testPMD(L"art/skeletons/test.pmd"); const OsPath testBase(L"art/skeletons/test"); const OsPath srcSkeletonDefs(L"collada/skeletons.xml"); const OsPath testSkeletonDefs(L"art/skeletons/skeletons.xml"); extern PIVFS g_VFS; class TestMeshManager : public CxxTest::TestSuite { void initVfs() { // Initialise VFS: // Set up a mod directory to work in: // Make sure the required directories doesn't exist when we start, // in case the previous test aborted and left them full of junk if(fs_util::DirectoryExists(MOD_PATH)) DeleteDirectory(MOD_PATH); if(fs_util::DirectoryExists(CACHE_PATH)) DeleteDirectory(CACHE_PATH); g_VFS = CreateVfs(20*MiB); TS_ASSERT_OK(g_VFS->Mount(L"", MOD_PATH)); TS_ASSERT_OK(g_VFS->Mount(L"collada/", DataDir()/"tests/collada", VFS_MOUNT_MUST_EXIST)); // Mount _testcache onto virtual /cache - don't use the normal cache // directory because that's full of loads of cached files from the // proper game and takes a long time to load. 
TS_ASSERT_OK(g_VFS->Mount(L"cache/", CACHE_PATH)); } void deinitVfs() { g_VFS.reset(); DeleteDirectory(MOD_PATH); DeleteDirectory(CACHE_PATH); } void copyFile(const VfsPath& src, const VfsPath& dst) { // Copy a file into the mod directory, so we can work on it: shared_ptr data; size_t size = 0; TS_ASSERT_OK(g_VFS->LoadFile(src, data, size)); TS_ASSERT_OK(g_VFS->CreateFile(dst, data, size)); } void buildArchive() { // Create a junk trace file first, because vfs_opt_auto_build requires one // std::string trace = "000.000000: L \"-\" 0 0000\n"; // vfs_store("trace.txt", (const u8*)trace.c_str(), trace.size(), FILE_NO_AIO); // then make the archive // TS_ASSERT_OK(vfs_opt_rebuild_main_archive(MOD_PATH"/trace.txt", MOD_PATH"/test%02d.zip")); } CColladaManager* colladaManager; CMeshManager* meshManager; public: void setUp() { initVfs(); colladaManager = new CColladaManager(); meshManager = new CMeshManager(*colladaManager); } void tearDown() { delete meshManager; delete colladaManager; deinitVfs(); } void IRRELEVANT_test_archived() { copyFile(srcDAE, testDAE); //buildArchive(); - shared_ptr buf = io_Allocate(100); + shared_ptr buf; + AllocateAligned(buf, 100, maxSectorSize); strcpy_s((char*)buf.get(), 5, "Test"); g_VFS->CreateFile(testDAE, buf, 4); } void test_load_pmd_with_extension() { copyFile(srcPMD, testPMD); CModelDefPtr modeldef = meshManager->GetMesh(testPMD); TS_ASSERT(modeldef); if (modeldef) TS_ASSERT_PATH_EQUALS(modeldef->GetName(), testBase); } void test_load_pmd_without_extension() { copyFile(srcPMD, testPMD); CModelDefPtr modeldef = meshManager->GetMesh(testBase); TS_ASSERT(modeldef); if (modeldef) TS_ASSERT_PATH_EQUALS(modeldef->GetName(), testBase); } void test_caching() { copyFile(srcPMD, testPMD); CModelDefPtr modeldef1 = meshManager->GetMesh(testPMD); CModelDefPtr modeldef2 = meshManager->GetMesh(testPMD); TS_ASSERT(modeldef1 && modeldef2); if (modeldef1 && modeldef2) TS_ASSERT_EQUALS(modeldef1.get(), modeldef2.get()); } void test_load_dae() { 
copyFile(srcDAE, testDAE); copyFile(srcSkeletonDefs, testSkeletonDefs); CModelDefPtr modeldef = meshManager->GetMesh(testDAE); TS_ASSERT(modeldef); if (modeldef) TS_ASSERT_PATH_EQUALS(modeldef->GetName(), testBase); } void test_load_dae_caching() { copyFile(srcDAE, testDAE); copyFile(srcSkeletonDefs, testSkeletonDefs); VfsPath daeName1 = colladaManager->GetLoadableFilename(testBase, CColladaManager::PMD); VfsPath daeName2 = colladaManager->GetLoadableFilename(testBase, CColladaManager::PMD); TS_ASSERT(!daeName1.empty()); TS_ASSERT_PATH_EQUALS(daeName1, daeName2); // TODO: it'd be nice to test that it really isn't doing the DAE->PMD // conversion a second time, but there doesn't seem to be an easy way // to check that } void test_invalid_skeletons() { TestLogger logger; copyFile(srcDAE, testDAE); - shared_ptr buf = io_Allocate(100); + shared_ptr buf; + AllocateAligned(buf, 100, maxSectorSize); strcpy_s((char*)buf.get(), 100, "Not valid XML"); g_VFS->CreateFile(testSkeletonDefs, buf, 13); CModelDefPtr modeldef = meshManager->GetMesh(testDAE); TS_ASSERT(! modeldef); TS_ASSERT_WSTR_CONTAINS(logger.GetOutput(), L"parser error"); } void test_invalid_dae() { TestLogger logger; copyFile(srcSkeletonDefs, testSkeletonDefs); - shared_ptr buf = io_Allocate(100); + shared_ptr buf; + AllocateAligned(buf, 100, maxSectorSize); strcpy_s((char*)buf.get(), 100, "Not valid XML"); g_VFS->CreateFile(testDAE, buf, 13); CModelDefPtr modeldef = meshManager->GetMesh(testDAE); TS_ASSERT(! modeldef); TS_ASSERT_WSTR_CONTAINS(logger.GetOutput(), L"parser error"); } void test_load_nonexistent_pmd() { TestLogger logger; CModelDefPtr modeldef = meshManager->GetMesh(testPMD); TS_ASSERT(! modeldef); } void test_load_nonexistent_dae() { TestLogger logger; CModelDefPtr modeldef = meshManager->GetMesh(testDAE); TS_ASSERT(! 
modeldef); } void test_load_across_relaxng() { // Verify that loading meshes doesn't invalidate other users of libxml2 by calling xmlCleanupParser // (Run this in Valgrind and check for use-of-freed-memory errors) RelaxNGValidator v; TS_ASSERT(v.LoadGrammar("")); TS_ASSERT(v.Validate(L"doc", L"2.0")); copyFile(srcDAE, testDAE); copyFile(srcSkeletonDefs, testSkeletonDefs); CModelDefPtr modeldef = meshManager->GetMesh(testDAE); TS_ASSERT(modeldef); if (modeldef) TS_ASSERT_PATH_EQUALS(modeldef->GetName(), testBase); TS_ASSERT(v.Validate(L"doc", L"2.0")); } ////////////////////////////////////////////////////////////////////////// // Tests based on real DAE files: void test_load_dae_bogus_material_target() { copyFile(L"collada/bogus_material_target.dae", testDAE); copyFile(srcSkeletonDefs, testSkeletonDefs); CModelDefPtr modeldef = meshManager->GetMesh(testDAE); TS_ASSERT(modeldef); } }; Index: ps/trunk/source/lib/bits.h =================================================================== --- ps/trunk/source/lib/bits.h (revision 9349) +++ ps/trunk/source/lib/bits.h (revision 9350) @@ -1,255 +1,248 @@ /* Copyright (c) 2010 Wildfire Games * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* * bit-twiddling. */ #ifndef INCLUDED_BITS #define INCLUDED_BITS /** * value of bit number \. * * @param n bit index. * * requirements: * - T should be an unsigned type * - n must be in [0, CHAR_BIT*sizeof(T)), else the result is undefined! **/ template inline T Bit(size_t n) { const T one = T(1); return (T)(one << n); } /** * pretty much the same as Bit\. * this is intended for the initialization of enum values, where a * compile-time constant is required. **/ #define BIT(n) (1u << (n)) template inline bool IsBitSet(T value, size_t index) { const T bit = Bit(index); return (value & bit) != 0; } // these are declared in the header and inlined to aid compiler optimizations // (they can easily end up being time-critical). // note: GCC can't inline extern functions, while VC's "Whole Program // Optimization" can. /** * a mask that includes the lowest N bits * * @param numBits Number of bits in mask. **/ template inline T bit_mask(size_t numBits) { const T bitsInT = sizeof(T)*CHAR_BIT; const T allBits = (T)~T(0); // (shifts of at least bitsInT are undefined) if(numBits >= bitsInT) return allBits; // (note: the previous allBits >> (bitsInT-numBits) is not safe // because right-shifts of negative numbers are undefined.) const T mask = (T)((T(1) << numBits)-1); return mask; } /** * extract the value of bits hi_idx:lo_idx within num * * example: bits(0x69, 2, 5) == 0x0A * * @param num number whose bits are to be extracted * @param lo_idx bit index of lowest bit to include * @param hi_idx bit index of highest bit to include * @return value of extracted bits. 
**/ template inline T bits(T num, size_t lo_idx, size_t hi_idx) { const size_t numBits = (hi_idx - lo_idx)+1; // # bits to return T result = T(num >> lo_idx); result = T(result & bit_mask(numBits)); return result; } /** * set the value of bits hi_idx:lo_idx * * @param lo_idx bit index of lowest bit to include * @param hi_idx bit index of highest bit to include * @param value new value to be assigned to these bits **/ template inline T SetBitsTo(T num, size_t lo_idx, size_t hi_idx, size_t value) { const size_t numBits = (hi_idx - lo_idx)+1; debug_assert(value < (T(1) << numBits)); const T mask = bit_mask(numBits) << lo_idx; T result = num & ~mask; result = T(result | (value << lo_idx)); return result; } /** * @return number of 1-bits in mask **/ template inline size_t PopulationCount(T mask) { // note: a more complex but probably faster method is given at // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel size_t num1Bits = 0; while(mask) { mask &= mask-1; // clear least significant 1-bit num1Bits++; } return num1Bits; } /** * @return whether the given number is a power of two. **/ template inline bool is_pow2(T n) { // 0 would pass the test below but isn't a POT. if(n == 0) return false; return (n & (n-1)) == 0; } template inline T LeastSignificantBit(T x) { const T negX = T(~x + 1); // 2's complement (avoids 'negating unsigned type' warning) return x & negX; } template inline T ClearLeastSignificantBit(T x) { return x & (x-1); } /** * ceil(log2(x)) * * @param x (unsigned integer) * @return ceiling of the base-2 logarithm (i.e. rounded up) or * zero if the input is zero. **/ template inline size_t ceil_log2(T x) { T bit = 1; size_t log = 0; while(bit < x && bit != 0) // must detect overflow { log++; bit *= 2; } return log; } /** * floor(log2(f)) * fast, uses the FPU normalization hardware. * * @param x (float) input; MUST be > 0, else results are undefined. * @return floor of the base-2 logarithm (i.e. rounded down). 
**/ extern int floor_log2(const float x); /** * round up to next larger power of two. **/ template inline T round_up_to_pow2(T x) { return T(1) << ceil_log2(x); } /** * round number up/down to the next given multiple. * * @param n Number to round. * @param multiple Must be a power of two. **/ template inline T round_up(T n, T multiple) { debug_assert(is_pow2(multiple)); const T result = (n + multiple-1) & ~(multiple-1); debug_assert(n <= result && result < n+multiple); return result; } template inline T round_down(T n, T multiple) { debug_assert(is_pow2(multiple)); const T result = n & ~(multiple-1); debug_assert(result <= n && n < result+multiple); return result; } template -inline bool IsAligned(T t, uintptr_t multiple) -{ - return ((uintptr_t)t % multiple) == 0; -} - - -template inline T MaxPowerOfTwoDivisor(T value) { debug_assert(value != T(0)); for(size_t log2 = 0; log2 < sizeof(T)*CHAR_BIT; log2++) { if(IsBitSet(value, log2)) return T(1) << log2; } debug_assert(0); // unreachable (!= 0 => there is a set bit) return 0; } #endif // #ifndef INCLUDED_BITS Index: ps/trunk/source/lib/file/file.cpp =================================================================== --- ps/trunk/source/lib/file/file.cpp (revision 9349) +++ ps/trunk/source/lib/file/file.cpp (revision 9350) @@ -1,166 +1,74 @@ /* Copyright (c) 2010 Wildfire Games * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* * simple POSIX file wrapper. */ #include "precompiled.h" #include "lib/file/file.h" -#include "lib/config2.h" #include "lib/sysdep/filesystem.h" // O_*, S_* -#include "lib/posix/posix_aio.h" #include "lib/file/common/file_stats.h" ERROR_ASSOCIATE(ERR::FILE_ACCESS, L"Insufficient access rights to open file", EACCES); -ERROR_ASSOCIATE(ERR::IO, L"Error during IO", EIO); -namespace FileImpl { - -LibError Open(const OsPath& pathname, wchar_t accessType, int& fd) +LibError FileOpen(const OsPath& pathname, int opcode, int& fd) { int oflag = 0; - switch(accessType) + switch(opcode) { - case 'r': + case LIO_READ: oflag = O_RDONLY; break; - case 'w': + case LIO_WRITE: oflag = O_WRONLY|O_CREAT|O_TRUNC; break; - case '+': - oflag = O_RDWR; - break; default: debug_assert(0); + break; } #if OS_WIN oflag |= O_BINARY_NP; #endif // prevent exploits by disallowing writes to our files by other users. // note that the system-wide installed cache is read-only. const mode_t mode = S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH; // 0644 fd = wopen(pathname, oflag, mode); if(fd < 0) return LibError_from_errno(false); stats_open(); return INFO::OK; } -void Close(int& fd) +void FileClose(int& fd) { if(fd >= 0) { wclose(fd); fd = -1; } } - - -LibError IO(int fd, wchar_t accessType, off_t ofs, u8* buf, size_t size) -{ - debug_assert(accessType == 'r' || accessType == 'w'); - - ScopedIoMonitor monitor; - - lseek(fd, ofs, SEEK_SET); - - errno = 0; - const ssize_t ret = (accessType == 'w')? 
write(fd, buf, size) : read(fd, buf, size); - if(ret < 0) - return LibError_from_errno(); - - const size_t totalTransferred = (size_t)ret; -#if CONFIG2_FILE_ENABLE_AIO - // we won't be called from Issue, i.e. size is always the exact - // value without padding and can be checked. - if(totalTransferred != size) - WARN_RETURN(ERR::IO); -#endif - - monitor.NotifyOfSuccess(FI_LOWIO, accessType, totalTransferred); - return INFO::OK; -} - - -LibError Issue(aiocb& req, int fd, wchar_t accessType, off_t alignedOfs, u8* alignedBuf, size_t alignedSize) -{ - memset(&req, 0, sizeof(req)); - req.aio_lio_opcode = (accessType == 'w')? LIO_WRITE : LIO_READ; - req.aio_buf = (volatile void*)alignedBuf; - req.aio_fildes = fd; - req.aio_offset = alignedOfs; - req.aio_nbytes = alignedSize; -#if CONFIG2_FILE_ENABLE_AIO - struct sigevent* sig = 0; // no notification signal - aiocb* const reqs = &req; - if(lio_listio(LIO_NOWAIT, &reqs, 1, sig) != 0) - return LibError_from_errno(); - return INFO::OK; -#else - // quick and dirty workaround (see CONFIG2_FILE_ENABLE_AIO) - return IO(fd, accessType, alignedOfs, alignedBuf, alignedSize); -#endif -} - - -LibError WaitUntilComplete(aiocb& req, u8*& alignedBuf, size_t& alignedSize) -{ -#if CONFIG2_FILE_ENABLE_AIO - const int err = aio_error(&req); - if(err == EINPROGRESS) - { -SUSPEND_AGAIN: - aiocb* const reqs = &req; - errno = 0; - const int ret = aio_suspend(&reqs, 1, (timespec*)0); // no timeout - if(ret != 0) - { - if(errno == EINTR) // interrupted by signal - goto SUSPEND_AGAIN; - return LibError_from_errno(); - } - } - else if(err != 0) - { - errno = err; - return LibError_from_errno(); - } - - const ssize_t bytesTransferred = aio_return(&req); - if(bytesTransferred == -1) // transfer failed - WARN_RETURN(ERR::IO); - - alignedSize = bytesTransferred; -#else - alignedSize = req.aio_nbytes; -#endif - alignedBuf = (u8*)req.aio_buf; // cast from volatile void* - return INFO::OK; -} - -} // namespace FileImpl Index: 
ps/trunk/source/lib/file/archive/stream.h =================================================================== --- ps/trunk/source/lib/file/archive/stream.h (revision 9349) +++ ps/trunk/source/lib/file/archive/stream.h (revision 9350) @@ -1,115 +1,113 @@ /* Copyright (c) 2010 Wildfire Games * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* * output buffer and 'stream' layered on top of a compression codec */ #ifndef INCLUDED_STREAM #define INCLUDED_STREAM #include "lib/file/archive/codec.h" // note: this is similar in function to std::vector, but we don't need // iterators etc. and would prefer to avoid initializing each byte. class OutputBufferManager { public: OutputBufferManager(); void Reset(); void SetBuffer(u8* buffer, size_t size); /** * allocate a new output buffer. * * @param size [bytes] to allocate. * * notes: * - if a buffer had previously been allocated and is large enough, * it is reused (this reduces the number of allocations). 
* - this class manages the lifetime of the buffer. **/ void AllocateBuffer(size_t size); u8* Buffer() const { return m_buffer; } size_t Size() const { return m_size; } private: bool IsAllowableBuffer(u8* buffer, size_t size); u8* m_buffer; size_t m_size; shared_ptr m_mem; // size of m_mem. allows reusing previously allocated buffers // (user-specified buffers can't be reused because we have no control // over their lifetime) size_t m_capacity; }; class Stream { public: Stream(const PICodec& codec); void SetOutputBuffer(u8* out, size_t outSize); void AllocateOutputBuffer(size_t outSizeMax); /** * 'feed' the codec with a data block. **/ LibError Feed(const u8* in, size_t inSize); LibError Finish(); size_t OutSize() const { return m_outProduced; } u32 Checksum() const { return m_checksum; } private: PICodec m_codec; OutputBufferManager m_outputBufferManager; size_t m_inConsumed; size_t m_outProduced; u32 m_checksum; }; -extern LibError FeedStream(uintptr_t cbData, const u8* in, size_t inSize); - #endif // #ifndef INCLUDED_STREAM Index: ps/trunk/source/lib/file/archive/archive_zip.cpp =================================================================== --- ps/trunk/source/lib/file/archive/archive_zip.cpp (revision 9349) +++ ps/trunk/source/lib/file/archive/archive_zip.cpp (revision 9350) @@ -1,675 +1,716 @@ /* Copyright (c) 2010 Wildfire Games * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* * archive backend for Zip files. */ #include "precompiled.h" #include "lib/file/archive/archive_zip.h" #include #include #include "lib/utf8.h" #include "lib/bits.h" #include "lib/byte_order.h" -#include "lib/fat_time.h" #include "lib/allocators/pool.h" #include "lib/sysdep/filesystem.h" #include "lib/file/archive/archive.h" #include "lib/file/archive/codec_zlib.h" #include "lib/file/archive/stream.h" #include "lib/file/file.h" #include "lib/file/io/io.h" -#include "lib/file/io/io_align.h" // BLOCK_SIZE -#include "lib/file/io/write_buffer.h" + +//----------------------------------------------------------------------------- +// timestamp conversion: DOS FAT <-> Unix time_t +//----------------------------------------------------------------------------- + +static time_t time_t_from_FAT(u32 fat_timedate) +{ + const u32 fat_time = bits(fat_timedate, 0, 15); + const u32 fat_date = bits(fat_timedate, 16, 31); + + struct tm t; // struct tm format: + t.tm_sec = bits(fat_time, 0,4) * 2; // [0,59] + t.tm_min = bits(fat_time, 5,10); // [0,59] + t.tm_hour = bits(fat_time, 11,15); // [0,23] + t.tm_mday = bits(fat_date, 0,4); // [1,31] + t.tm_mon = bits(fat_date, 5,8) - 1; // [0,11] + t.tm_year = bits(fat_date, 9,15) + 80; // since 1900 + t.tm_isdst = -1; // unknown - let libc determine + + // otherwise: totally bogus, and at the limit of 32-bit time_t + debug_assert(t.tm_year < 138); + + time_t ret = mktime(&t); + debug_assert(ret != (time_t)-1); // mktime shouldn't fail + return ret; +} + + +static u32 
FAT_from_time_t(time_t time) +{ + // (values are adjusted for DST) + struct tm* t = localtime(&time); + + const u16 fat_time = u16( + (t->tm_sec/2) | // 5 + (u16(t->tm_min) << 5) | // 6 + (u16(t->tm_hour) << 11) // 5 + ); + + const u16 fat_date = u16( + (t->tm_mday) | // 5 + (u16(t->tm_mon+1) << 5) | // 4 + (u16(t->tm_year-80) << 9) // 7 + ); + + u32 fat_timedate = u32_from_u16(fat_date, fat_time); + return fat_timedate; +} //----------------------------------------------------------------------------- // Zip archive definitions //----------------------------------------------------------------------------- static const u32 cdfh_magic = FOURCC_LE('P','K','\1','\2'); static const u32 lfh_magic = FOURCC_LE('P','K','\3','\4'); static const u32 ecdr_magic = FOURCC_LE('P','K','\5','\6'); enum ZipMethod { ZIP_METHOD_NONE = 0, ZIP_METHOD_DEFLATE = 8 }; #pragma pack(push, 1) class LFH { public: void Init(const FileInfo& fileInfo, off_t csize, ZipMethod method, u32 checksum, const Path& pathname) { const std::string pathnameUTF8 = utf8_from_wstring(pathname.string()); const size_t pathnameSize = pathnameUTF8.length(); m_magic = lfh_magic; m_x1 = to_le16(0); m_flags = to_le16(0); m_method = to_le16(u16_from_larger(method)); m_fat_mtime = to_le32(FAT_from_time_t(fileInfo.MTime())); m_crc = to_le32(checksum); m_csize = to_le32(u32_from_larger(csize)); m_usize = to_le32(u32_from_larger(fileInfo.Size())); m_fn_len = to_le16(u16_from_larger(pathnameSize)); m_e_len = to_le16(0); memcpy((char*)this + sizeof(LFH), pathnameUTF8.c_str(), pathnameSize); } size_t Size() const { debug_assert(m_magic == lfh_magic); size_t size = sizeof(LFH); size += read_le16(&m_fn_len); size += read_le16(&m_e_len); // note: LFH doesn't have a comment field! 
return size; } private: u32 m_magic; u16 m_x1; // version needed u16 m_flags; u16 m_method; u32 m_fat_mtime; // last modified time (DOS FAT format) u32 m_crc; u32 m_csize; u32 m_usize; u16 m_fn_len; u16 m_e_len; }; cassert(sizeof(LFH) == 30); class CDFH { public: void Init(const FileInfo& fileInfo, off_t ofs, off_t csize, ZipMethod method, u32 checksum, const Path& pathname, size_t slack) { const std::string pathnameUTF8 = utf8_from_wstring(pathname.string()); const size_t pathnameLength = pathnameUTF8.length(); m_magic = cdfh_magic; m_x1 = to_le32(0); m_flags = to_le16(0); m_method = to_le16(u16_from_larger(method)); m_fat_mtime = to_le32(FAT_from_time_t(fileInfo.MTime())); m_crc = to_le32(checksum); m_csize = to_le32(u32_from_larger(csize)); m_usize = to_le32(u32_from_larger(fileInfo.Size())); m_fn_len = to_le16(u16_from_larger(pathnameLength)); m_e_len = to_le16(0); m_c_len = to_le16(u16_from_larger((size_t)slack)); m_x2 = to_le32(0); m_x3 = to_le32(0); m_lfh_ofs = to_le32(u32_from_larger(ofs)); memcpy((char*)this + sizeof(CDFH), pathnameUTF8.c_str(), pathnameLength); } Path Pathname() const { const size_t length = (size_t)read_le16(&m_fn_len); const char* pathname = (const char*)this + sizeof(CDFH); // not 0-terminated! 
return Path(std::string(pathname, length)); } off_t HeaderOffset() const { return read_le32(&m_lfh_ofs); } off_t USize() const { return (off_t)read_le32(&m_usize); } off_t CSize() const { return (off_t)read_le32(&m_csize); } ZipMethod Method() const { return (ZipMethod)read_le16(&m_method); } u32 Checksum() const { return read_le32(&m_crc); } time_t MTime() const { const u32 fat_mtime = read_le32(&m_fat_mtime); return time_t_from_FAT(fat_mtime); } size_t Size() const { size_t size = sizeof(CDFH); size += read_le16(&m_fn_len); size += read_le16(&m_e_len); size += read_le16(&m_c_len); return size; } private: u32 m_magic; u32 m_x1; // versions u16 m_flags; u16 m_method; u32 m_fat_mtime; // last modified time (DOS FAT format) u32 m_crc; u32 m_csize; u32 m_usize; u16 m_fn_len; u16 m_e_len; u16 m_c_len; u32 m_x2; // spanning u32 m_x3; // attributes u32 m_lfh_ofs; }; cassert(sizeof(CDFH) == 46); class ECDR { public: void Init(size_t cd_numEntries, off_t cd_ofs, size_t cd_size) { m_magic = ecdr_magic; m_diskNum = to_le16(0); m_cd_diskNum = to_le16(0); m_cd_numEntriesOnDisk = to_le16(u16_from_larger(cd_numEntries)); m_cd_numEntries = m_cd_numEntriesOnDisk; m_cd_size = to_le32(u32_from_larger(cd_size)); m_cd_ofs = to_le32(u32_from_larger(cd_ofs)); m_comment_len = to_le16(0); } void Decompose(size_t& cd_numEntries, off_t& cd_ofs, size_t& cd_size) const { cd_numEntries = (size_t)read_le16(&m_cd_numEntries); cd_ofs = (off_t)read_le32(&m_cd_ofs); cd_size = (size_t)read_le32(&m_cd_size); } private: u32 m_magic; u16 m_diskNum; u16 m_cd_diskNum; u16 m_cd_numEntriesOnDisk; u16 m_cd_numEntries; u32 m_cd_size; u32 m_cd_ofs; u16 m_comment_len; }; cassert(sizeof(ECDR) == 22); #pragma pack(pop) //----------------------------------------------------------------------------- // ArchiveFile_Zip //----------------------------------------------------------------------------- class ArchiveFile_Zip : public IArchiveFile { public: ArchiveFile_Zip(const PFile& file, off_t ofs, off_t csize, u32 
checksum, ZipMethod method) : m_file(file), m_ofs(ofs) , m_csize(csize), m_checksum(checksum), m_method((u16)method) , m_flags(NeedsFixup) { } virtual size_t Precedence() const { return 2u; } virtual wchar_t LocationCode() const { return 'A'; } virtual OsPath Path() const { return m_file->Pathname(); } virtual LibError Load(const OsPath& UNUSED(name), const shared_ptr& buf, size_t size) const { AdjustOffset(); PICodec codec; switch(m_method) { case ZIP_METHOD_NONE: codec = CreateCodec_ZLibNone(); break; case ZIP_METHOD_DEFLATE: codec = CreateDecompressor_ZLibDeflate(); break; default: WARN_RETURN(ERR::ARCHIVE_UNKNOWN_METHOD); } Stream stream(codec); stream.SetOutputBuffer(buf.get(), size); - RETURN_ERR(io_Scan(m_file, m_ofs, m_csize, FeedStream, (uintptr_t)&stream)); + io::Operation op(*m_file.get(), 0, m_csize, m_ofs); + RETURN_ERR(io::Run(op, io::Parameters(), std::bind(&Stream::Feed, &stream, std::placeholders::_1, std::placeholders::_2))); RETURN_ERR(stream.Finish()); #if CODEC_COMPUTE_CHECKSUM debug_assert(m_checksum == stream.Checksum()); #endif return INFO::OK; } private: enum Flags { // indicates m_ofs points to a "local file header" instead of // the file data. a fixup routine is called when reading the file; // it skips past the LFH and clears this flag. // this is somewhat of a hack, but vital to archive open performance. // without it, we'd have to scan through the entire archive file, // which can take *seconds*. // (we cannot use the information in CDFH, because its 'extra' field // has been observed to differ from that of the LFH) // since we read the LFH right before the rest of the file, the block // cache will absorb the IO cost. NeedsFixup = 1 }; struct LFH_Copier { - u8* lfh_dst; - size_t lfh_bytes_remaining; - }; + LFH_Copier(u8* lfh_dst, size_t lfh_bytes_remaining) + : lfh_dst(lfh_dst), lfh_bytes_remaining(lfh_bytes_remaining) + { + } - // this code grabs an LFH struct from file block(s) that are - // passed to the callback. 
usually, one call copies the whole thing, - // but the LFH may straddle a block boundary. - // - // rationale: this allows using temp buffers for zip_fixup_lfh, - // which avoids involving the file buffer manager and thus - // avoids cluttering the trace and cache contents. - static LibError lfh_copier_cb(uintptr_t cbData, const u8* block, size_t size) - { - LFH_Copier* p = (LFH_Copier*)cbData; - - debug_assert(size <= p->lfh_bytes_remaining); - memcpy(p->lfh_dst, block, size); - p->lfh_dst += size; - p->lfh_bytes_remaining -= size; + // this code grabs an LFH struct from file block(s) that are + // passed to the callback. usually, one call copies the whole thing, + // but the LFH may straddle a block boundary. + // + // rationale: this allows using temp buffers for zip_fixup_lfh, + // which avoids involving the file buffer manager and thus + // avoids cluttering the trace and cache contents. + LibError operator()(const u8* block, size_t size) const + { + debug_assert(size <= lfh_bytes_remaining); + memcpy(lfh_dst, block, size); + lfh_dst += size; + lfh_bytes_remaining -= size; - return INFO::CB_CONTINUE; - } + return INFO::CB_CONTINUE; + } + + mutable u8* lfh_dst; + mutable size_t lfh_bytes_remaining; + }; /** * fix up m_ofs (adjust it to point to cdata instead of the LFH). * * note: we cannot use CDFH filename and extra field lengths to skip * past LFH since that may not mirror CDFH (has happened). * * this is called at file-open time instead of while mounting to * reduce seeks: since reading the file will typically follow, the * block cache entirely absorbs the IO cost. **/ void AdjustOffset() const { if(!(m_flags & NeedsFixup)) return; m_flags &= ~NeedsFixup; // performance note: this ends up reading one file block, which is // only in the block cache if the file starts in the same block as a // previously read file (i.e. both are small). 
LFH lfh; - LFH_Copier params = { (u8*)&lfh, sizeof(LFH) }; - if(io_Scan(m_file, m_ofs, sizeof(LFH), lfh_copier_cb, (uintptr_t)¶ms) == INFO::OK) + io::Operation op(*m_file.get(), 0, sizeof(LFH), m_ofs); + if(io::Run(op, io::Parameters(), LFH_Copier((u8*)&lfh, sizeof(LFH))) == INFO::OK) m_ofs += (off_t)lfh.Size(); } PFile m_file; // all relevant LFH/CDFH fields not covered by FileInfo mutable off_t m_ofs; off_t m_csize; u32 m_checksum; u16 m_method; mutable u16 m_flags; }; //----------------------------------------------------------------------------- // ArchiveReader_Zip //----------------------------------------------------------------------------- class ArchiveReader_Zip : public IArchiveReader { public: ArchiveReader_Zip(const OsPath& pathname) - : m_file(new File(pathname, 'r')) + : m_file(new File(pathname, LIO_READ)) { FileInfo fileInfo; GetFileInfo(pathname, &fileInfo); m_fileSize = fileInfo.Size(); const size_t minFileSize = sizeof(LFH)+sizeof(CDFH)+sizeof(ECDR); debug_assert(m_fileSize >= off_t(minFileSize)); } virtual LibError ReadEntries(ArchiveEntryCallback cb, uintptr_t cbData) { // locate and read Central Directory off_t cd_ofs = 0; size_t cd_numEntries = 0; size_t cd_size = 0; RETURN_ERR(LocateCentralDirectory(m_file, m_fileSize, cd_ofs, cd_numEntries, cd_size)); - shared_ptr buf = io_Allocate(cd_size, cd_ofs); - u8* cd; - RETURN_ERR(io_Read(m_file, cd_ofs, buf.get(), cd_size, cd)); + UniqueRange buf(io::Allocate(cd_size)); + + io::Operation op(*m_file.get(), buf.get(), cd_size, cd_ofs); + RETURN_ERR(io::Run(op)); // iterate over Central Directory - const u8* pos = cd; + const u8* pos = (const u8*)buf.get(); for(size_t i = 0; i < cd_numEntries; i++) { // scan for next CDFH - CDFH* cdfh = (CDFH*)FindRecord(cd, cd_size, pos, cdfh_magic, sizeof(CDFH)); + CDFH* cdfh = (CDFH*)FindRecord((const u8*)buf.get(), cd_size, pos, cdfh_magic, sizeof(CDFH)); if(!cdfh) WARN_RETURN(ERR::CORRUPTED); const Path relativePathname(cdfh->Pathname()); 
if(!relativePathname.IsDirectory()) { const OsPath name = relativePathname.Filename(); FileInfo fileInfo(name, cdfh->USize(), cdfh->MTime()); shared_ptr archiveFile(new ArchiveFile_Zip(m_file, cdfh->HeaderOffset(), cdfh->CSize(), cdfh->Checksum(), cdfh->Method())); cb(relativePathname, fileInfo, archiveFile, cbData); } pos += cdfh->Size(); } return INFO::OK; } private: /** * Scan buffer for a Zip file record. * * @param buf * @param size * @param start position within buffer * @param magic signature of record * @param recordSize size of record (including signature) * @return pointer to record within buffer or 0 if not found. **/ static const u8* FindRecord(const u8* buf, size_t size, const u8* start, u32 magic, size_t recordSize) { // (don't use as the counter - otherwise we can't tell if // scanning within the buffer was necessary.) for(const u8* p = start; p <= buf+size-recordSize; p++) { // found it if(*(u32*)p == magic) { debug_assert(p == start); // otherwise, the archive is a bit broken return p; } } // passed EOF, didn't find it. // note: do not warn - this happens in the initial ECDR search at // EOF if the archive contains a comment field. return 0; } // search for ECDR in the last bytes of the file. // if found, fill with a copy of the (little-endian) ECDR and // return INFO::OK, otherwise IO error or ERR::CORRUPTED. 
static LibError ScanForEcdr(const PFile& file, off_t fileSize, u8* buf, size_t maxScanSize, size_t& cd_numEntries, off_t& cd_ofs, size_t& cd_size) { // don't scan more than the entire file const size_t scanSize = std::min(maxScanSize, size_t(fileSize)); // read desired chunk of file into memory const off_t ofs = fileSize - off_t(scanSize); - u8* data; - RETURN_ERR(io_Read(file, ofs, buf, scanSize, data)); + io::Operation op(*file.get(), buf, scanSize, ofs); + RETURN_ERR(io::Run(op)); // look for ECDR in buffer - const ECDR* ecdr = (const ECDR*)FindRecord(data, scanSize, data, ecdr_magic, sizeof(ECDR)); + const ECDR* ecdr = (const ECDR*)FindRecord(buf, scanSize, buf, ecdr_magic, sizeof(ECDR)); if(!ecdr) return INFO::CANNOT_HANDLE; ecdr->Decompose(cd_numEntries, cd_ofs, cd_size); return INFO::OK; } static LibError LocateCentralDirectory(const PFile& file, off_t fileSize, off_t& cd_ofs, size_t& cd_numEntries, size_t& cd_size) { const size_t maxScanSize = 66000u; // see below - shared_ptr buf = io_Allocate(maxScanSize, BLOCK_SIZE-1); // assume worst-case for alignment + UniqueRange buf(io::Allocate(maxScanSize)); // expected case: ECDR at EOF; no file comment - LibError ret = ScanForEcdr(file, fileSize, const_cast(buf.get()), sizeof(ECDR), cd_numEntries, cd_ofs, cd_size); + LibError ret = ScanForEcdr(file, fileSize, (u8*)buf.get(), sizeof(ECDR), cd_numEntries, cd_ofs, cd_size); if(ret == INFO::OK) return INFO::OK; // worst case: ECDR precedes 64 KiB of file comment - ret = ScanForEcdr(file, fileSize, const_cast(buf.get()), maxScanSize, cd_numEntries, cd_ofs, cd_size); + ret = ScanForEcdr(file, fileSize, (u8*)buf.get(), maxScanSize, cd_numEntries, cd_ofs, cd_size); if(ret == INFO::OK) return INFO::OK; // both ECDR scans failed - this is not a valid Zip file. - RETURN_ERR(io_ReadAligned(file, 0, const_cast(buf.get()), sizeof(LFH))); + io::Operation op(*file.get(), buf.get(), sizeof(LFH)); + RETURN_ERR(io::Run(op)); // the Zip file has an LFH but lacks an ECDR. 
this can happen if // the user hard-exits while an archive is being written. // notes: // - return ERR::CORRUPTED so VFS will not include this file. // - we could work around this by scanning all LFHs, but won't bother // because it'd be slow. // - do not warn - the corrupt archive will be deleted on next // successful archive builder run anyway. - if(FindRecord(buf.get(), sizeof(LFH), buf.get(), lfh_magic, sizeof(LFH))) + if(FindRecord((const u8*)buf.get(), sizeof(LFH), (const u8*)buf.get(), lfh_magic, sizeof(LFH))) return ERR::CORRUPTED; // NOWARN // totally bogus else WARN_RETURN(ERR::ARCHIVE_UNKNOWN_FORMAT); } PFile m_file; off_t m_fileSize; }; PIArchiveReader CreateArchiveReader_Zip(const OsPath& archivePathname) { return PIArchiveReader(new ArchiveReader_Zip(archivePathname)); } //----------------------------------------------------------------------------- // ArchiveWriter_Zip //----------------------------------------------------------------------------- class ArchiveWriter_Zip : public IArchiveWriter { public: ArchiveWriter_Zip(const OsPath& archivePathname, bool noDeflate) - : m_file(new File(archivePathname, 'w')), m_fileSize(0) - , m_unalignedWriter(new UnalignedWriter(m_file, 0)) + : m_file(new File(archivePathname, LIO_WRITE)), m_fileSize(0) , m_numEntries(0), m_noDeflate(noDeflate) { THROW_ERR(pool_create(&m_cdfhPool, 10*MiB, 0)); } ~ArchiveWriter_Zip() { // append an ECDR to the CDFH list (this allows us to // write out both to the archive file in one burst) const size_t cd_size = m_cdfhPool.da.pos; ECDR* ecdr = (ECDR*)pool_alloc(&m_cdfhPool, sizeof(ECDR)); if(!ecdr) throw std::bad_alloc(); const off_t cd_ofs = m_fileSize; ecdr->Init(m_numEntries, cd_ofs, cd_size); - m_unalignedWriter->Append(m_cdfhPool.da.base, cd_size+sizeof(ECDR)); - m_unalignedWriter->Flush(); - m_unalignedWriter.reset(); + write(m_file->Descriptor(), m_cdfhPool.da.base, cd_size+sizeof(ECDR)); (void)pool_destroy(&m_cdfhPool); - - const OsPath pathname = m_file->Pathname(); // 
(must be retrieved before resetting m_file) - m_file.reset(); - - m_fileSize += off_t(cd_size+sizeof(ECDR)); - - // remove padding added by UnalignedWriter - wtruncate(pathname, m_fileSize); } LibError AddFile(const OsPath& pathname, const OsPath& pathnameInArchive) { FileInfo fileInfo; RETURN_ERR(GetFileInfo(pathname, &fileInfo)); const off_t usize = fileInfo.Size(); // skip 0-length files. // rationale: zip.cpp needs to determine whether a CDFH entry is // a file or directory (the latter are written by some programs but // not needed - they'd only pollute the file table). // it looks like checking for usize=csize=0 is the safest way - // relying on file attributes (which are system-dependent!) is // even less safe. // we thus skip 0-length files to avoid confusing them with directories. if(!usize) return INFO::SKIPPED; PFile file(new File); - RETURN_ERR(file->Open(pathname, 'r')); + RETURN_ERR(file->Open(pathname, LIO_READ)); const size_t pathnameLength = pathnameInArchive.string().length(); // choose method and the corresponding codec ZipMethod method; PICodec codec; if(m_noDeflate || IsFileTypeIncompressible(pathnameInArchive)) { method = ZIP_METHOD_NONE; codec = CreateCodec_ZLibNone(); } else { method = ZIP_METHOD_DEFLATE; codec = CreateCompressor_ZLibDeflate(); } // allocate memory const size_t csizeMax = codec->MaxOutputSize(size_t(usize)); - shared_ptr buf = io_Allocate(sizeof(LFH) + pathnameLength + csizeMax); + UniqueRange buf(io::Allocate(sizeof(LFH) + pathnameLength + csizeMax)); // read and compress file contents size_t csize; u32 checksum; { u8* cdata = (u8*)buf.get() + sizeof(LFH) + pathnameLength; Stream stream(codec); stream.SetOutputBuffer(cdata, csizeMax); - RETURN_ERR(io_Scan(file, 0, usize, FeedStream, (uintptr_t)&stream)); + io::Operation op(*file.get(), 0, usize); + RETURN_ERR(io::Run(op, io::Parameters(), std::bind(&Stream::Feed, &stream, std::placeholders::_1, std::placeholders::_2))); RETURN_ERR(stream.Finish()); csize = stream.OutSize(); 
checksum = stream.Checksum(); } // build LFH { LFH* lfh = (LFH*)buf.get(); lfh->Init(fileInfo, (off_t)csize, method, checksum, pathnameInArchive); } // append a CDFH to the central directory (in memory) const off_t ofs = m_fileSize; const size_t prev_pos = m_cdfhPool.da.pos; // (required to determine padding size) const size_t cdfhSize = sizeof(CDFH) + pathnameLength; CDFH* cdfh = (CDFH*)pool_alloc(&m_cdfhPool, cdfhSize); if(!cdfh) WARN_RETURN(ERR::NO_MEM); const size_t slack = m_cdfhPool.da.pos - prev_pos - cdfhSize; cdfh->Init(fileInfo, ofs, (off_t)csize, method, checksum, pathnameInArchive, slack); m_numEntries++; // write LFH, pathname and cdata to file const size_t packageSize = sizeof(LFH) + pathnameLength + csize; - RETURN_ERR(m_unalignedWriter->Append(buf.get(), packageSize)); + if(write(m_file->Descriptor(), buf.get(), packageSize) < 0) + WARN_RETURN(ERR::IO); m_fileSize += (off_t)packageSize; return INFO::OK; } private: static bool IsFileTypeIncompressible(const OsPath& pathname) { const OsPath extension = pathname.Extension(); // file extensions that we don't want to compress static const wchar_t* incompressibleExtensions[] = { L".zip", L".rar", L".jpg", L".jpeg", L".png", L".ogg", L".mp3" }; for(size_t i = 0; i < ARRAY_SIZE(incompressibleExtensions); i++) { if(extension == incompressibleExtensions[i]) return true; } return false; } PFile m_file; off_t m_fileSize; - PUnalignedWriter m_unalignedWriter; Pool m_cdfhPool; size_t m_numEntries; bool m_noDeflate; }; PIArchiveWriter CreateArchiveWriter_Zip(const OsPath& archivePathname, bool noDeflate) { return PIArchiveWriter(new ArchiveWriter_Zip(archivePathname, noDeflate)); } Index: ps/trunk/source/lib/file/archive/stream.cpp =================================================================== --- ps/trunk/source/lib/file/archive/stream.cpp (revision 9349) +++ ps/trunk/source/lib/file/archive/stream.cpp (revision 9350) @@ -1,147 +1,138 @@ /* Copyright (c) 2010 Wildfire Games * * Permission is hereby granted, 
free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "precompiled.h" #include "lib/file/archive/stream.h" #include "lib/allocators/allocators.h" // page_aligned_alloc #include "lib/allocators/shared_ptr.h" #include "lib/file/archive/codec.h" //#include "lib/timer.h" //TIMER_ADD_CLIENT(tc_stream); OutputBufferManager::OutputBufferManager() { Reset(); } void OutputBufferManager::Reset() { m_buffer = 0; m_size = 0; m_capacity = 0; } void OutputBufferManager::SetBuffer(u8* buffer, size_t size) { debug_assert(IsAllowableBuffer(buffer, size)); m_buffer = buffer; m_size = size; } void OutputBufferManager::AllocateBuffer(size_t size) { // notes: // - this implementation allows reusing previous buffers if they // are big enough, which reduces the number of allocations. // - no further attempts to reduce allocations (e.g. by doubling // the current size) are made; this strategy is enough. // - Pool etc. cannot be used because files may be huge (larger // than the address space of 32-bit systems). 
// no buffer or the previous one wasn't big enough: reallocate if(!m_mem || m_capacity < size) { m_mem.reset((u8*)page_aligned_alloc(size), PageAlignedDeleter(size)); m_capacity = size; } SetBuffer(m_mem.get(), size); } bool OutputBufferManager::IsAllowableBuffer(u8* buffer, size_t size) { // none yet established if(m_buffer == 0 && m_size == 0) return true; // same as last time (happens with temp buffers) if(m_buffer == buffer && m_size == size) return true; // located after the last buffer (note: not necessarily after // the entire buffer; a lack of input can cause the output buffer // to only partially be used before the next call.) if((unsigned)(buffer - m_buffer) <= m_size) return true; return false; } //----------------------------------------------------------------------------- Stream::Stream(const PICodec& codec) : m_codec(codec) , m_inConsumed(0), m_outProduced(0) { } void Stream::AllocateOutputBuffer(size_t outSizeMax) { m_outputBufferManager.AllocateBuffer(outSizeMax); } void Stream::SetOutputBuffer(u8* out, size_t outSize) { m_outputBufferManager.SetBuffer(out, outSize); } LibError Stream::Feed(const u8* in, size_t inSize) { if(m_outProduced == m_outputBufferManager.Size()) // output buffer full; must not call Process return INFO::OK; size_t inConsumed, outProduced; u8* const out = m_outputBufferManager.Buffer() + m_outProduced; const size_t outSize = m_outputBufferManager.Size() - m_outProduced; RETURN_ERR(m_codec->Process(in, inSize, out, outSize, inConsumed, outProduced)); m_inConsumed += inConsumed; m_outProduced += outProduced; return INFO::CB_CONTINUE; } LibError Stream::Finish() { size_t outProduced; RETURN_ERR(m_codec->Finish(m_checksum, outProduced)); m_outProduced += outProduced; return INFO::OK; } - - -LibError FeedStream(uintptr_t cbData, const u8* in, size_t inSize) -{ -// TIMER_ACCRUE(tc_stream); - - Stream& stream = *(Stream*)cbData; - return stream.Feed(in, inSize); -} Index: ps/trunk/source/lib/file/file.h 
=================================================================== --- ps/trunk/source/lib/file/file.h (revision 9349) +++ ps/trunk/source/lib/file/file.h (revision 9350) @@ -1,114 +1,95 @@ /* Copyright (c) 2010 Wildfire Games * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* * simple POSIX file wrapper. 
*/ #ifndef INCLUDED_FILE #define INCLUDED_FILE -struct aiocb; - #include "lib/os_path.h" +#include "lib/posix/posix_aio.h" // opcode: LIO_READ or LIO_WRITE namespace ERR { const LibError FILE_ACCESS = -110300; - const LibError IO = -110301; -} - -namespace FileImpl -{ - LIB_API LibError Open(const OsPath& pathname, wchar_t mode, int& fd); - LIB_API void Close(int& fd); - LIB_API LibError IO(int fd, wchar_t mode, off_t ofs, u8* buf, size_t size); - LIB_API LibError Issue(aiocb& req, int fd, wchar_t mode, off_t alignedOfs, u8* alignedBuf, size_t alignedSize); - LIB_API LibError WaitUntilComplete(aiocb& req, u8*& alignedBuf, size_t& alignedSize); } +LIB_API LibError FileOpen(const OsPath& pathname, int opcode, int& fd); +LIB_API void FileClose(int& fd); class File { public: File() - : m_pathname(), m_fd(0) - { - } - - LibError Open(const OsPath& pathname, wchar_t mode) - { - RETURN_ERR(FileImpl::Open(pathname, mode, m_fd)); - m_pathname = pathname; - m_mode = mode; - return INFO::OK; - } - - void Close() + : pathname(), fd(-1) { - FileImpl::Close(m_fd); } - File(const OsPath& pathname, wchar_t mode) + File(const OsPath& pathname, int opcode) { - (void)Open(pathname, mode); + (void)Open(pathname, opcode); } ~File() { Close(); } - const OsPath& Pathname() const + LibError Open(const OsPath& pathname, int opcode) { - return m_pathname; + RETURN_ERR(FileOpen(pathname, opcode, fd)); + this->pathname = pathname; + this->opcode = opcode; + return INFO::OK; } - wchar_t Mode() const + void Close() { - return m_mode; + FileClose(fd); } - LibError Issue(aiocb& req, wchar_t mode, off_t alignedOfs, u8* alignedBuf, size_t alignedSize) const + const OsPath& Pathname() const { - return FileImpl::Issue(req, m_fd, mode, alignedOfs, alignedBuf, alignedSize); + return pathname; } - LibError Write(off_t ofs, const u8* buf, size_t size) + int Descriptor() const { - return FileImpl::IO(m_fd, 'w', ofs, const_cast(buf), size); + return fd; } - LibError Read(off_t ofs, u8* buf, size_t size) 
const + int Opcode() const { - return FileImpl::IO(m_fd, 'r', ofs, buf, size); + return opcode; } private: - OsPath m_pathname; - int m_fd; - wchar_t m_mode; + OsPath pathname; + int fd; + int opcode; }; typedef shared_ptr PFile; #endif // #ifndef INCLUDED_FILE Index: ps/trunk/source/lib/file/io/write_buffer.cpp =================================================================== --- ps/trunk/source/lib/file/io/write_buffer.cpp (revision 9349) +++ ps/trunk/source/lib/file/io/write_buffer.cpp (revision 9350) @@ -1,125 +1,134 @@ /* Copyright (c) 2010 Wildfire Games * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include "precompiled.h" #include "lib/file/io/write_buffer.h" #include "lib/bits.h" // IsAligned #include "lib/sysdep/cpu.h" +#include "lib/allocators/shared_ptr.h" #include "lib/file/io/io.h" -#include "lib/file/io/io_align.h" + + +static const size_t BLOCK_SIZE = 512*KiB; WriteBuffer::WriteBuffer() - : m_capacity(4096), m_data(io_Allocate(m_capacity)), m_size(0) + : m_capacity(pageSize), m_data((u8*)rtl_AllocateAligned(m_capacity, maxSectorSize), AlignedDeleter()), m_size(0) { } void WriteBuffer::Append(const void* data, size_t size) { if(m_size + size > m_capacity) { m_capacity = round_up_to_pow2(m_size + size); - shared_ptr newData = io_Allocate(m_capacity); + shared_ptr newData; + AllocateAligned(newData, m_capacity, maxSectorSize); memcpy(newData.get(), m_data.get(), m_size); m_data = newData; } memcpy(m_data.get() + m_size, data, size); m_size += size; } void WriteBuffer::Overwrite(const void* data, size_t size, size_t offset) { debug_assert(offset+size < m_size); memcpy(m_data.get()+offset, data, size); } //----------------------------------------------------------------------------- // UnalignedWriter //----------------------------------------------------------------------------- UnalignedWriter::UnalignedWriter(const PFile& file, off_t ofs) - : m_file(file), m_alignedBuf(io_Allocate(BLOCK_SIZE)) + : m_file(file), m_alignedBuf((u8*)rtl_AllocateAligned(BLOCK_SIZE, maxSectorSize), AlignedDeleter()) { - m_alignedOfs = AlignedOffset(ofs); + m_alignedOfs = round_down(ofs, (off_t)BLOCK_SIZE); const size_t misalignment = (size_t)(ofs - m_alignedOfs); if(misalignment) - io_ReadAligned(m_file, m_alignedOfs, m_alignedBuf.get(), BLOCK_SIZE); + { + io::Operation op(*m_file.get(), m_alignedBuf.get(), BLOCK_SIZE, m_alignedOfs); + THROW_ERR(io::Run(op)); + } m_bytesUsed = misalignment; } UnalignedWriter::~UnalignedWriter() { Flush(); } LibError UnalignedWriter::Append(const u8* data, size_t size) const { while(size != 0) { // optimization: write directly from the 
input buffer, if possible const size_t alignedSize = (size / BLOCK_SIZE) * BLOCK_SIZE; - if(m_bytesUsed == 0 && IsAligned(data, SECTOR_SIZE) && alignedSize != 0) + if(m_bytesUsed == 0 && IsAligned(data, maxSectorSize) && alignedSize != 0) { - RETURN_ERR(io_WriteAligned(m_file, m_alignedOfs, data, alignedSize)); + io::Operation op(*m_file.get(), (void*)data, alignedSize, m_alignedOfs); + RETURN_ERR(io::Run(op)); m_alignedOfs += (off_t)alignedSize; data += alignedSize; size -= alignedSize; } const size_t chunkSize = std::min(size, BLOCK_SIZE-m_bytesUsed); memcpy(m_alignedBuf.get()+m_bytesUsed, data, chunkSize); m_bytesUsed += chunkSize; data += chunkSize; size -= chunkSize; if(m_bytesUsed == BLOCK_SIZE) RETURN_ERR(WriteBlock()); } return INFO::OK; } void UnalignedWriter::Flush() const { if(m_bytesUsed) { memset(m_alignedBuf.get()+m_bytesUsed, 0, BLOCK_SIZE-m_bytesUsed); (void)WriteBlock(); } } LibError UnalignedWriter::WriteBlock() const { - RETURN_ERR(io_WriteAligned(m_file, m_alignedOfs, m_alignedBuf.get(), BLOCK_SIZE)); + io::Operation op(*m_file.get(), m_alignedBuf.get(), BLOCK_SIZE, m_alignedOfs); + RETURN_ERR(io::Run(op)); m_alignedOfs += BLOCK_SIZE; m_bytesUsed = 0; return INFO::OK; } Index: ps/trunk/source/lib/file/io/io.cpp =================================================================== --- ps/trunk/source/lib/file/io/io.cpp (revision 9349) +++ ps/trunk/source/lib/file/io/io.cpp (revision 9350) @@ -1,341 +1,138 @@ -/* Copyright (c) 2010 Wildfire Games +/* Copyright (c) 2011 Wildfire Games * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright 
notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "precompiled.h" #include "lib/file/io/io.h" -#include "lib/posix/posix_aio.h" -#include "lib/allocators/allocators.h" // AllocatorChecker -#include "lib/file/file.h" -#include "lib/file/common/file_stats.h" -#include "lib/file/io/block_cache.h" -#include "lib/file/io/io_align.h" - -static const size_t ioDepth = 16; +#include "lib/sysdep/rtl.h" +ERROR_ASSOCIATE(ERR::IO, L"Error during IO", EIO); -// the underlying aio implementation likes buffer and offset to be -// sector-aligned; if not, the transfer goes through an align buffer, -// and requires an extra memcpy. +namespace io { + +// the Windows aio implementation requires buffer and offset to be +// sector-aligned. // // if the user specifies an unaligned buffer, there's not much we can // do - we can't assume the buffer contains padding. therefore, // callers should let us allocate the buffer if possible. // // if ofs misalign = buffer, only the first and last blocks will need // to be copied by aio, since we read up to the next block boundary. // otherwise, everything will have to be copied; at least we split // the read into blocks, so aio's buffer won't have to cover the // whole file. // we don't do any caching or alignment here - this is just a thin // AIO wrapper. rationale: // - aligning the transfer isn't possible here since we have no control // over the buffer, i.e. we cannot read more data than requested. 
-// instead, this is done in io_manager. +// instead, this is done in manager. // - transfer sizes here are arbitrary (i.e. not block-aligned); // that means the cache would have to handle this or also split them up -// into blocks, which would duplicate the abovementioned work. +// into blocks, which would duplicate the above mentioned work. // - if caching here, we'd also have to handle "forwarding" (i.e. // desired block has been issued but isn't yet complete). again, it -// is easier to let the synchronous io_manager handle this. -// - finally, io_manager knows more about whether the block should be cached +// is easier to let the synchronous manager handle this. +// - finally, manager knows more about whether the block should be cached // (e.g. whether another block request will follow), but we don't // currently make use of this. // // disadvantages: // - streamed data will always be read from disk. that's not a problem, // because such data (e.g. music, long speech) is unlikely to be used // again soon. // - prefetching (issuing the next few blocks from archive/file during // idle time to satisfy potential future IOs) requires extra buffers; // this is a bit more complicated than just using the cache as storage. 
-//----------------------------------------------------------------------------- -// allocator -//----------------------------------------------------------------------------- - -#ifndef NDEBUG -static AllocatorChecker allocatorChecker; -#endif - -class IoDeleter -{ -public: - IoDeleter(size_t paddedSize) - : m_paddedSize(paddedSize) - { - } - - void operator()(u8* mem) - { - debug_assert(m_paddedSize != 0); -#ifndef NDEBUG - allocatorChecker.OnDeallocate(mem, m_paddedSize); -#endif - page_aligned_free(mem, m_paddedSize); - m_paddedSize = 0; - } - -private: - size_t m_paddedSize; -}; - - -shared_ptr io_Allocate(size_t size, off_t ofs) +UniqueRange Allocate(size_t size, size_t alignment) { - debug_assert(size != 0); - - const size_t paddedSize = (size_t)PaddedSize(size, ofs); - u8* mem = (u8*)page_aligned_alloc(paddedSize); - if(!mem) - throw std::bad_alloc(); - -#ifndef NDEBUG - allocatorChecker.OnAllocate(mem, paddedSize); -#endif - - return shared_ptr(mem, IoDeleter(paddedSize)); + debug_assert(is_pow2(alignment)); + if(alignment <= idxDeleterBits) + alignment = idxDeleterBits+1; + + const size_t alignedSize = round_up(size, alignment); + const UniqueRange::pointer p = rtl_AllocateAligned(alignedSize, alignment); + return UniqueRange(p, size, idxDeleterAligned); } -//----------------------------------------------------------------------------- -// BlockIo -//----------------------------------------------------------------------------- - -class BlockIo +LibError Issue(aiocb& cb, size_t queueDepth) { -public: - LibError Issue(const PFile& file, off_t alignedOfs, u8* alignedBuf) +#if CONFIG2_FILE_ENABLE_AIO + if(queueDepth > 1) { - m_blockId = BlockId(file->Pathname(), alignedOfs); - if(file->Mode() == 'r') - { - if(s_blockCache.Retrieve(m_blockId, m_cachedBlock)) - { - stats_block_cache(CR_HIT); - - // copy from cache into user buffer - if(alignedBuf) - { - memcpy(alignedBuf, m_cachedBlock.get(), BLOCK_SIZE); - m_alignedBuf = alignedBuf; - } - // return cached 
block - else - { - m_alignedBuf = const_cast(m_cachedBlock.get()); - } - - return INFO::OK; - } - else - { - stats_block_cache(CR_MISS); - // fall through to the actual issue.. - } - } - - stats_io_check_seek(m_blockId); - - // transfer directly to/from user buffer - if(alignedBuf) - { - m_alignedBuf = alignedBuf; - } - // transfer into newly allocated temporary block - else - { - m_tempBlock = io_Allocate(BLOCK_SIZE); - m_alignedBuf = const_cast(m_tempBlock.get()); - } - - return file->Issue(m_req, file->Mode(), alignedOfs, m_alignedBuf, BLOCK_SIZE); + const int ret = (cb.aio_lio_opcode == LIO_WRITE)? aio_write(&cb): aio_read(&cb); + RETURN_ERR(LibError_from_posix(ret)); } - - LibError WaitUntilComplete(const u8*& block, size_t& blockSize) + else +#endif { - if(m_cachedBlock) - { - block = m_alignedBuf; - blockSize = BLOCK_SIZE; - return INFO::OK; - } - - RETURN_ERR(FileImpl::WaitUntilComplete(m_req, const_cast(block), blockSize)); + debug_assert(lseek(cb.aio_fildes, cb.aio_offset, SEEK_SET) == cb.aio_offset); - if(m_tempBlock) - s_blockCache.Add(m_blockId, m_tempBlock); + void* buf = (void*)cb.aio_buf; // cast from volatile void* + const ssize_t bytesTransferred = (cb.aio_lio_opcode == LIO_WRITE)? 
write(cb.aio_fildes, buf, cb.aio_nbytes) : read(cb.aio_fildes, buf, cb.aio_nbytes); + if(bytesTransferred < 0) + return LibError_from_errno(); - return INFO::OK; + cb.aio_nbytes = (size_t)bytesTransferred; } -private: - static BlockCache s_blockCache; - - BlockId m_blockId; - - // the address that WaitUntilComplete will return - // (cached or temporary block, or user buffer) - u8* m_alignedBuf; - - shared_ptr m_cachedBlock; - shared_ptr m_tempBlock; - - aiocb m_req; -}; - -BlockCache BlockIo::s_blockCache; - + return INFO::OK; +} -//----------------------------------------------------------------------------- -// IoSplitter -//----------------------------------------------------------------------------- -class IoSplitter +LibError WaitUntilComplete(aiocb& cb, size_t queueDepth) { - NONCOPYABLE(IoSplitter); -public: - IoSplitter(off_t ofs, u8* alignedBuf, off_t size) - : m_ofs(ofs), m_alignedBuf(alignedBuf), m_size(size) - , m_totalIssued(0), m_totalTransferred(0) - { - m_alignedOfs = AlignedOffset(ofs); - m_alignedSize = PaddedSize(size, ofs); - m_misalignment = size_t(ofs - m_alignedOfs); - } - - LibError Run(const PFile& file, IoCallback cb = 0, uintptr_t cbData = 0) - { - ScopedIoMonitor monitor; - - // (issue even if cache hit because blocks must be processed in order) - std::deque pendingIos; - for(;;) - { - while(pendingIos.size() < ioDepth && m_totalIssued < m_alignedSize) - { - pendingIos.push_back(BlockIo()); - const off_t alignedOfs = m_alignedOfs + m_totalIssued; - u8* const alignedBuf = m_alignedBuf? 
m_alignedBuf+m_totalIssued : 0; - RETURN_ERR(pendingIos.back().Issue(file, alignedOfs, alignedBuf)); - m_totalIssued += BLOCK_SIZE; - } - - if(pendingIos.empty()) - break; - - Process(pendingIos.front(), cb, cbData); - pendingIos.pop_front(); - } - - debug_assert(m_totalIssued >= m_totalTransferred && m_totalTransferred >= m_size); - - monitor.NotifyOfSuccess(FI_AIO, file->Mode(), m_totalTransferred); - return INFO::OK; - } - - off_t AlignedOfs() const - { - return m_alignedOfs; - } - -private: - LibError Process(BlockIo& blockIo, IoCallback cb, uintptr_t cbData) const +#if CONFIG2_FILE_ENABLE_AIO + if(queueDepth > 1) { - const u8* block; size_t blockSize; - RETURN_ERR(blockIo.WaitUntilComplete(block, blockSize)); - - // first block: skip past alignment - if(m_totalTransferred == 0) + aiocb* const cbs = &cb; + timespec* const timeout = 0; // infinite +SUSPEND_AGAIN: + errno = 0; + const int ret = aio_suspend(&cbs, 1, timeout); + if(ret != 0) { - block += m_misalignment; - blockSize -= m_misalignment; + if(errno == EINTR) // interrupted by signal + goto SUSPEND_AGAIN; + return LibError_from_errno(); } - // last block: don't include trailing padding - if(m_totalTransferred + (off_t)blockSize > m_size) - blockSize = size_t(m_size - m_totalTransferred); - - m_totalTransferred += (off_t)blockSize; - - if(cb) + const int err = aio_error(&cb); + debug_assert(err != EINPROGRESS); // else aio_return is undefined + ssize_t bytesTransferred = aio_return(&cb); + if(bytesTransferred == -1) // transfer failed { - stats_cb_start(); - LibError ret = cb(cbData, block, blockSize); - stats_cb_finish(); - CHECK_ERR(ret); + errno = err; + return LibError_from_errno(); } - - return INFO::OK; + cb.aio_nbytes = (size_t)bytesTransferred; } +#endif - off_t m_ofs; - u8* m_alignedBuf; - off_t m_size; - - size_t m_misalignment; - off_t m_alignedOfs; - off_t m_alignedSize; - - // (useful, raw data: possibly compressed, but doesn't count padding) - mutable off_t m_totalIssued; - mutable off_t 
m_totalTransferred; -}; - - -LibError io_Scan(const PFile& file, off_t ofs, off_t size, IoCallback cb, uintptr_t cbData) -{ - u8* alignedBuf = 0; // use temporary block buffers - IoSplitter splitter(ofs, alignedBuf, size); - return splitter.Run(file, cb, cbData); -} - - -LibError io_Read(const PFile& file, off_t ofs, u8* alignedBuf, off_t size, u8*& data, IoCallback cb, uintptr_t cbData) -{ - IoSplitter splitter(ofs, alignedBuf, size); - RETURN_ERR(splitter.Run(file, cb, cbData)); - data = alignedBuf + ofs - splitter.AlignedOfs(); return INFO::OK; } - -LibError io_WriteAligned(const PFile& file, off_t alignedOfs, const u8* alignedData, off_t size, IoCallback cb, uintptr_t cbData) -{ - debug_assert(IsAligned_Offset(alignedOfs)); - debug_assert(IsAligned_Data(alignedData)); - - IoSplitter splitter(alignedOfs, const_cast(alignedData), size); - return splitter.Run(file, cb, cbData); -} - - -LibError io_ReadAligned(const PFile& file, off_t alignedOfs, u8* alignedBuf, off_t size, IoCallback cb, uintptr_t cbData) -{ - debug_assert(IsAligned_Offset(alignedOfs)); - debug_assert(IsAligned_Data(alignedBuf)); - - IoSplitter splitter(alignedOfs, alignedBuf, size); - return splitter.Run(file, cb, cbData); -} +} // namespace io Index: ps/trunk/source/lib/file/io/io.h =================================================================== --- ps/trunk/source/lib/file/io/io.h (revision 9349) +++ ps/trunk/source/lib/file/io/io.h (revision 9350) @@ -1,56 +1,334 @@ -/* Copyright (c) 2010 Wildfire Games +/* Copyright (c) 2011 Wildfire Games * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The 
above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* - * IO manager: splits requests into asynchronously issued blocks, - * thus simplifying caching and enabling periodic callbacks. + * provide asynchronous and synchronous I/O with hooks to allow + * overlapped processing or progress reporting. */ #ifndef INCLUDED_IO #define INCLUDED_IO +#include "lib/config2.h" +#include "lib/alignment.h" +#include "lib/bits.h" #include "lib/file/file.h" +#include "lib/sysdep/filesystem.h" // wtruncate -// memory will be allocated from the heap, not the (limited) file cache. -// this makes sense for write buffers that are never used again, -// because we avoid having to displace some other cached items. -LIB_API shared_ptr io_Allocate(size_t size, off_t ofs = 0); +#include "lib/allocators/unique_range.h" -/** - * called after a block IO has completed. - * - * @return INFO::CB_CONTINUE to continue; any other value will cause the - * IO splitter to abort immediately and return that. - * - * this is useful for user progress notification or processing data while - * waiting for the next I/O to complete (without the complexity of threads). 
- **/ -typedef LibError (*IoCallback)(uintptr_t cbData, const u8* block, size_t blockSize); +namespace ERR +{ + const LibError IO = -110301; +} + +namespace io { + +// @return memory suitable for use as an I/O buffer (address is a +// multiple of alignment, size is rounded up to a multiple of alignment) +// +// use this instead of the file cache for write buffers that are +// never reused (avoids displacing other items). +LIB_API UniqueRange Allocate(size_t size, size_t alignment = maxSectorSize); + + +#pragma pack(push, 1) + +// required information for any I/O (this is basically the same as aiocb, +// but also applies to synchronous I/O and has shorter/nicer names.) +struct Operation +{ + // @param buf can be 0, in which case temporary block buffers are allocated. + // otherwise, it must be padded to the I/O alignment, e.g. via io::Allocate. + Operation(const File& file, void* buf, off_t size, off_t offset = 0) + : fd(file.Descriptor()), opcode(file.Opcode()) + , offset(offset), size(size), buf((void*)buf) + { + } + + void Validate() const + { + debug_assert(fd >= 0); + debug_assert(opcode == LIO_READ || opcode == LIO_WRITE); + + debug_assert(offset >= 0); + debug_assert(size >= 0); + // buf can legitimately be 0 (see above) + } + + int fd; + int opcode; + + off_t offset; + off_t size; + void* buf; +}; + + +// optional information how an Operation is to be carried out +struct Parameters +{ + // default to single blocking I/Os + Parameters() + : alignment(1) // no alignment requirements + // use one huge "block" truncated to the requested size. 
+ // (this value is a power of two as required by Validate and + // avoids overflowing off_t in DivideRoundUp) + , blockSize((SIZE_MAX/2)+1) + , queueDepth(1) // disable aio + { + } + + // parameters for asynchronous I/O that maximize throughput on current drives + struct OverlappedTag {}; + Parameters(OverlappedTag) + : alignment(maxSectorSize), blockSize(128*KiB), queueDepth(32) + { + } + + Parameters(size_t blockSize, size_t queueDepth, off_t alignment = maxSectorSize) + : alignment(alignment), blockSize(blockSize), queueDepth(queueDepth) + { + } + + void Validate(const Operation& op) const + { + debug_assert(is_pow2(alignment)); + debug_assert(alignment > 0); + + debug_assert(is_pow2(blockSize)); + debug_assert(pageSize <= blockSize); // no upper limit needed + + debug_assert(1 <= queueDepth && queueDepth <= maxQueueDepth); + + debug_assert(IsAligned(op.offset, alignment)); + // op.size doesn't need to be aligned + debug_assert(IsAligned(op.buf, alignment)); + } + + // (ATTO only allows 10, which improves upon 8) + static const size_t maxQueueDepth = 32; + + off_t alignment; + + size_t blockSize; + + size_t queueDepth; +}; + +#define IO_OVERLAPPED io::Parameters(io::Parameters::OverlappedTag()) + + +struct DefaultCompletedHook +{ + /** + * called after a block I/O has completed. + * + * @return INFO::CB_CONTINUE to proceed; any other value will + * be immediately returned by Run. + * + * allows progress notification and processing data while waiting for + * previous I/Os to complete. + **/ + LibError operator()(const u8* UNUSED(block), size_t UNUSED(blockSize)) const + { + return INFO::CB_CONTINUE; + } +}; + + +struct DefaultIssueHook +{ + /** + * called before a block I/O is issued. + * + * @return INFO::CB_CONTINUE to proceed; any other value will + * be immediately returned by Run. + * + * allows generating the data to write while waiting for + * previous I/Os to complete. 
+ **/ + LibError operator()(aiocb& UNUSED(cb)) const + { + return INFO::CB_CONTINUE; + } +}; + + +// ring buffer of partially initialized aiocb that can be passed +// directly to aio_write etc. after setting offset and buffer. +class ControlBlockRingBuffer +{ +public: + ControlBlockRingBuffer(const Operation& op, const Parameters& p) + : controlBlocks() // zero-initialize + { + // (default p.blockSize is "infinity", so clamp to the total size) + const size_t blockSize = (size_t)std::min((off_t)p.blockSize, op.size); + + const bool temporaryBuffersRequested = (op.buf == 0); + if(temporaryBuffersRequested) + buffers = RVALUE(io::Allocate(blockSize * p.queueDepth, p.alignment)); + + for(size_t i = 0; i < ARRAY_SIZE(controlBlocks); i++) + { + aiocb& cb = operator[](i); + cb.aio_fildes = op.fd; + cb.aio_nbytes = blockSize; + cb.aio_lio_opcode = op.opcode; + if(temporaryBuffersRequested) + cb.aio_buf = (volatile void*)(uintptr_t(buffers.get()) + i * blockSize); + } + } + + INLINE aiocb& operator[](size_t counter) + { + return controlBlocks[counter % ARRAY_SIZE(controlBlocks)]; + } + +private: + UniqueRange buffers; + aiocb controlBlocks[Parameters::maxQueueDepth]; +}; + +#pragma pack(pop) + + +LIB_API LibError Issue(aiocb& cb, size_t queueDepth); +LIB_API LibError WaitUntilComplete(aiocb& cb, size_t queueDepth); + + +//----------------------------------------------------------------------------- +// Run + +// (hooks must be passed by const reference to allow passing rvalues. +// functors with non-const member data can mark them as mutable.) 
+template +static inline LibError Run(const Operation& op, const Parameters& p = Parameters(), const CompletedHook& completedHook = CompletedHook(), const IssueHook& issueHook = IssueHook()) +{ + op.Validate(); + p.Validate(op); + + ControlBlockRingBuffer controlBlockRingBuffer(op, p); + + const off_t numBlocks = DivideRoundUp(op.size, (off_t)p.blockSize); + for(off_t blocksIssued = 0, blocksCompleted = 0; blocksCompleted < numBlocks; blocksCompleted++) + { + for(; blocksIssued != numBlocks && blocksIssued < blocksCompleted + (off_t)p.queueDepth; blocksIssued++) + { + aiocb& cb = controlBlockRingBuffer[blocksIssued]; + cb.aio_offset = op.offset + blocksIssued * p.blockSize; + if(op.buf) + cb.aio_buf = (volatile void*)(uintptr_t(op.buf) + blocksIssued * p.blockSize); + if(blocksIssued == numBlocks-1) + cb.aio_nbytes = round_up(size_t(op.size - blocksIssued * p.blockSize), size_t(p.alignment)); + + RETURN_IF_NOT_CONTINUE(issueHook(cb)); + + RETURN_ERR(Issue(cb, p.queueDepth)); + } + + aiocb& cb = controlBlockRingBuffer[blocksCompleted]; + RETURN_ERR(WaitUntilComplete(cb, p.queueDepth)); + + RETURN_IF_NOT_CONTINUE(completedHook((u8*)cb.aio_buf, cb.aio_nbytes)); + } + + return INFO::OK; +} + +// (overloads allow omitting parameters without requiring a template argument list) +template +static inline LibError Run(const Operation& op, const Parameters& p = Parameters(), const CompletedHook& completedHook = CompletedHook()) +{ + return Run(op, p, completedHook, DefaultIssueHook()); +} + +static inline LibError Run(const Operation& op, const Parameters& p = Parameters()) +{ + return Run(op, p, DefaultCompletedHook(), DefaultIssueHook()); +} + + +//----------------------------------------------------------------------------- +// Store + +// efficient writing requires preallocation, and the resulting file is +// padded to the sector size and needs to be truncated afterwards. +// this function takes care of both. 
+template +static inline LibError Store(const OsPath& pathname, const void* data, size_t size, const Parameters& p = Parameters(), const CompletedHook& completedHook = CompletedHook(), const IssueHook& issueHook = IssueHook()) +{ + File file(pathname, LIO_WRITE); + io::Operation op(file, (void*)data, size); + +#if OS_WIN && CONFIG2_FILE_ENABLE_AIO + (void)waio_Preallocate(op.fd, (off_t)size, p.alignment); +#endif + + RETURN_ERR(io::Run(op, p, completedHook, issueHook)); + + file.Close(); // (required by wtruncate) + + RETURN_ERR(wtruncate(pathname, size)); + + return INFO::OK; +} + +template +static inline LibError Store(const OsPath& pathname, const void* data, size_t size, const Parameters& p = Parameters(), const CompletedHook& completedHook = CompletedHook()) +{ + return Store(pathname, data, size, p, completedHook, DefaultIssueHook()); +} + +static inline LibError Store(const OsPath& pathname, const void* data, size_t size, const Parameters& p = Parameters()) +{ + return Store(pathname, data, size, p, DefaultCompletedHook(), DefaultIssueHook()); +} + + +//----------------------------------------------------------------------------- +// Load + +// convenience function provided for symmetry with Store +template +static inline LibError Load(const OsPath& pathname, void* buf, size_t size, const Parameters& p = Parameters(), const CompletedHook& completedHook = CompletedHook(), const IssueHook& issueHook = IssueHook()) +{ + File file(pathname, LIO_READ); + io::Operation op(file, buf, size); + return io::Run(op, p, completedHook, issueHook); +} -LIB_API LibError io_Scan(const PFile& file, off_t ofs, off_t size, IoCallback cb, uintptr_t cbData); +template +static inline LibError Load(const OsPath& pathname, void* buf, size_t size, const Parameters& p = Parameters(), const CompletedHook& completedHook = CompletedHook()) +{ + return Load(pathname, buf, size, p, completedHook, DefaultIssueHook()); +} -LIB_API LibError io_Read(const PFile& file, off_t ofs, u8* 
alignedBuf, off_t size, u8*& data, IoCallback cb = 0, uintptr_t cbData = 0); +static inline LibError Load(const OsPath& pathname, void* buf, size_t size, const Parameters& p = Parameters()) +{ + return Load(pathname, buf, size, p, DefaultCompletedHook(), DefaultIssueHook()); +} -LIB_API LibError io_WriteAligned(const PFile& file, off_t alignedOfs, const u8* alignedData, off_t size, IoCallback cb = 0, uintptr_t cbData = 0); -LIB_API LibError io_ReadAligned(const PFile& file, off_t alignedOfs, u8* alignedBuf, off_t size, IoCallback cb = 0, uintptr_t cbData = 0); +} // namespace io #endif // #ifndef INCLUDED_IO Index: ps/trunk/source/lib/file/common/trace.cpp =================================================================== --- ps/trunk/source/lib/file/common/trace.cpp (revision 9349) +++ ps/trunk/source/lib/file/common/trace.cpp (revision 9350) @@ -1,235 +1,234 @@ /* Copyright (c) 2010 Wildfire Games * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ /* * IO event recording */ #include "precompiled.h" #include "lib/file/common/trace.h" #include #include #include "lib/allocators/pool.h" -#include "lib/bits.h" // round_up #include "lib/timer.h" // timer_Time #include "lib/sysdep/sysdep.h" // sys_OpenFile /*virtual*/ ITrace::~ITrace() { } //----------------------------------------------------------------------------- TraceEntry::TraceEntry(EAction action, const Path& pathname, size_t size) : m_timestamp((float)timer_Time()) , m_action(action) , m_pathname(pathname) , m_size(size) { } TraceEntry::TraceEntry(const std::wstring& text) { // swscanf is far too awkward to get working cross-platform, // so use iostreams here instead wchar_t dummy; wchar_t action; std::wstringstream stream(text); stream >> m_timestamp; stream >> dummy; debug_assert(dummy == ':'); stream >> action; debug_assert(action == 'L' || action == 'S'); m_action = (EAction)action; stream >> dummy; debug_assert(dummy == '"'); Path::String pathname; std::getline(stream, pathname, L'"'); m_pathname = Path(pathname); stream >> m_size; debug_assert(stream.get() == '\n'); debug_assert(stream.good()); debug_assert(stream.get() == WEOF); } std::wstring TraceEntry::EncodeAsText() const { const wchar_t action = (wchar_t)m_action; wchar_t buf[1000]; swprintf_s(buf, ARRAY_SIZE(buf), L"%#010f: %c \"%ls\" %lu\n", m_timestamp, action, m_pathname.string().c_str(), (unsigned long)m_size); return buf; } //----------------------------------------------------------------------------- class Trace_Dummy : public ITrace { public: Trace_Dummy(size_t UNUSED(maxSize)) { } virtual void NotifyLoad(const Path& UNUSED(pathname), size_t UNUSED(size)) { } virtual void NotifyStore(const Path& UNUSED(pathname), size_t UNUSED(size)) { } virtual LibError Load(const OsPath& UNUSED(pathname)) { return INFO::OK; } virtual LibError Store(const OsPath& UNUSED(pathname)) const { return INFO::OK; } virtual const TraceEntry* Entries() const { return 0; } virtual size_t NumEntries() const { 
return 0; } }; //----------------------------------------------------------------------------- class Trace : public ITrace { public: Trace(size_t maxSize) { (void)pool_create(&m_pool, maxSize, sizeof(TraceEntry)); } virtual ~Trace() { for(size_t i = 0; i < NumEntries(); i++) { TraceEntry* entry = (TraceEntry*)(uintptr_t(m_pool.da.base) + i*m_pool.el_size); entry->~TraceEntry(); } (void)pool_destroy(&m_pool); } virtual void NotifyLoad(const Path& pathname, size_t size) { new(Allocate()) TraceEntry(TraceEntry::Load, pathname, size); } virtual void NotifyStore(const Path& pathname, size_t size) { new(Allocate()) TraceEntry(TraceEntry::Store, pathname, size); } virtual LibError Load(const OsPath& pathname) { pool_free_all(&m_pool); errno = 0; FILE* file = sys_OpenFile(pathname, "rt"); if(!file) return LibError_from_errno(); for(;;) { wchar_t text[500]; if(!fgetws(text, ARRAY_SIZE(text)-1, file)) break; new(Allocate()) TraceEntry(text); } fclose(file); return INFO::OK; } virtual LibError Store(const OsPath& pathname) const { errno = 0; FILE* file = sys_OpenFile(pathname, "at"); if(!file) return LibError_from_errno(); for(size_t i = 0; i < NumEntries(); i++) { std::wstring text = Entries()[i].EncodeAsText(); fputws(text.c_str(), file); } (void)fclose(file); return INFO::OK; } virtual const TraceEntry* Entries() const { return (const TraceEntry*)m_pool.da.base; } virtual size_t NumEntries() const { return m_pool.da.pos / m_pool.el_size; } private: void* Allocate() { void* p = pool_alloc(&m_pool, 0); debug_assert(p); return p; } Pool m_pool; }; PITrace CreateDummyTrace(size_t maxSize) { return PITrace(new Trace_Dummy(maxSize)); } PITrace CreateTrace(size_t maxSize) { return PITrace(new Trace(maxSize)); } Index: ps/trunk/source/lib/file/common/file_stats.h =================================================================== --- ps/trunk/source/lib/file/common/file_stats.h (revision 9349) +++ ps/trunk/source/lib/file/common/file_stats.h (revision 9350) @@ -1,121 +1,118 @@ /* 
Copyright (c) 2010 Wildfire Games * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* * gathers statistics from all file modules. 
*/ #ifndef INCLUDED_FILE_STATS #define INCLUDED_FILE_STATS +#include "lib/posix/posix_aio.h" // LIO_READ, LIO_WRITE + #define FILE_STATS_ENABLED 0 enum FileIOImplentation { FI_LOWIO, FI_AIO, FI_BCACHE, FI_MAX_IDX }; -enum FileOp { FO_READ, FO_WRITE }; enum CacheRet { CR_HIT, CR_MISS }; -#include "lib/file/io/block_cache.h" // BlockId - #if FILE_STATS_ENABLED // vfs extern void stats_vfs_file_add(size_t file_size); extern void stats_vfs_file_remove(size_t file_size); extern void stats_vfs_init_start(); extern void stats_vfs_init_finish(); // file // currently not called because string_pool is now in lib/allocators extern void stats_unique_name(size_t name_len); extern void stats_open(); extern void stats_close(); // file_buf extern void stats_buf_alloc(size_t size, size_t alignedSize); extern void stats_buf_free(); extern void stats_buf_ref(); // file_io extern void stats_io_user_request(size_t user_size); // this is used to measure effective throughput for the two // synchronous IO variants. // note: improved measurements of the actual aio throughput by instrumenting // issue/wait doesn't work because IOManager's decompression may cause us to // miss the exact end of IO, thus throwing off measurements. 
class ScopedIoMonitor { public: ScopedIoMonitor(); ~ScopedIoMonitor(); - void NotifyOfSuccess(FileIOImplentation fi, wchar_t mode, off_t size); + void NotifyOfSuccess(FileIOImplentation fi, int opcode, off_t size); private: double m_startTime; }; -extern void stats_io_check_seek(BlockId& blockId); extern void stats_cb_start(); extern void stats_cb_finish(); // file_cache extern void stats_cache(CacheRet cr, size_t size); extern void stats_block_cache(CacheRet cr); // archive builder extern void stats_ab_connection(bool already_exists); extern void file_stats_dump(); #else #define stats_vfs_file_add(file_size) #define stats_vfs_file_remove(file_size) #define stats_vfs_init_start() #define stats_vfs_init_finish() #define stats_unique_name(name_len) #define stats_open() #define stats_close() #define stats_buf_alloc(size, alignedSize) #define stats_buf_free() #define stats_buf_ref() #define stats_io_user_request(user_size) class ScopedIoMonitor { public: ScopedIoMonitor() {} ~ScopedIoMonitor() {} - void NotifyOfSuccess(FileIOImplentation UNUSED(fi), wchar_t UNUSED(mode), off_t UNUSED(size)) {} + void NotifyOfSuccess(FileIOImplentation UNUSED(fi), int UNUSED(opcode), off_t UNUSED(size)) {} }; -#define stats_io_check_seek(blockId) #define stats_cb_start() #define stats_cb_finish() #define stats_cache(cr, size) #define stats_block_cache(cr) #define stats_ab_connection(already_exists) #define file_stats_dump() #endif #endif // #ifndef INCLUDED_FILE_STATS Index: ps/trunk/source/lib/file/common/real_directory.cpp =================================================================== --- ps/trunk/source/lib/file/common/real_directory.cpp (revision 9349) +++ ps/trunk/source/lib/file/common/real_directory.cpp (revision 9350) @@ -1,92 +1,72 @@ /* Copyright (c) 2010 Wildfire Games * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, 
including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "precompiled.h" #include "lib/file/common/real_directory.h" #include "lib/sysdep/filesystem.h" #include "lib/file/file.h" #include "lib/file/io/io.h" RealDirectory::RealDirectory(const OsPath& path, size_t priority, size_t flags) : m_path(path), m_priority(priority), m_flags(flags) { } /*virtual*/ size_t RealDirectory::Precedence() const { return 1u; } /*virtual*/ wchar_t RealDirectory::LocationCode() const { return 'F'; } /*virtual*/ LibError RealDirectory::Load(const OsPath& name, const shared_ptr& buf, size_t size) const { - const OsPath pathname = m_path / name; - - PFile file(new File); - RETURN_ERR(file->Open(pathname, 'r')); - - RETURN_ERR(io_ReadAligned(file, 0, buf.get(), size)); - return INFO::OK; + return io::Load(m_path / name, buf.get(), size); } LibError RealDirectory::Store(const OsPath& name, const shared_ptr& fileContents, size_t size) { - const OsPath pathname = m_path / name; - - { - PFile file(new File); - RETURN_ERR(file->Open(pathname, 'w')); - RETURN_ERR(io_WriteAligned(file, 0, fileContents.get(), size)); - } - - // io_WriteAligned pads the file; we need to truncate it to the actual - // 
length. ftruncate can't be used because Windows' FILE_FLAG_NO_BUFFERING - // only allows resizing to sector boundaries, so the file must first - // be closed. - wtruncate(pathname, size); - - return INFO::OK; + return io::Store(m_path / name, fileContents.get(), size); } void RealDirectory::Watch() { if(!m_watch) (void)dir_watch_Add(m_path, m_watch); } PRealDirectory CreateRealSubdirectory(const PRealDirectory& realDirectory, const OsPath& subdirectoryName) { const OsPath path = realDirectory->Path() / subdirectoryName/""; return PRealDirectory(new RealDirectory(path, realDirectory->Priority(), realDirectory->Flags())); } Index: ps/trunk/source/lib/file/common/file_stats.cpp =================================================================== --- ps/trunk/source/lib/file/common/file_stats.cpp (revision 9349) +++ ps/trunk/source/lib/file/common/file_stats.cpp (revision 9350) @@ -1,347 +1,337 @@ /* Copyright (c) 2010 Wildfire Games * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* * gathers statistics from all file modules. */ #include "precompiled.h" #include "lib/file/common/file_stats.h" #include #include "lib/timer.h" #if FILE_STATS_ENABLED // vfs static size_t vfs_files; static double vfs_size_total; static double vfs_init_elapsed_time; // file static size_t unique_names; static size_t unique_name_len_total; static size_t open_files_cur, open_files_max; // total = opened_files.size() // file_buf static size_t extant_bufs_cur, extant_bufs_max, extant_bufs_total; static double buf_size_total, buf_aligned_size_total; // file_io static size_t user_ios; static double user_io_size_total; static double io_actual_size_total[FI_MAX_IDX][2]; static double io_elapsed_time[FI_MAX_IDX][2]; static double io_process_time_total; static size_t io_seeks; // file_cache static size_t cache_count[2]; static double cache_size_total[2]; static size_t conflict_misses; //static double conflict_miss_size_total; // JW: currently not used nor computed static size_t block_cache_count[2]; // archive builder static size_t ab_connection_attempts; // total number of trace entries static size_t ab_repeated_connections; // how many of these were not unique // convenience functions for measuring elapsed time in an interval. // by exposing start/finish calls, we avoid callers from querying // timestamps when stats are disabled. 
static double start_time; static void timer_start(double* start_time_storage = &start_time) { // make sure no measurement is currently active // (since start_time is shared static storage) debug_assert(*start_time_storage == 0.0); *start_time_storage = timer_Time(); } static double timer_reset(double* start_time_storage = &start_time) { double elapsed = timer_Time() - *start_time_storage; *start_time_storage = 0.0; return elapsed; } //----------------------------------------------------------------------------- // // vfs // void stats_vfs_file_add(size_t file_size) { vfs_files++; vfs_size_total += file_size; } void stats_vfs_file_remove(size_t file_size) { vfs_files--; vfs_size_total -= file_size; } // stats_vfs_init_* are currently unused void stats_vfs_init_start() { timer_start(); } void stats_vfs_init_finish() { vfs_init_elapsed_time += timer_reset(); } // // file // void stats_unique_name(size_t name_len) { unique_names++; unique_name_len_total += name_len; } void stats_open() { open_files_cur++; open_files_max = std::max(open_files_max, open_files_cur); // could also use a set to determine unique files that have been opened } void stats_close() { debug_assert(open_files_cur > 0); open_files_cur--; } // // file_buf // void stats_buf_alloc(size_t size, size_t alignedSize) { extant_bufs_cur++; extant_bufs_max = std::max(extant_bufs_max, extant_bufs_cur); extant_bufs_total++; buf_size_total += size; buf_aligned_size_total += alignedSize; } void stats_buf_free() { debug_assert(extant_bufs_cur > 0); extant_bufs_cur--; } void stats_buf_ref() { extant_bufs_cur++; } // // file_io // void stats_io_user_request(size_t user_size) { user_ios++; user_io_size_total += user_size; } ScopedIoMonitor::ScopedIoMonitor() { m_startTime = 0.0; timer_start(&m_startTime); } ScopedIoMonitor::~ScopedIoMonitor() { // note: we can only bill IOs that have succeeded :S timer_reset(&m_startTime); } -void ScopedIoMonitor::NotifyOfSuccess(FileIOImplentation fi, wchar_t mode, off_t size) +void 
ScopedIoMonitor::NotifyOfSuccess(FileIOImplentation fi, int opcode, off_t size) { debug_assert(fi < FI_MAX_IDX); - debug_assert(mode == 'r' || mode == 'w'); - const FileOp op = (mode == 'r')? FO_READ : FO_WRITE; + debug_assert(opcode == LIO_READ || opcode == LIO_WRITE); - io_actual_size_total[fi][op] += size; - io_elapsed_time[fi][op] += timer_reset(&m_startTime); -} - -void stats_io_check_seek(BlockId& blockId) -{ - static BlockId lastBlockId; - - if(blockId != lastBlockId) - io_seeks++; - lastBlockId = blockId; + io_actual_size_total[fi][opcode == LIO_WRITE] += size; + io_elapsed_time[fi][opcode == LIO_WRITE] += timer_reset(&m_startTime); } void stats_cb_start() { timer_start(); } void stats_cb_finish() { io_process_time_total += timer_reset(); } // // file_cache // void stats_cache(CacheRet cr, size_t size) { debug_assert(cr == CR_HIT || cr == CR_MISS); #if 0 if(cr == CR_MISS) { PairIB ret = ever_cached_files.insert(atom_fn); if(!ret.second) // was already cached once { conflict_miss_size_total += size; conflict_misses++; } } #endif cache_count[cr]++; cache_size_total[cr] += size; } void stats_block_cache(CacheRet cr) { debug_assert(cr == CR_HIT || cr == CR_MISS); block_cache_count[cr]++; } // // archive builder // void stats_ab_connection(bool already_exists) { ab_connection_attempts++; if(already_exists) ab_repeated_connections++; } //----------------------------------------------------------------------------- template int percent(T num, T divisor) { if(!divisor) return 0; return (int)(100*num / divisor); } void file_stats_dump() { if(!debug_filter_allows(L"FILE_STATS|")) return; const double KB = 1e3; const double MB = 1e6; const double ms = 1e-3; debug_printf(L"--------------------------------------------------------------------------------\n"); debug_printf(L"File statistics:\n"); // note: we split the reports into several debug_printfs for clarity; // this is necessary anyway due to fixed-size buffer. 
debug_printf( L"\nvfs:\n" L"Total files: %lu (%g MB)\n" L"Init/mount time: %g ms\n", (unsigned long)vfs_files, vfs_size_total/MB, vfs_init_elapsed_time/ms ); debug_printf( L"\nfile:\n" L"Total names: %lu (%lu KB)\n" L"Max. concurrent: %lu; leaked: %lu.\n", (unsigned long)unique_names, (unsigned long)(unique_name_len_total/1000), (unsigned long)open_files_max, (unsigned long)open_files_cur ); debug_printf( L"\nfile_buf:\n" L"Total buffers used: %lu (%g MB)\n" L"Max concurrent: %lu; leaked: %lu\n" L"Internal fragmentation: %d%%\n", (unsigned long)extant_bufs_total, buf_size_total/MB, (unsigned long)extant_bufs_max, (unsigned long)extant_bufs_cur, percent(buf_aligned_size_total-buf_size_total, buf_size_total) ); debug_printf( L"\nfile_io:\n" L"Total user load requests: %lu (%g MB)\n" L"IO thoughput [MB/s; 0=never happened]:\n" L" lowio: R=%.3g, W=%.3g\n" L" aio: R=%.3g, W=%.3g\n" L"Average size = %g KB; seeks: %lu; total callback time: %g ms\n" L"Total data actually read from disk = %g MB\n", (unsigned long)user_ios, user_io_size_total/MB, -#define THROUGHPUT(impl, op) (io_elapsed_time[impl][op] == 0.0)? 0.0 : (io_actual_size_total[impl][op] / io_elapsed_time[impl][op] / MB) - THROUGHPUT(FI_LOWIO, FO_READ), THROUGHPUT(FI_LOWIO, FO_WRITE), - THROUGHPUT(FI_AIO , FO_READ), THROUGHPUT(FI_AIO , FO_WRITE), +#define THROUGHPUT(impl, opcode) (io_elapsed_time[impl][opcode == LIO_WRITE] == 0.0)? 
0.0 : (io_actual_size_total[impl][opcode == LIO_WRITE] / io_elapsed_time[impl][opcode == LIO_WRITE] / MB) + THROUGHPUT(FI_LOWIO, LIO_READ), THROUGHPUT(FI_LOWIO, LIO_WRITE), + THROUGHPUT(FI_AIO , LIO_READ), THROUGHPUT(FI_AIO , LIO_WRITE), user_io_size_total/user_ios/KB, (unsigned long)io_seeks, io_process_time_total/ms, - (io_actual_size_total[FI_LOWIO][FO_READ]+io_actual_size_total[FI_AIO][FO_READ])/MB + (io_actual_size_total[FI_LOWIO][0]+io_actual_size_total[FI_AIO][0])/MB ); debug_printf( L"\nfile_cache:\n" L"Hits: %lu (%g MB); misses %lu (%g MB); ratio: %u%%\n" L"Percent of requested bytes satisfied by cache: %u%%; non-compulsory misses: %lu (%u%% of misses)\n" L"Block hits: %lu; misses: %lu; ratio: %u%%\n", (unsigned long)cache_count[CR_HIT], cache_size_total[CR_HIT]/MB, (unsigned long)cache_count[CR_MISS], cache_size_total[CR_MISS]/MB, percent(cache_count[CR_HIT], cache_count[CR_HIT]+cache_count[CR_MISS]), percent(cache_size_total[CR_HIT], cache_size_total[CR_HIT]+cache_size_total[CR_MISS]), (unsigned long)conflict_misses, percent(conflict_misses, cache_count[CR_MISS]), (unsigned long)block_cache_count[CR_HIT], (unsigned long)block_cache_count[CR_MISS], percent(block_cache_count[CR_HIT], block_cache_count[CR_HIT]+block_cache_count[CR_MISS]) ); debug_printf( L"\nvfs_optimizer:\n" L"Total trace entries: %lu; repeated connections: %lu; unique files: %lu\n", (unsigned long)ab_connection_attempts, (unsigned long)ab_repeated_connections, (unsigned long)(ab_connection_attempts-ab_repeated_connections) ); } #endif // FILE_STATS_ENABLED Index: ps/trunk/source/lib/file/vfs/vfs.cpp =================================================================== --- ps/trunk/source/lib/file/vfs/vfs.cpp (revision 9349) +++ ps/trunk/source/lib/file/vfs/vfs.cpp (revision 9350) @@ -1,247 +1,247 @@ /* Copyright (c) 2010 Wildfire Games * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to 
deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "precompiled.h" #include "lib/file/vfs/vfs.h" #include "lib/allocators/shared_ptr.h" #include "lib/file/file_system_util.h" #include "lib/file/common/file_stats.h" #include "lib/file/common/trace.h" #include "lib/file/archive/archive.h" #include "lib/file/io/io.h" #include "lib/file/vfs/vfs_tree.h" #include "lib/file/vfs/vfs_lookup.h" #include "lib/file/vfs/vfs_populate.h" #include "lib/file/vfs/file_cache.h" ERROR_ASSOCIATE(ERR::VFS_DIR_NOT_FOUND, L"VFS directory not found", -1); ERROR_ASSOCIATE(ERR::VFS_FILE_NOT_FOUND, L"VFS file not found", -1); ERROR_ASSOCIATE(ERR::VFS_ALREADY_MOUNTED, L"VFS path already mounted", -1); class VFS : public IVFS { public: VFS(size_t cacheSize) : m_cacheSize(cacheSize), m_fileCache(m_cacheSize) , m_trace(CreateTrace(4*MiB)) { } virtual LibError Mount(const VfsPath& mountPoint, const OsPath& path, size_t flags /* = 0 */, size_t priority /* = 0 */) { if(!fs_util::DirectoryExists(path)) { if(flags & VFS_MOUNT_MUST_EXIST) return ERR::VFS_DIR_NOT_FOUND; // NOWARN else RETURN_ERR(CreateDirectories(path, 0700)); } VfsDirectory* directory; 
CHECK_ERR(vfs_Lookup(mountPoint, &m_rootDirectory, directory, 0, VFS_LOOKUP_ADD|VFS_LOOKUP_SKIP_POPULATE)); PRealDirectory realDirectory(new RealDirectory(path, priority, flags)); RETURN_ERR(vfs_Attach(directory, realDirectory)); return INFO::OK; } virtual LibError GetFileInfo(const VfsPath& pathname, FileInfo* pfileInfo) const { VfsDirectory* directory; VfsFile* file; LibError ret = vfs_Lookup(pathname, &m_rootDirectory, directory, &file); if(!pfileInfo) // just indicate if the file exists without raising warnings. return ret; CHECK_ERR(ret); *pfileInfo = FileInfo(file->Name(), file->Size(), file->MTime()); return INFO::OK; } virtual LibError GetFilePriority(const VfsPath& pathname, size_t* ppriority) const { VfsDirectory* directory; VfsFile* file; RETURN_ERR(vfs_Lookup(pathname, &m_rootDirectory, directory, &file)); *ppriority = file->Priority(); return INFO::OK; } virtual LibError GetDirectoryEntries(const VfsPath& path, FileInfos* fileInfos, DirectoryNames* subdirectoryNames) const { VfsDirectory* directory; CHECK_ERR(vfs_Lookup(path, &m_rootDirectory, directory, 0)); if(fileInfos) { const VfsDirectory::VfsFiles& files = directory->Files(); fileInfos->clear(); fileInfos->reserve(files.size()); for(VfsDirectory::VfsFiles::const_iterator it = files.begin(); it != files.end(); ++it) { const VfsFile& file = it->second; fileInfos->push_back(FileInfo(file.Name(), file.Size(), file.MTime())); } } if(subdirectoryNames) { const VfsDirectory::VfsSubdirectories& subdirectories = directory->Subdirectories(); subdirectoryNames->clear(); subdirectoryNames->reserve(subdirectories.size()); for(VfsDirectory::VfsSubdirectories::const_iterator it = subdirectories.begin(); it != subdirectories.end(); ++it) subdirectoryNames->push_back(it->first); } return INFO::OK; } virtual LibError CreateFile(const VfsPath& pathname, const shared_ptr& fileContents, size_t size) { VfsDirectory* directory; CHECK_ERR(vfs_Lookup(pathname, &m_rootDirectory, directory, 0, 
VFS_LOOKUP_ADD|VFS_LOOKUP_CREATE)); const PRealDirectory& realDirectory = directory->AssociatedDirectory(); const OsPath name = pathname.Filename(); RETURN_ERR(realDirectory->Store(name, fileContents, size)); // wipe out any cached blocks. this is necessary to cover the (rare) case // of file cache contents predating the file write. m_fileCache.Remove(pathname); const VfsFile file(name, size, time(0), realDirectory->Priority(), realDirectory); directory->AddFile(file); m_trace->NotifyStore(pathname, size); return INFO::OK; } virtual LibError LoadFile(const VfsPath& pathname, shared_ptr& fileContents, size_t& size) { const bool isCacheHit = m_fileCache.Retrieve(pathname, fileContents, size); if(!isCacheHit) { VfsDirectory* directory; VfsFile* file; // per 2010-05-01 meeting, this shouldn't raise 'scary error // dialogs', which might fail to display the culprit pathname // instead, callers should log the error, including pathname. RETURN_ERR(vfs_Lookup(pathname, &m_rootDirectory, directory, &file)); size = file->Size(); // safely handle zero-length files if(!size) fileContents = DummySharedPtr((u8*)0); else if(size > m_cacheSize) { - fileContents = io_Allocate(size); + RETURN_ERR(AllocateAligned(fileContents, size, maxSectorSize)); RETURN_ERR(file->Loader()->Load(file->Name(), fileContents, file->Size())); } else { fileContents = m_fileCache.Reserve(size); RETURN_ERR(file->Loader()->Load(file->Name(), fileContents, file->Size())); m_fileCache.Add(pathname, fileContents, size); } } stats_io_user_request(size); stats_cache(isCacheHit? 
CR_HIT : CR_MISS, size); m_trace->NotifyLoad(pathname, size); return INFO::OK; } virtual std::wstring TextRepresentation() const { std::wstring textRepresentation; textRepresentation.reserve(100*KiB); DirectoryDescriptionR(textRepresentation, m_rootDirectory, 0); return textRepresentation; } virtual LibError GetRealPath(const VfsPath& pathname, OsPath& realPathname) { VfsDirectory* directory; VfsFile* file; CHECK_ERR(vfs_Lookup(pathname, &m_rootDirectory, directory, &file)); realPathname = file->Loader()->Path() / pathname.Filename(); return INFO::OK; } virtual LibError GetVirtualPath(const OsPath& realPathname, VfsPath& pathname) { const OsPath realPath = realPathname.Parent()/""; VfsPath path; RETURN_ERR(FindRealPathR(realPath, m_rootDirectory, L"", path)); pathname = path / realPathname.Filename(); return INFO::OK; } virtual LibError Invalidate(const VfsPath& pathname) { m_fileCache.Remove(pathname); VfsDirectory* directory; RETURN_ERR(vfs_Lookup(pathname, &m_rootDirectory, directory, 0)); const OsPath name = pathname.Filename(); directory->Invalidate(name); return INFO::OK; } virtual void Clear() { m_rootDirectory.Clear(); } private: LibError FindRealPathR(const OsPath& realPath, const VfsDirectory& directory, const VfsPath& curPath, VfsPath& path) { PRealDirectory realDirectory = directory.AssociatedDirectory(); if(realDirectory && realDirectory->Path() == realPath) { path = curPath; return INFO::OK; } const VfsDirectory::VfsSubdirectories& subdirectories = directory.Subdirectories(); for(VfsDirectory::VfsSubdirectories::const_iterator it = subdirectories.begin(); it != subdirectories.end(); ++it) { const OsPath& subdirectoryName = it->first; const VfsDirectory& subdirectory = it->second; LibError ret = FindRealPathR(realPath, subdirectory, curPath / subdirectoryName/"", path); if(ret == INFO::OK) return INFO::OK; } return ERR::PATH_NOT_FOUND; // NOWARN } size_t m_cacheSize; FileCache m_fileCache; PITrace m_trace; mutable VfsDirectory m_rootDirectory; }; 
//----------------------------------------------------------------------------- PIVFS CreateVfs(size_t cacheSize) { return PIVFS(new VFS(cacheSize)); } Index: ps/trunk/source/lib/file/vfs/file_cache.cpp =================================================================== --- ps/trunk/source/lib/file/vfs/file_cache.cpp (revision 9349) +++ ps/trunk/source/lib/file/vfs/file_cache.cpp (revision 9350) @@ -1,251 +1,250 @@ /* Copyright (c) 2010 Wildfire Games * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ /* * cache of file contents (supports zero-copy IO) */ #include "precompiled.h" #include "lib/file/vfs/file_cache.h" #include "lib/external_libraries/suppress_boost_warnings.h" #include "lib/file/common/file_stats.h" -#include "lib/file/io/io_align.h" // BLOCK_SIZE #include "lib/cache_adt.h" // Cache #include "lib/bits.h" // round_up #include "lib/allocators/allocators.h" #include "lib/allocators/shared_ptr.h" #include "lib/allocators/headerless.h" #include "lib/sysdep/os_cpu.h" // os_cpu_PageSize //----------------------------------------------------------------------------- // allocator /* the biggest worry of a file cache is external fragmentation. there are two basic ways to combat this: 1) 'defragment' periodically - move blocks around to increase size of available 'holes'. 2) prevent fragmentation from occurring at all via deliberate alloc/free policy. file contents are returned directly to the user (zero-copy IO), so only currently unreferenced blocks can be moved. it is believed that this would severely hamper defragmentation; we therefore go with the latter approach. the basic insight is: fragmentation occurs when a block is freed whose neighbors are not free (thus preventing coalescing). this can be prevented by allocating objects of similar lifetimes together. typical workloads (uniform access frequency) already show such behavior: the Landlord cache manager evicts files in an LRU manner, which matches the allocation policy. references: "The Memory Fragmentation Problem - Solved?" (Johnstone and Wilson) "Dynamic Storage Allocation - A Survey and Critical Review" (Johnstone and Wilson) */ // shared_ptrs must own a reference to their allocator to ensure it's extant when // they are freed. it is stored in the shared_ptr deleter. 
class Allocator; typedef shared_ptr PAllocator; class FileCacheDeleter { public: FileCacheDeleter(size_t size, const PAllocator& allocator) : m_size(size), m_allocator(allocator) { } // (this uses Allocator and must come after its definition) void operator()(u8* mem) const; private: size_t m_size; PAllocator m_allocator; }; // adds statistics and AllocatorChecker to a HeaderlessAllocator class Allocator { public: Allocator(size_t maxSize) : m_allocator(maxSize) { } shared_ptr Allocate(size_t size, const PAllocator& pthis) { - const size_t alignedSize = round_up(size, BLOCK_SIZE); + const size_t alignedSize = Align(size); u8* mem = (u8*)m_allocator.Allocate(alignedSize); if(!mem) return DummySharedPtr(0); // (prevent FileCacheDeleter from seeing a null pointer) #ifndef NDEBUG m_checker.OnAllocate(mem, alignedSize); #endif stats_buf_alloc(size, alignedSize); return shared_ptr(mem, FileCacheDeleter(size, pthis)); } void Deallocate(u8* mem, size_t size) { - const size_t alignedSize = round_up(size, BLOCK_SIZE); + const size_t alignedSize = Align(size); // (re)allow writes in case the buffer was made read-only. it would // be nice to unmap the buffer, but this is not possible because // HeaderlessAllocator needs to affix boundary tags. (void)mprotect(mem, size, PROT_READ|PROT_WRITE); #ifndef NDEBUG m_checker.OnDeallocate(mem, alignedSize); #endif m_allocator.Deallocate(mem, alignedSize); stats_buf_free(); } private: HeaderlessAllocator m_allocator; #ifndef NDEBUG AllocatorChecker m_checker; #endif }; void FileCacheDeleter::operator()(u8* mem) const { m_allocator->Deallocate(mem, m_size); } //----------------------------------------------------------------------------- // FileCache::Impl //----------------------------------------------------------------------------- // since users are strongly encouraged to only load/process one file at a // time, there won't be many active references to cache entries. 
we could // take advantage of this with a separate extant list, but the cache's // hash map should be fast enough and this way is less work than maintaining // (possibly disjunct) cached and extant lists. class FileCache::Impl { public: Impl(size_t maxSize) : m_allocator(new Allocator(maxSize)) { } shared_ptr Reserve(size_t size) { // (should never happen because the VFS ensures size != 0.) debug_assert(size != 0); // (300 iterations have been observed when reserving several MB // of space in a full cache) for(;;) { { shared_ptr data = m_allocator->Allocate(size, m_allocator); if(data) return data; } // remove least valuable entry from cache (if users are holding // references, the contents won't actually be deallocated) { shared_ptr discardedData; size_t discardedSize; bool removed = m_cache.remove_least_valuable(&discardedData, &discardedSize); // only false if cache is empty, which can't be the case because // allocation failed. debug_assert(removed); } } } void Add(const VfsPath& pathname, const shared_ptr& data, size_t size, size_t cost) { // zero-copy cache => all users share the contents => must not // allow changes. this will be reverted when deallocating. (void)mprotect((void*)data.get(), size, PROT_READ); m_cache.add(pathname, data, size, cost); } bool Retrieve(const VfsPath& pathname, shared_ptr& data, size_t& size) { // (note: don't call stats_cache because we don't know the file size // in case of a cache miss; doing so is left to the caller.) stats_buf_ref(); return m_cache.retrieve(pathname, data, &size); } void Remove(const VfsPath& pathname) { m_cache.remove(pathname); // note: we could check if someone is still holding a reference // to the contents, but that currently doesn't matter. 
} private: typedef Cache > CacheType; CacheType m_cache; PAllocator m_allocator; }; //----------------------------------------------------------------------------- FileCache::FileCache(size_t size) : impl(new Impl(size)) { } shared_ptr FileCache::Reserve(size_t size) { return impl->Reserve(size); } void FileCache::Add(const VfsPath& pathname, const shared_ptr& data, size_t size, size_t cost) { impl->Add(pathname, data, size, cost); } void FileCache::Remove(const VfsPath& pathname) { impl->Remove(pathname); } bool FileCache::Retrieve(const VfsPath& pathname, shared_ptr& data, size_t& size) { return impl->Retrieve(pathname, data, size); } Index: ps/trunk/source/lib/lib_errors.h =================================================================== --- ps/trunk/source/lib/lib_errors.h (revision 9349) +++ ps/trunk/source/lib/lib_errors.h (revision 9350) @@ -1,487 +1,496 @@ /* Copyright (c) 2010 Wildfire Games * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ /* * error handling system: defines error codes, associates them with * descriptive text, simplifies error notification. */ /** Error handling system Introduction ------------ This module defines error codes, translates them to/from other systems (e.g. errno), provides several macros that simplify returning errors / checking if a function failed, and associates codes with descriptive text. Why Error Codes? ---------------- To convey information about what failed, the alternatives are unique integral codes and direct pointers to descriptive text. Both occupy the same amount of space, but codes are easier to internationalize. Method of Propagating Errors ---------------------------- When a low-level function has failed, this must be conveyed to the higher-level application logic across several functions on the call stack. There are two alternatives: 1) check at each call site whether a function failed; if so, return to the caller. 2) throw an exception. We will discuss the advantages and disadvantages of exceptions, which mirror those of call site checking. - performance: they shouldn't be used in time-critical code. - predictability: exceptions can come up almost anywhere, so it is hard to say what execution path will be taken. - interoperability: not compatible with other languages. + readability: cleans up code by separating application logic and error handling. however, this is also a disadvantage because it may be difficult to see at a glance if a piece of code does error checking at all. + visibility: errors are more likely to be seen than relying on callers to check return codes; less reliant on discipline. Both have their place. Our recommendation is to throw error code exceptions when checking call sites and propagating errors becomes tedious. However, inter-module boundaries should always return error codes for interoperability with other languages. 
Simplifying Call-Site Checking ------------------------------ As mentioned above, this approach requires discipline. We provide macros to simplify this task: function calls can be wrapped in an "enforcer" that checks whether they succeeded and can take action (e.g. returning to caller or warning user) as appropriate. Consider the following example: LibError ret = doWork(); if(ret != INFO::OK) { warnUser(ret); return ret; } This can be replaced by: CHECK_ERR(doWork()); This provides a visible sign that the code handles errors, automatically propagates errors back to the caller, and most importantly, allows warning the user whenever an error occurs. Thus, no errors can be swept under the carpet by failing to check return value or catch(...) all exceptions. When to warn the user? ---------------------- When a function fails, there are 2 places we can raise a warning: as soon as the error condition is known, or in the higher-level caller. The former is the WARN_RETURN(ERR::FAIL) approach, while the latter corresponds to the example above. We prefer the former because it is easier to ensure that all possible return paths have been covered: search for all "return ERR::*" that are not followed by a "// NOWARN" comment. Also, the latter approach raises the question of where exactly to issue the warning. Clearly API-level routines must raise the warning, but sometimes they will want to call each other. Multiple warnings along the call stack ensuing from the same root cause are not nice. Note the special case of "validator" functions that e.g. verify the state of an object: we now discuss pros/cons of just returning errors without warning, and having their callers take care of that. + they typically have many return paths (-> increased code size) - this is balanced by validators that have many call sites. - we want all return statements wrapped for consistency and easily checking if any were forgotten - adding // NOWARN to each validator return statement would be tedious. 
- there is no advantage to checking at the call site; call stack indicates which caller of the validator failed anyway. Validator functions should therefore also use WARN_RETURN. Numbering Scheme ---------------- Each module header defines its own error codes to avoid a full rebuild whenever a new code is added. Error codes start at -100000 (warnings are positive, but reserves a negative value; absolute values are unique). This avoids collisions with all known error code schemes. Each header gets 100 possible values; the tens value may be used to denote groups within that header. The subsystem is denoted by the ten-thousands digit: 0 lib 1 file 2 res (resource management) 3 sysdep (system-dependent) 4 win (Windows-specific) To summarize: +/-1SHHCC (S=subsystem, HH=header, CC=code number) 10 lib 00CC lib_errors 03CC path 04CC debug 05CC debug_stl 06CC secure_crt 07CC wchar 11 file 01CC vfs 03CC file 04CC archive 12 res 01CC tex 02CC ogl_shader 13 sysdep 00CC cpu 01CC os_cpu 14 win 00CC whrt Notes: - file is called lib_errors.h because 0ad has another errors.cpp and the MS linker isn't smart enough to deal with object files of the same name but in different paths. **/ #ifndef INCLUDED_LIB_ERRORS #define INCLUDED_LIB_ERRORS #include "lib/code_annotation.h" #include "lib/code_generation.h" // note: this loses compiler type safety (being able to prevent // return 1 when a LibError is the return value), but allows splitting // up the error namespace into separate headers. // Lint's 'strong type' checking can be used to find errors. typedef long LibError; // opaque - do not access its fields! // note: must be defined here because clients instantiate them; // fields cannot be made private due to POD requirement. struct LibErrorAssociation { LibError err; // must remain valid until end of program. const wchar_t* description; // POSIX errno, or -1 int errno_equivalent; LibErrorAssociation* next; }; /** * associate a LibError with a description and errno equivalent. 
* @return dummy integer to allow calling via static initializer. **/ extern int error_AddAssociation(LibErrorAssociation*); // associate a LibError with a description and errno equivalent. // Invoke this at file or function scope. #define ERROR_ASSOCIATE(err, description, errno_equivalent)\ static LibErrorAssociation UID__ = { err, description, errno_equivalent, NULL };\ static int UID2__ = error_AddAssociation(&UID__) /** * generate textual description of an error code. * * @param err LibError to be translated. if despite type checking we * get an invalid enum value, the string will be something like * "Unknown error (65536, 0x10000)". * @param buf destination buffer * @param max_chars size of buffer [characters] * @return buf (allows using this function in expressions) **/ LIB_API wchar_t* error_description_r(LibError err, wchar_t* buf, size_t max_chars); //----------------------------------------------------------------------------- // conversion to/from other error code definitions. // note: other conversion routines (e.g. to/from Win32) are implemented in // the corresponding modules to keep this header portable. /** * translate errno to LibError. * * should only be called directly after a POSIX function indicates failure; * errno may otherwise still be set from another error cause. * * @param warn_if_failed if set, raise a warning when returning an error * (i.e. ERR::*, but not INFO::OK). this avoids having to wrap all * call sites in WARN_ERR etc. * @return LibError equivalent of errno, or ERR::FAIL if there's no equal. **/ extern LibError LibError_from_errno(bool warn_if_failed = true); /** * translate a POSIX function's return/error indication to LibError. * * you should set errno to 0 before calling the POSIX function to * make sure we do not return any stale errors. typical usage: * errno = 0; * int ret = posix_func(..); * return LibError_from_posix(ret); * * @param ret return value of a POSIX function: 0 indicates success, * -1 is error. 
* @param warn_if_failed if set, raise a warning when returning an error * (i.e. ERR::*, but not INFO::OK). this avoids having to wrap all * call sites in WARN_ERR etc. * @return INFO::OK if the POSIX function succeeded, else the LibError * equivalent of errno, or ERR::FAIL if there's no equal. **/ extern LibError LibError_from_posix(int ret, bool warn_if_failed = true); /** * set errno to the equivalent of a LibError. * * used in wposix - underlying functions return LibError but must be * translated to errno at e.g. the mmap interface level. higher-level code * that calls mmap will in turn convert back to LibError. * * @param err error code to set **/ extern void LibError_set_errno(LibError err); //----------------------------------------------------------------------------- // be careful here. the given expression (e.g. variable or // function return value) may be a Handle (=i64), so it needs to be // stored and compared as such. (very large but legitimate Handle values // cast to int can end up negative) // all functions using this return LibError (instead of i64) for // efficiency and simplicity. if the input was negative, it is an // error code and is therefore known to fit; we still mask with // ULONG_MAX to avoid VC cast-to-smaller-type warnings. // if expression evaluates to a negative error code, warn user and // return the number. #define CHECK_ERR(expression)\ STMT(\ i64 err64__ = (i64)(expression);\ if(err64__ < 0)\ {\ LibError err__ = (LibError)(err64__ & ULONG_MAX);\ DEBUG_WARN_ERR(err__);\ return (LibError)(err__ & ULONG_MAX);\ }\ ) // just pass on errors without any kind of annoying warning // (useful for functions that can legitimately fail). #define RETURN_ERR(expression)\ STMT(\ i64 err64__ = (i64)(expression);\ if(err64__ < 0)\ {\ LibError err__ = (LibError)(err64__ & ULONG_MAX);\ return err__;\ }\ ) + +// if expression evaluates to anything other than INFO::CB_CONTINUE, return that value. 
+#define RETURN_IF_NOT_CONTINUE(expression)\ +STMT(\ + i64 err64__ = (i64)(expression);\ + if(err64__ != INFO::CB_CONTINUE)\ + return err64__;\ +) + // return an error and warn about it (replaces debug_warn+return) #define WARN_RETURN(err)\ STMT(\ DEBUG_WARN_ERR(err);\ return err;\ ) // if expression evaluates to a negative error code, warn user and // throw that number. #define THROW_ERR(expression)\ STMT(\ i64 err64__ = (i64)(expression);\ if(err64__ < 0)\ {\ LibError err__ = (LibError)(err64__ & ULONG_MAX);\ DEBUG_WARN_ERR(err__);\ throw err__;\ }\ ) // if expression evaluates to a negative error code, warn user and just return // (useful for void functions that must bail and complain) #define WARN_ERR_RETURN(expression)\ STMT(\ i64 err64__ = (i64)(expression);\ if(err64__ < 0)\ {\ LibError err__ = (LibError)(err64__ & ULONG_MAX);\ DEBUG_WARN_ERR(err__);\ return;\ }\ ) // if expression evaluates to a negative error code, warn user // (this is similar to debug_assert but also works in release mode) #define WARN_ERR(expression)\ STMT(\ i64 err64__ = (i64)(expression);\ if(err64__ < 0)\ {\ LibError err__ = (LibError)(err64__ & ULONG_MAX);\ DEBUG_WARN_ERR(err__);\ }\ ) // if expression evaluates to a negative error code, return 0. #define RETURN0_IF_ERR(expression)\ STMT(\ i64 err64__ = (i64)(expression);\ if(err64__ < 0)\ return 0;\ ) // if ok evaluates to false or FALSE, warn user and return -1. #define WARN_RETURN_IF_FALSE(ok)\ STMT(\ if(!(ok))\ {\ - debug_warn(L"FYI: WARN_RETURN_IF_FALSE reports that a function failed."\ - L"feel free to ignore or suppress this warning.");\ + debug_warn(L"FYI: WARN_RETURN_IF_FALSE reports that a function failed. "\ + L"Feel free to ignore or suppress this warning.");\ return ERR::FAIL;\ }\ ) // if ok evaluates to false or FALSE, return -1. #define RETURN_IF_FALSE(ok)\ STMT(\ if(!(ok))\ return ERR::FAIL;\ ) // if ok evaluates to false or FALSE, warn user. 
#define WARN_IF_FALSE(ok)\ STMT(\ if(!(ok))\ - debug_warn(L"FYI: WARN_IF_FALSE reports that a function failed."\ - L"feel free to ignore or suppress this warning.");\ + debug_warn(L"FYI: WARN_IF_FALSE reports that a function failed. "\ + L"Feel free to ignore or suppress this warning.");\ ) //----------------------------------------------------------------------------- namespace INFO { const LibError OK = 0; // note: these values are > 100 to allow multiplexing them with // coroutine return values, which return completion percentage. // function is a callback and indicates that it can (but need not // necessarily) be called again. const LibError CB_CONTINUE = +100000; // notify caller that nothing was done. const LibError SKIPPED = +100001; // function is incapable of doing the requested task with the given inputs. // this implies SKIPPED, but also conveys a bit more information. const LibError CANNOT_HANDLE = +100002; // function is meant to be called repeatedly, and now indicates that // all jobs are complete. const LibError ALL_COMPLETE = +100003; // (returned e.g. when inserting into container) const LibError ALREADY_EXISTS = +100004; } namespace ERR { const LibError FAIL = -1; // general const LibError LOGIC = -100010; const LibError TIMED_OUT = -100011; const LibError REENTERED = -100012; const LibError CORRUPTED = -100013; const LibError VERSION = -100014; // function arguments const LibError INVALID_PARAM = -100020; const LibError INVALID_HANDLE = -100021; const LibError BUF_SIZE = -100022; // system limitations const LibError AGAIN = -100030; const LibError LIMIT = -100031; const LibError NO_SYS = -100032; const LibError NOT_IMPLEMENTED = -100033; const LibError NOT_SUPPORTED = -100034; const LibError NO_MEM = -100035; // these are for cases where we just want a distinct value to display and // a symbolic name + string would be overkill (e.g. the various // test cases in a validate() call). 
they are shared between multiple // functions; when something fails, the stack trace will show in which // one it was => these errors are unambiguous. // there are 3 tiers - 1..9 are used in most functions, 11..19 are // used in a function that calls another validator and 21..29 are // for functions that call 2 other validators (this avoids // ambiguity as to which error actually happened where) const LibError _1 = -100101; const LibError _2 = -100102; const LibError _3 = -100103; const LibError _4 = -100104; const LibError _5 = -100105; const LibError _6 = -100106; const LibError _7 = -100107; const LibError _8 = -100108; const LibError _9 = -100109; const LibError _11 = -100111; const LibError _12 = -100112; const LibError _13 = -100113; const LibError _14 = -100114; const LibError _15 = -100115; const LibError _16 = -100116; const LibError _17 = -100117; const LibError _18 = -100118; const LibError _19 = -100119; const LibError _21 = -100121; const LibError _22 = -100122; const LibError _23 = -100123; const LibError _24 = -100124; const LibError _25 = -100125; const LibError _26 = -100126; const LibError _27 = -100127; const LibError _28 = -100128; const LibError _29 = -100129; } // namespace ERR #endif // #ifndef INCLUDED_LIB_ERRORS Index: ps/trunk/source/lib/sysdep/os/win/wposix/waio.cpp =================================================================== --- ps/trunk/source/lib/sysdep/os/win/wposix/waio.cpp (revision 9349) +++ ps/trunk/source/lib/sysdep/os/win/wposix/waio.cpp (revision 9350) @@ -1,508 +1,641 @@ -/* Copyright (c) 2010 Wildfire Games +/* Copyright (c) 2011 Wildfire Games * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the 
Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* * emulate POSIX asynchronous I/O on Windows. */ +// NB: this module is significantly faster than Intel's aio library, +// which also returns ERROR_INVALID_PARAMETER from aio_error if the +// file is opened with FILE_FLAG_OVERLAPPED. (it looks like they are +// using threaded blocking IO) + #include "precompiled.h" #include "lib/sysdep/os/win/wposix/waio.h" -#include - -#include "lib/sysdep/os/win/wposix/crt_posix.h" // correct definitions of _open() etc. +#include "lib/alignment.h" // IsAligned +#include "lib/module_init.h" +#include "lib/sysdep/cpu.h" // cpu_AtomicAdd #include "lib/sysdep/filesystem.h" // O_NO_AIO_NP +#include "lib/sysdep/os/win/wutil.h" // wutil_SetPrivilege +#include "lib/sysdep/os/win/wiocp.h" +#include "lib/sysdep/os/win/winit.h" -#include "lib/bits.h" // IsAligned -#include "lib/sysdep/os/win/wposix/wposix_internal.h" -#include "lib/sysdep/os/win/wposix/wtime.h" // timespec - - -WINIT_REGISTER_MAIN_INIT(waio_Init); WINIT_REGISTER_MAIN_SHUTDOWN(waio_Shutdown); -// note: we assume sector sizes no larger than a page. 
-// (GetDiskFreeSpace allows querying the actual size, but we'd -// have to do so for all drives, and that may change depending on whether -// there is a DVD in the drive or not) -// sector size is relevant because Windows aio requires all IO -// buffers, offsets and lengths to be a multiple of it. this requirement -// is also carried over into the vfs / file.cpp interfaces for efficiency -// (avoids the need for copying to/from align buffers). -const uintptr_t sectorSize = 0x1000; +// (dynamic linking preserves compatibility with previous Windows versions) +static WUTIL_FUNC(pSetFileCompletionNotificationModes, BOOL, (HANDLE, UCHAR)); +static WUTIL_FUNC(pSetFileIoOverlappedRange, BOOL, (HANDLE, PUCHAR, ULONG)); +static WUTIL_FUNC(pSetFileValidData, BOOL, (HANDLE, LONGLONG)); + +// (there must be one global IOCP because aio_suspend might be called for +// requests from different files) +static HANDLE hIOCP; -//----------------------------------------------------------------------------- -// note: the Windows lowio file descriptor limit is currrently 2048. +//----------------------------------------------------------------------------- +// OvlAllocator -/** - * association between POSIX file descriptor and Win32 HANDLE. - * NB: callers must ensure thread safety. - **/ -class HandleManager +// allocator for OVERLAPPED (enables a special optimization, see Associate) +struct OvlAllocator // POD { -public: - /** - * associate an aio handle with a file descriptor. 
- **/ - void Associate(int fd, HANDLE hFile) + // freelist entries for (padded) OVERLAPPED from our storage + struct Entry { - debug_assert(fd > 2); - debug_assert(GetFileSize(hFile, 0) != INVALID_FILE_SIZE); - std::pair ret = m_map.insert(std::make_pair(fd, hFile)); - debug_assert(ret.second); // fd better not already have been associated + SLIST_ENTRY entry; + OVERLAPPED ovl; + }; + + LibError Init() + { + // the allocation must be naturally aligned to ensure it doesn't + // overlap another page, which might prevent SetFileIoOverlappedRange + // from pinning the pages if one of them is PAGE_NOACCESS. + storage = _mm_malloc(storageSize, storageSize); + if(!storage) + WARN_RETURN(ERR::NO_MEM); + memset(storage, 0, storageSize); + + InitializeSListHead(&freelist); + + // storageSize provides more than enough OVERLAPPED, so we + // pad them to the cache line size to maybe avoid a few RFOs. + const size_t size = Align(sizeof(Entry)); + for(uintptr_t offset = 0; offset+size <= storageSize; offset += size) + { + Entry* entry = (Entry*)(uintptr_t(storage) + offset); + debug_assert(IsAligned(entry, MEMORY_ALLOCATION_ALIGNMENT)); + InterlockedPushEntrySList(&freelist, &entry->entry); + } + + extant = 0; + + return INFO::OK; } - void Dissociate(int fd) + void Shutdown() { - const size_t numRemoved = m_map.erase(fd); - debug_assert(numRemoved == 1); + debug_assert(extant == 0); + + InterlockedFlushSList(&freelist); + + _mm_free(storage); + storage = 0; } - bool IsAssociated(int fd) const + // irrevocably enable a special optimization for all I/O requests + // concerning this file, ending when the file is closed. has no effect + // unless Vista+ and SeLockMemoryPrivilege are available. + void Associate(HANDLE hFile) { - return m_map.find(fd) != m_map.end(); + debug_assert(extant == 0); + + // pin the page in kernel address space, which means our thread + // won't have to be the one to service the I/O, thus avoiding an APC. 
+ // ("thread agnostic I/O") + if(pSetFileIoOverlappedRange) + WARN_IF_FALSE(pSetFileIoOverlappedRange(hFile, (PUCHAR)storage, storageSize)); } - /** - * @return aio handle associated with file descriptor or - * INVALID_HANDLE_VALUE if there is none. - **/ - HANDLE Get(int fd) const + // @return OVERLAPPED initialized for I/O starting at offset, + // or 0 if all available structures have already been allocated. + OVERLAPPED* Allocate(off_t offset) { - Map::const_iterator it = m_map.find(fd); - if(it == m_map.end()) - return INVALID_HANDLE_VALUE; - return it->second; - } + Entry* entry = (Entry*)InterlockedPopEntrySList(&freelist); + if(!entry) + return 0; -private: - typedef std::map Map; - Map m_map; -}; + OVERLAPPED& ovl = entry->ovl; + ovl.Internal = 0; + ovl.InternalHigh = 0; + ovl.Offset = u64_lo(offset); + ovl.OffsetHigh = u64_hi(offset); + ovl.hEvent = 0; // (notification is via IOCP and/or polling) -static HandleManager* handleManager; + cpu_AtomicAdd(&extant, +1); + return &ovl; + } -// do we want to open a second aio-capable handle? -static bool IsAioPossible(int fd, bool is_com_port, int oflag) -{ - // stdin/stdout/stderr - if(fd <= 2) - return false; + void Deallocate(OVERLAPPED* ovl) + { + cpu_AtomicAdd(&extant, -1); - // COM port - we don't currently need aio access for those, and - // aio_reopen's CreateFileW would fail with "access denied". - if(is_com_port) - return false; + const uintptr_t address = uintptr_t(ovl); + debug_assert(uintptr_t(storage) <= address && address < uintptr_t(storage)+storageSize); + InterlockedPushEntrySList(&freelist, (PSLIST_ENTRY)(address - offsetof(Entry, ovl))); + } - // caller is requesting we skip it (see open()) - if(oflag & O_NO_AIO_NP) - return false; + // one 4 KiB page is enough for 64 OVERLAPPED per file (i.e. plenty). 
+ static const size_t storageSize = pageSize; - return true; -} + void* storage; +#if MSC_VERSION +# pragma warning(push) +# pragma warning(disable:4324) // structure was padded due to __declspec(align()) +#endif + __declspec(align(MEMORY_ALLOCATION_ALIGNMENT)) SLIST_HEADER freelist; +#if MSC_VERSION +# pragma warning(pop) +#endif -static DWORD CreationDisposition(int oflag) -{ - if(oflag & O_CREAT) - return (oflag & O_EXCL)? CREATE_NEW : CREATE_ALWAYS; + volatile intptr_t extant; +}; - if(oflag & O_TRUNC) - return TRUNCATE_EXISTING; - return OPEN_EXISTING; -} +//----------------------------------------------------------------------------- +// FileControlBlock +// (must correspond to static zero-initialization of fd) +static const intptr_t FD_AVAILABLE = 0; -// (re)open file in asynchronous mode and associate handle with fd. -// (this works because the files default to DENY_NONE sharing) -LibError waio_reopen(int fd, const OsPath& pathname, int oflag, ...) +// information required to start asynchronous I/Os from a file +// (aiocb stores a pointer to the originating FCB) +struct FileControlBlock // POD { - WinScopedPreserveLastError s; // CreateFile + // search key, indicates the file descriptor with which this + // FCB was associated (or FD_AVAILABLE if none). 
+ volatile intptr_t fd; - debug_assert(!(oflag & O_APPEND)); // not supported - if(!IsAioPossible(fd, false, oflag)) - return INFO::SKIPPED; + // second aio-enabled handle from waio_reopen + HANDLE hFile; - DWORD flags = FILE_FLAG_OVERLAPPED|FILE_FLAG_NO_BUFFERING|FILE_FLAG_SEQUENTIAL_SCAN; + OvlAllocator ovl; - // decode file access mode - DWORD access, share; - switch(oflag & (O_RDONLY|O_WRONLY|O_RDWR)) + LibError Init() { - case O_RDONLY: - access = GENERIC_READ; - share = FILE_SHARE_READ; - break; - - case O_WRONLY: - access = GENERIC_WRITE; - share = FILE_SHARE_WRITE; - flags |= FILE_FLAG_WRITE_THROUGH; - break; - - case O_RDWR: - access = GENERIC_READ|GENERIC_WRITE; - share = FILE_SHARE_READ|FILE_SHARE_WRITE; - flags |= FILE_FLAG_WRITE_THROUGH; - break; + fd = FD_AVAILABLE; + hFile = INVALID_HANDLE_VALUE; + return ovl.Init(); + } - default: - WARN_RETURN(ERR::INVALID_PARAM); + void Shutdown() + { + debug_assert(fd == FD_AVAILABLE); + debug_assert(hFile == INVALID_HANDLE_VALUE); + ovl.Shutdown(); } +}; + + +// NB: the Windows lowio file descriptor limit is 2048, but +// our applications rarely open more than a few files at a time. 
+static FileControlBlock fileControlBlocks[16]; - // open file - const DWORD create = CreationDisposition(oflag); - const HANDLE hFile = CreateFileW(OsString(pathname).c_str(), access, share, 0, create, FILE_ATTRIBUTE_NORMAL|flags, 0); - if(hFile == INVALID_HANDLE_VALUE) - return LibError_from_GLE(); +static FileControlBlock* AssociateFileControlBlock(int fd, HANDLE hFile) +{ + for(size_t i = 0; i < ARRAY_SIZE(fileControlBlocks); i++) { - WinScopedLock lock(WAIO_CS); - handleManager->Associate(fd, hFile); + FileControlBlock& fcb = fileControlBlocks[i]; + if(cpu_CAS(&fcb.fd, FD_AVAILABLE, fd)) // the FCB is now ours + { + fcb.hFile = hFile; + fcb.ovl.Associate(hFile); + + AttachToCompletionPort(hFile, hIOCP, (ULONG_PTR)&fcb); + + // minor optimization: avoid posting to IOCP in rare cases + // where the I/O completes synchronously + if(pSetFileCompletionNotificationModes) + { + // (for reasons as yet unknown, this fails when the file is + // opened for read-only access) + (void)pSetFileCompletionNotificationModes(fcb.hFile, FILE_SKIP_COMPLETION_PORT_ON_SUCCESS); + } + + return &fcb; + } } - return INFO::OK; + + return 0; } -LibError waio_close(int fd) +static void DissociateFileControlBlock(FileControlBlock* fcb) { - HANDLE hFile; + fcb->hFile = INVALID_HANDLE_VALUE; + fcb->fd = FD_AVAILABLE; +} + + +static FileControlBlock* FindFileControlBlock(int fd) +{ + debug_assert(fd != FD_AVAILABLE); + + for(size_t i = 0; i < ARRAY_SIZE(fileControlBlocks); i++) { - WinScopedLock lock(WAIO_CS); - if(!handleManager->IsAssociated(fd)) // wasn't opened for aio - return INFO::SKIPPED; - hFile = handleManager->Get(fd); - handleManager->Dissociate(fd); + FileControlBlock& fcb = fileControlBlocks[i]; + if(fcb.fd == fd) + return &fcb; } - if(!CloseHandle(hFile)) - WARN_RETURN(ERR::INVALID_HANDLE); - - return INFO::OK; + return 0; } -// we don't want to #define read to _read, since that's a fairly common -// identifier. therefore, translate from MS CRT names via thunk functions. 
-// efficiency is less important, and the overhead could be optimized away. +//----------------------------------------------------------------------------- +// init/shutdown -int read(int fd, void* buf, size_t nbytes) -{ - return _read(fd, buf, (int)nbytes); -} +static ModuleInitState waio_initState; -int write(int fd, void* buf, size_t nbytes) +static LibError waio_Init() { - return _write(fd, buf, (int)nbytes); + for(size_t i = 0; i < ARRAY_SIZE(fileControlBlocks); i++) + fileControlBlocks[i].Init(); + + WUTIL_IMPORT_KERNEL32(SetFileCompletionNotificationModes, pSetFileCompletionNotificationModes); + + // NB: using these functions when the privileges are not available would + // trigger warnings. since callers have to check the function pointers + // anyway, just refrain from setting them in such cases. + + if(wutil_SetPrivilege(L"SeLockMemoryPrivilege", true) == INFO::OK) + WUTIL_IMPORT_KERNEL32(SetFileIoOverlappedRange, pSetFileIoOverlappedRange); + + if(wutil_SetPrivilege(L"SeManageVolumePrivilege", true) == INFO::OK) + WUTIL_IMPORT_KERNEL32(SetFileValidData, pSetFileValidData); + + return INFO::OK; } -off_t lseek(int fd, off_t ofs, int whence) + +static LibError waio_Shutdown() { - return _lseeki64(fd, ofs, whence); + if(waio_initState == 0) // we were never initialized + return INFO::OK; + + for(size_t i = 0; i < ARRAY_SIZE(fileControlBlocks); i++) + fileControlBlocks[i].Shutdown(); + + WARN_IF_FALSE(CloseHandle(hIOCP)); + + return INFO::OK; } //----------------------------------------------------------------------------- +// OpenFile -class aiocb::Impl +static DWORD DesiredAccess(int oflag) { -public: - Impl() + switch(oflag & (O_RDONLY|O_WRONLY|O_RDWR)) { - m_hFile = INVALID_HANDLE_VALUE; - - // (hEvent is initialized below and the rest in Issue(), but clear out - // any subsequently added fields) - memset(&m_overlapped, 0, sizeof(m_overlapped)); - - const BOOL manualReset = TRUE; - const BOOL initialState = FALSE; - m_overlapped.hEvent = CreateEvent(0, 
manualReset, initialState, 0); + case O_RDONLY: + // (WinXP x64 requires FILE_WRITE_ATTRIBUTES for SetFileCompletionNotificationModes) + return GENERIC_READ|FILE_WRITE_ATTRIBUTES; + case O_WRONLY: + return GENERIC_WRITE; + case O_RDWR: + return GENERIC_READ|GENERIC_WRITE; + default: + DEBUG_WARN_ERR(ERR::INVALID_PARAM); + return 0; } +} - ~Impl() +static DWORD ShareMode(int oflag) +{ + switch(oflag & (O_RDONLY|O_WRONLY|O_RDWR)) { - CloseHandle(m_overlapped.hEvent); + case O_RDONLY: + return FILE_SHARE_READ; + case O_WRONLY: + return FILE_SHARE_WRITE; + case O_RDWR: + return FILE_SHARE_READ|FILE_SHARE_WRITE; + default: + DEBUG_WARN_ERR(ERR::INVALID_PARAM); + return 0; } +} - LibError Issue(HANDLE hFile, off_t ofs, void* buf, size_t size, bool isWrite) - { - WinScopedPreserveLastError s; +static DWORD CreationDisposition(int oflag) +{ + if(oflag & O_CREAT) + return (oflag & O_EXCL)? CREATE_NEW : CREATE_ALWAYS; - m_hFile = hFile; + if(oflag & O_TRUNC) + return TRUNCATE_EXISTING; - // note: Read-/WriteFile reset m_overlapped.hEvent, so we don't have to. - m_overlapped.Internal = m_overlapped.InternalHigh = 0; - m_overlapped.Offset = u64_lo(ofs); - m_overlapped.OffsetHigh = u64_hi(ofs); + return OPEN_EXISTING; +} - DWORD bytesTransferred; - BOOL ok; - if(isWrite) - ok = WriteFile(hFile, buf, u64_lo(size), &bytesTransferred, &m_overlapped); - else - ok = ReadFile(hFile, buf, u64_lo(size), &bytesTransferred, &m_overlapped); - if(!ok && GetLastError() == ERROR_IO_PENDING) // "pending" isn't an error - { - ok = TRUE; - SetLastError(0); - } - return LibError_from_win32(ok); - } +static DWORD FlagsAndAttributes() +{ + // - FILE_FLAG_SEQUENTIAL_SCAN is ignored when FILE_FLAG_NO_BUFFERING + // is set (c.f. "Windows via C/C++", p. 
324) + // - FILE_FLAG_WRITE_THROUGH is ~5% slower (diskspd.cpp suggests it + // disables hardware caching; the overhead may also be due to the + // Windows cache manager) + const DWORD flags = FILE_FLAG_OVERLAPPED|FILE_FLAG_NO_BUFFERING; + const DWORD attributes = FILE_ATTRIBUTE_NORMAL; + return flags|attributes; +} - bool HasCompleted() const - { - // NB: .Internal "was originally reserved for system use and its behavior may change". - // besides 0 and STATUS_PENDING, I have seen the address of a pointer to a buffer. - return HasOverlappedIoCompleted(&m_overlapped); - } +static LibError OpenFile(const OsPath& pathname, int oflag, HANDLE& hFile) +{ + WinScopedPreserveLastError s; - // required for WaitForMultipleObjects - HANDLE Event() const - { - return m_overlapped.hEvent; - } + const DWORD access = DesiredAccess(oflag); + const DWORD share = ShareMode(oflag); + const DWORD create = CreationDisposition(oflag); + const DWORD flags = FlagsAndAttributes(); + hFile = CreateFileW(OsString(pathname).c_str(), access, share, 0, create, flags, 0); + if(hFile == INVALID_HANDLE_VALUE) + return LibError_from_GLE(); - LibError GetResult(size_t* pBytesTransferred) - { - DWORD bytesTransferred; - const BOOL wait = FALSE; // callers should wait until HasCompleted - if(!GetOverlappedResult(m_hFile, &m_overlapped, &bytesTransferred, wait)) - { - *pBytesTransferred = 0; - return LibError_from_GLE(); - } - else - { - *pBytesTransferred = bytesTransferred; - return INFO::OK; - } - } + return INFO::OK; +} -private: - OVERLAPPED m_overlapped; - HANDLE m_hFile; -}; +//----------------------------------------------------------------------------- +// Windows-only APIs -// called by aio_read, aio_write, and lio_listio. -// cb->aio_lio_opcode specifies desired operation. -// @return LibError, also setting errno in case of failure. -static LibError aio_issue(struct aiocb* cb) +LibError waio_reopen(int fd, const OsPath& pathname, int oflag, ...) 
{ - // no-op (probably from lio_listio) - if(!cb || cb->aio_lio_opcode == LIO_NOP) - return INFO::SKIPPED; + debug_assert(fd > 2); + debug_assert(!(oflag & O_APPEND)); // not supported - // extract aiocb fields for convenience - const bool isWrite = (cb->aio_lio_opcode == LIO_WRITE); - const int fd = cb->aio_fildes; - const size_t size = cb->aio_nbytes; - const off_t ofs = cb->aio_offset; - void* const buf = (void*)cb->aio_buf; // from volatile void* + if(oflag & O_NO_AIO_NP) + return INFO::SKIPPED; - // Win32 requires transfers to be sector-aligned. - if(!IsAligned(ofs, sectorSize) || !IsAligned(buf, sectorSize) || !IsAligned(size, sectorSize)) - { - errno = EINVAL; - WARN_RETURN(ERR::INVALID_PARAM); - } + RETURN_ERR(ModuleInit(&waio_initState, waio_Init)); HANDLE hFile; - { - WinScopedLock lock(WAIO_CS); - hFile = handleManager->Get(fd); - } - if(hFile == INVALID_HANDLE_VALUE) - { - errno = EINVAL; - WARN_RETURN(ERR::INVALID_HANDLE); - } + RETURN_ERR(OpenFile(pathname, oflag, hFile)); - debug_assert(!cb->impl); // SUSv3 requires that the aiocb not be in use - cb->impl.reset(new aiocb::Impl); - - LibError ret = cb->impl->Issue(hFile, ofs, buf, size, isWrite); - if(ret < 0) + if(!AssociateFileControlBlock(fd, hFile)) { - LibError_set_errno(ret); - return ret; + CloseHandle(hFile); + WARN_RETURN(ERR::LIMIT); } return INFO::OK; } -// return status of transfer -int aio_error(const struct aiocb* cb) +LibError waio_close(int fd) { - return cb->impl->HasCompleted()? 0 : EINPROGRESS; + FileControlBlock* fcb = FindFileControlBlock(fd); + if(!fcb) + WARN_RETURN(ERR::INVALID_HANDLE); + const HANDLE hFile = fcb->hFile; + + DissociateFileControlBlock(fcb); + + if(!CloseHandle(hFile)) + WARN_RETURN(ERR::INVALID_HANDLE); + + return INFO::OK; } -// get bytes transferred. call exactly once for each issued request. 
-ssize_t aio_return(struct aiocb* cb) +LibError waio_Preallocate(int fd, off_t alignedSize, off_t alignment) { - // SUSv3 says we mustn't be callable before the request has completed - debug_assert(cb->impl); - debug_assert(cb->impl->HasCompleted()); - size_t bytesTransferred; - LibError ret = cb->impl->GetResult(&bytesTransferred); - cb->impl.reset(); // disallow calling again, as required by SUSv3 - if(ret < 0) - { - LibError_set_errno(ret); - return (ssize_t)-1; - } - return (ssize_t)bytesTransferred; + debug_assert(IsAligned(alignedSize, alignment)); + + FileControlBlock* fcb = FindFileControlBlock(fd); + if(!fcb) + WARN_RETURN(ERR::INVALID_HANDLE); + const HANDLE hFile = fcb->hFile; + + // allocate all space up front to reduce fragmentation + LARGE_INTEGER size64; size64.QuadPart = alignedSize; + WARN_IF_FALSE(SetFilePointerEx(hFile, size64, 0, FILE_BEGIN)); + WARN_IF_FALSE(SetEndOfFile(hFile)); + + // avoid synchronous zero-fill (see discussion in header) + if(pSetFileValidData) + WARN_IF_FALSE(pSetFileValidData(hFile, alignedSize)); + + return INFO::OK; } -int aio_suspend(const struct aiocb* const cbs[], int n, const struct timespec* ts) +//----------------------------------------------------------------------------- +// helper functions + +// called by aio_read, aio_write, and lio_listio. +// cb->aio_lio_opcode specifies desired operation. 
+// @return -1 on failure (having also set errno) +static int Issue(aiocb* cb) { - if(n <= 0 || n > MAXIMUM_WAIT_OBJECTS) + debug_assert(IsAligned(cb->aio_offset, maxSectorSize)); + debug_assert(IsAligned(cb->aio_buf, maxSectorSize)); + debug_assert(IsAligned(cb->aio_nbytes, maxSectorSize)); + + FileControlBlock* fcb = FindFileControlBlock(cb->aio_fildes); + if(!fcb || fcb->hFile == INVALID_HANDLE_VALUE) { - WARN_ERR(ERR::INVALID_PARAM); + DEBUG_WARN_ERR(ERR::INVALID_HANDLE); errno = EINVAL; return -1; } - // build array of event handles - HANDLE hEvents[MAXIMUM_WAIT_OBJECTS]; - size_t numPendingIos = 0; - for(int i = 0; i < n; i++) + debug_assert(!cb->fcb && !cb->ovl); // SUSv3: aiocb must not be in use + cb->fcb = fcb; + cb->ovl = fcb->ovl.Allocate(cb->aio_offset); + if(!cb->ovl) { - if(!cbs[i]) // SUSv3 says NULL entries are to be ignored - continue; - - aiocb::Impl* impl = cbs[i]->impl.get(); - debug_assert(impl); - if(!impl->HasCompleted()) - hEvents[numPendingIos++] = impl->Event(); + DEBUG_WARN_ERR(ERR::LIMIT); + errno = EMFILE; + return -1; } - if(!numPendingIos) // done, don't need to suspend. - return 0; - const BOOL waitAll = FALSE; - // convert timespec to milliseconds (ts == 0 => no timeout) - const DWORD timeout = ts? 
(DWORD)(ts->tv_sec*1000 + ts->tv_nsec/1000000) : INFINITE; - const DWORD result = WaitForMultipleObjects((DWORD)numPendingIos, hEvents, waitAll, timeout); - - for(size_t i = 0; i < numPendingIos; i++) - ResetEvent(hEvents[i]); - - switch(result) - { - case WAIT_FAILED: - WARN_ERR(ERR::FAIL); - errno = EIO; - return -1; + WinScopedPreserveLastError s; - case WAIT_TIMEOUT: - errno = EAGAIN; - return -1; + const HANDLE hFile = fcb->hFile; + void* const buf = (void*)cb->aio_buf; // from volatile void* + const DWORD size = u64_lo(cb->aio_nbytes); + debug_assert(u64_hi(cb->aio_nbytes) == 0); + OVERLAPPED* ovl = (OVERLAPPED*)cb->ovl; + // (there is no point in using WriteFileGather/ReadFileScatter here + // because the IO manager still needs to lock pages and translate them + // into an MDL, and we'd just be increasing the number of addresses) + const BOOL ok = (cb->aio_lio_opcode == LIO_WRITE)? WriteFile(hFile, buf, size, 0, ovl) : ReadFile(hFile, buf, size, 0, ovl); + if(ok || GetLastError() == ERROR_IO_PENDING) + return 0; // success - default: - return 0; - } + LibError_set_errno(LibError_from_GLE()); + return -1; } -int aio_cancel(int fd, struct aiocb* cb) +static bool AreAnyComplete(const struct aiocb* const cbs[], int n) { - // Win32 limitation: can't cancel single transfers - - // all pending reads on this file are canceled. - UNUSED2(cb); - - HANDLE hFile; - { - WinScopedLock lock(WAIO_CS); - hFile = handleManager->Get(fd); - } - if(hFile == INVALID_HANDLE_VALUE) + for(int i = 0; i < n; i++) { - WARN_ERR(ERR::INVALID_HANDLE); - errno = EINVAL; - return -1; + if(!cbs[i]) // SUSv3: must ignore NULL entries + continue; + + if(HasOverlappedIoCompleted((OVERLAPPED*)cbs[i]->ovl)) + return true; } - WARN_IF_FALSE(CancelIo(hFile)); - return AIO_CANCELED; + return false; } +//----------------------------------------------------------------------------- +// API + int aio_read(struct aiocb* cb) { cb->aio_lio_opcode = LIO_READ; - return (aio_issue(cb) < 0)? 
0 : -1; + return Issue(cb); } int aio_write(struct aiocb* cb) { cb->aio_lio_opcode = LIO_WRITE; - return (aio_issue(cb) < 0)? 0 : -1; + return Issue(cb); } int lio_listio(int mode, struct aiocb* const cbs[], int n, struct sigevent* se) { debug_assert(mode == LIO_WAIT || mode == LIO_NOWAIT); UNUSED2(se); // signaling is not implemented. for(int i = 0; i < n; i++) { - if(aio_issue(cbs[i]) < 0) + if(cbs[i] == 0 || cbs[i]->aio_lio_opcode == LIO_NOP) + continue; + + if(Issue(cbs[i]) == -1) return -1; } if(mode == LIO_WAIT) return aio_suspend(cbs, n, 0); return 0; } -int aio_fsync(int, struct aiocb*) +int aio_suspend(const struct aiocb* const cbs[], int n, const struct timespec* timeout) { - WARN_ERR(ERR::NOT_IMPLEMENTED); - errno = ENOSYS; + // consume all pending notifications to prevent them from piling up if + // requests are always complete by the time we're called + DWORD bytesTransferred; ULONG_PTR key; OVERLAPPED* ovl; + while(PollCompletionPort(hIOCP, 0, bytesTransferred, key, ovl) == INFO::OK) {} + + // avoid blocking if already complete (synchronous requests don't post notifications) + if(AreAnyComplete(cbs, n)) + return 0; + + // caller doesn't want to block, and no requests are complete + if(timeout && timeout->tv_sec == 0 && timeout->tv_nsec == 0) + { + errno = EAGAIN; + return -1; + } + + // reduce CPU usage by blocking until a notification arrives or a + // brief timeout elapses (necessary because other threads - or even + // the above poll - might have consumed our notification). note that + // re-posting notifications that don't concern the respective requests + // is not desirable because POSIX doesn't require aio_suspend to be + // called, which means notifications might pile up. 
+ const DWORD milliseconds = 1; // as short as possible (don't oversleep) + const LibError ret = PollCompletionPort(hIOCP, milliseconds, bytesTransferred, key, ovl); + if(ret != INFO::OK && ret != ERR::AGAIN) // failed + { + debug_assert(0); + return -1; + } + + // scan again (even if we got a notification, it might not concern THESE requests) + if(AreAnyComplete(cbs, n)) + return 0; + + // none completed, must repeat the above steps. provoke being called again by + // claiming to have been interrupted by a signal. + errno = EINTR; return -1; } -//----------------------------------------------------------------------------- +int aio_error(const struct aiocb* cb) +{ + const OVERLAPPED* ovl = (const OVERLAPPED*)cb->ovl; + if(!ovl) // called after aio_return + return EINVAL; + if(!HasOverlappedIoCompleted(ovl)) + return EINPROGRESS; + if(ovl->Internal != ERROR_SUCCESS) + return EIO; + return 0; +} -static LibError waio_Init() + +ssize_t aio_return(struct aiocb* cb) { - handleManager = new HandleManager; - return INFO::OK; + FileControlBlock* fcb = (FileControlBlock*)cb->fcb; + OVERLAPPED* ovl = (OVERLAPPED*)cb->ovl; + if(!fcb || !ovl) + { + errno = EINVAL; + return -1; + } + + const ULONG_PTR status = ovl->Internal; + const ULONG_PTR bytesTransferred = ovl->InternalHigh; + + cb->ovl = 0; // prevent further calls to aio_error/aio_return + COMPILER_FENCE; + fcb->ovl.Deallocate(ovl); + cb->fcb = 0; // allow reuse + + return (status == ERROR_SUCCESS)? 
bytesTransferred : -1; } -static LibError waio_Shutdown() + +int aio_cancel(int UNUSED(fd), struct aiocb* cb) { - delete handleManager; - return INFO::OK; + // (faster than calling FindFileControlBlock) + const HANDLE hFile = ((const FileControlBlock*)cb->fcb)->hFile; + if(hFile == INVALID_HANDLE_VALUE) + { + WARN_ERR(ERR::INVALID_HANDLE); + errno = EINVAL; + return -1; + } + + // cancel all I/Os this thread issued for the given file + // (CancelIoEx can cancel individual operations, but is only + // available starting with Vista) + WARN_IF_FALSE(CancelIo(hFile)); + + return AIO_CANCELED; +} + + +int aio_fsync(int, struct aiocb*) +{ + WARN_ERR(ERR::NOT_IMPLEMENTED); + errno = ENOSYS; + return -1; } Index: ps/trunk/source/lib/sysdep/os/win/wposix/waio.h =================================================================== --- ps/trunk/source/lib/sysdep/os/win/wposix/waio.h (revision 9349) +++ ps/trunk/source/lib/sysdep/os/win/wposix/waio.h (revision 9350) @@ -1,121 +1,148 @@ -/* Copyright (c) 2010 Wildfire Games +/* Copyright (c) 2011 Wildfire Games * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* * emulate POSIX asynchronous I/O on Windows. */ #ifndef INCLUDED_WAIO #define INCLUDED_WAIO #include "lib/lib_errors.h" #include "lib/os_path.h" +#include "lib/posix/posix_time.h" // timespec #include "lib/sysdep/os/win/wposix/wposix_types.h" -#include "lib/sysdep/os/win/wposix/no_crt_posix.h" - // Note: transfer buffers, offsets, and lengths must be sector-aligned -// (we don't bother copying to an align buffer because the file cache -// already requires splitting IOs into aligned blocks) +// (we don't bother copying to an align buffer because our block cache +// already requires splitting IOs into naturally-aligned blocks) // // // union sigval // unused { int sival_int; // Integer signal value. void* sival_ptr; // Pointer signal value. }; struct sigevent // unused { int sigev_notify; // notification mode int sigev_signo; // signal number union sigval sigev_value; // signal value void (*sigev_notify_function)(union sigval); }; // -// -// - -extern int read (int fd, void* buf, size_t nbytes); // thunk -extern int write(int fd, void* buf, size_t nbytes); // thunk -extern off_t lseek(int fd, off_t ofs, int whence); // thunk - - -// // // struct aiocb { int aio_fildes; // File descriptor. off_t aio_offset; // File offset. volatile void* aio_buf; // Location of buffer. size_t aio_nbytes; // Length of transfer. int aio_reqprio; // Request priority offset. (unused) struct sigevent aio_sigevent; // Signal number and value. (unused) int aio_lio_opcode; // Operation to be performed. 
- class Impl; - shared_ptr impl; + // internal use only; must be zero-initialized before + // calling the first aio_read/aio_write/lio_listio (aio_return also + // zero-initializes them) + void* fcb; + void* ovl; }; enum { // aio_cancel return AIO_ALLDONE, // None of the requested operations could be canceled since they are already complete. AIO_CANCELED, // All requested operations have been canceled. AIO_NOTCANCELED, // Some of the requested operations could not be canceled since they are in progress. // lio_listio mode LIO_WAIT, // wait until all I/O is complete LIO_NOWAIT, // lio_listio ops LIO_NOP, LIO_READ, LIO_WRITE }; -extern int aio_cancel(int, struct aiocb*); -extern int aio_error(const struct aiocb*); -extern int aio_fsync(int, struct aiocb*); extern int aio_read(struct aiocb*); -extern ssize_t aio_return(struct aiocb*); -struct timespec; -extern int aio_suspend(const struct aiocb* const[], int, const struct timespec*); extern int aio_write(struct aiocb*); extern int lio_listio(int, struct aiocb* const[], int, struct sigevent*); -// for use by wfilesystem's wopen/wclose: +// (if never called, IOCP notifications will pile up.) +extern int aio_suspend(const struct aiocb* const[], int, const struct timespec*); -// (re)open file in asynchronous mode and associate handle with fd. -// (this works because the files default to DENY_NONE sharing) +// @return status of transfer (0 or an errno) +extern int aio_error(const struct aiocb*); + +// @return bytes transferred or -1 on error. +// frees internal storage related to the request and MUST be called +// exactly once for each aiocb after aio_error != EINPROGRESS. +extern ssize_t aio_return(struct aiocb*); + +extern int aio_cancel(int, struct aiocb*); + +extern int aio_fsync(int, struct aiocb*); + +// Windows doesn't allow aio unless the file is opened in asynchronous mode, +// which is not possible with _wsopen_s. 
since we don't want to have to +// provide a separate File class for aio-enabled files, our wopen wrapper +// will also call this function to open a SECOND handle to the file (works +// because CRT open() defaults to DENY_NONE sharing). the CRT's lowio +// descriptor table remains unaffected, but our [w]aio_* functions are +// notified of the file descriptor, which means e.g. read and aio_read can +// both be used. this function must have been called before any +// other [w]aio_* functions are used. extern LibError waio_reopen(int fd, const OsPath& pathname, int oflag, ...); + +// close our second aio-enabled handle to the file (called from wclose). extern LibError waio_close(int fd); +// call this before writing a large file to preallocate clusters, thus +// reducing fragmentation. +// +// @param alignedSize must be a multiple of alignment (SetEndOfFile requires +// sector alignment; this could be avoided by using the undocumented +// NtSetInformationFile or SetFileInformationByHandle on Vista and later). +// use wtruncate after I/O is complete to chop off any excess padding. +// +// NB: writes that extend a file (i.e. ALL WRITES when creating new files) +// are synchronous, which prevents overlapping I/O and other work. +// (http://support.microsoft.com/default.aspx?scid=kb%3Ben-us%3B156932) +// if Windows XP and the SE_MANAGE_VOLUME_NAME privileges are available, +// this function sets the valid data length to avoid the synchronous zero-fill. +// note that this exposes the previous disk contents (possibly even to +// other users since the waio_reopen design cannot deny file sharing) until +// the application successfully writes to the file. 
+LIB_API LibError waio_Preallocate(int fd, off_t alignedSize, off_t alignment); + #endif // #ifndef INCLUDED_WAIO Index: ps/trunk/source/lib/sysdep/os/win/wposix/wfilesystem.cpp =================================================================== --- ps/trunk/source/lib/sysdep/os/win/wposix/wfilesystem.cpp (revision 9349) +++ ps/trunk/source/lib/sysdep/os/win/wposix/wfilesystem.cpp (revision 9350) @@ -1,449 +1,469 @@ /* Copyright (c) 2010 Wildfire Games * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "precompiled.h" #include "lib/sysdep/filesystem.h" #include "lib/allocators/allocators.h" // single_calloc #include "lib/sysdep/os/win/wposix/wposix_internal.h" #include "lib/sysdep/os/win/wposix/waio.h" #include "lib/sysdep/os/win/wposix/wtime_internal.h" // wtime_utc_filetime_to_time_t #include "lib/sysdep/os/win/wposix/crt_posix.h" // _rmdir, _access // // determine file system type on the current drive - // needed to work around incorrect FAT time translation. 
// static enum Filesystem { FS_INVALID, // detect_filesystem() not yet called FS_FAT, // FAT12, FAT16, or FAT32 FS_NTFS, // (most common) FS_UNKNOWN // newer FS we don't know about } filesystem; // rationale: the previous method of checking every path was way too slow // (taking ~800ms total during init). instead, we only determine the FS once. // this is quite a bit easier than intercepting chdir() calls and/or // caching FS type per drive letter, but not foolproof. // // if some data files are on a different volume that is set up as FAT, // the workaround below won't be triggered (=> timestamps may be off by // 1 hour when DST is in effect). oh well, that is not a supported. // // the common case (everything is on a single NTFS volume) is more important // and must run without penalty. // called from the first filetime_to_time_t() call, not win.cpp init; // this means we can rely on the current directory having been set to // the app's directory (and the corresponding volume - see above). static void detect_filesystem() { const DWORD length = GetCurrentDirectoryW(0, 0); debug_assert(length != 0); std::wstring rootPath(length, '\0'); const DWORD charsWritten = GetCurrentDirectoryW(length, &rootPath[0]); debug_assert(charsWritten == length-1); wchar_t drive[_MAX_DRIVE]; debug_assert(_wsplitpath_s(&rootPath[0], drive, ARRAY_SIZE(drive), 0,0, 0,0, 0,0) == 0); wchar_t filesystemName[MAX_PATH+1] = {0}; // mandated by GetVolumeInformationW BOOL ret = GetVolumeInformationW(OsString(OsPath(drive)/"").c_str(), 0,0,0,0,0, filesystemName, ARRAY_SIZE(filesystemName)); debug_assert(ret != 0); filesystem = FS_UNKNOWN; if(!wcsncmp(filesystemName, L"FAT", 3)) // e.g. FAT32 filesystem = FS_FAT; else if(!wcscmp(filesystemName, L"NTFS")) filesystem = FS_NTFS; } // convert local FILETIME (includes timezone bias and possibly DST bias) // to seconds-since-1970 UTC. // // note: splitting into month, year etc. 
is inefficient, // but much easier than determining whether ft lies in DST, // and ourselves adding the appropriate bias. // // called for FAT file times; see wposix filetime_to_time_t. time_t time_t_from_local_filetime(FILETIME* ft) { SYSTEMTIME st; FileTimeToSystemTime(ft, &st); struct tm t; t.tm_sec = st.wSecond; t.tm_min = st.wMinute; t.tm_hour = st.wHour; t.tm_mday = st.wDay; t.tm_mon = st.wMonth-1; t.tm_year = st.wYear-1900; t.tm_isdst = -1; // let the CRT determine whether this local time // falls under DST by the US rules. return mktime(&t); } // convert Windows FILETIME to POSIX time_t (seconds-since-1970 UTC); // used by stat and readdir_stat_np for st_mtime. // // works around a documented Windows bug in converting FAT file times // (correct results are desired since VFS mount logic considers // files 'equal' if their mtime and size are the same). static time_t filetime_to_time_t(FILETIME* ft) { ONCE(detect_filesystem()); // the FAT file system stores local file times, while // NTFS records UTC. Windows does convert automatically, // but uses the current DST settings. (boo!) // we go back to local time, and convert properly. if(filesystem == FS_FAT) { FILETIME local_ft; FileTimeToLocalFileTime(ft, &local_ft); return time_t_from_local_filetime(&local_ft); } return wtime_utc_filetime_to_time_t(ft); } //----------------------------------------------------------------------------- // dirent.h //----------------------------------------------------------------------------- // note: we avoid opening directories or returning entries that have // hidden or system attributes set. this is to prevent returning something // like "\System Volume Information", which raises an error upon opening. // 0-initialized by wdir_alloc for safety; this is required for // num_entries_scanned. struct WDIR { HANDLE hFind; // the wdirent returned by readdir. 
// note: having only one global instance is not possible because // multiple independent wopendir/wreaddir sequences must be supported. struct wdirent ent; WIN32_FIND_DATAW fd; // since wopendir calls FindFirstFileW, we need a means of telling the // first call to wreaddir that we already have a file. // that's the case iff this is == 0; we use a counter rather than a // flag because that allows keeping statistics. int num_entries_scanned; }; // suballocator - satisfies most requests with a reusable static instance, // thus speeding up allocation and avoiding heap fragmentation. // thread-safe. static WDIR global_wdir; static intptr_t global_wdir_is_in_use; // zero-initializes the WDIR (code below relies on this) static inline WDIR* wdir_alloc() { return (WDIR*)single_calloc(&global_wdir, &global_wdir_is_in_use, sizeof(WDIR)); } static inline void wdir_free(WDIR* d) { single_free(&global_wdir, &global_wdir_is_in_use, d); } static const DWORD hs = FILE_ATTRIBUTE_HIDDEN | FILE_ATTRIBUTE_SYSTEM; // make sure path exists and is a normal (according to attributes) directory. static bool is_normal_dir(const OsPath& path) { const DWORD fa = GetFileAttributesW(OsString(path).c_str()); // path not found if(fa == INVALID_FILE_ATTRIBUTES) return false; // not a directory if((fa & FILE_ATTRIBUTE_DIRECTORY) == 0) return false; // hidden or system attribute(s) set // this check is now disabled because wsnd's add_oal_dlls_in_dir // needs to open the Windows system directory, which sometimes has // these attributes set. //if((fa & hs) != 0) // return false; return true; } WDIR* wopendir(const OsPath& path) { if(!is_normal_dir(path)) { errno = ENOENT; return 0; } WDIR* d = wdir_alloc(); if(!d) { errno = ENOMEM; return 0; } // NB: c:\path only returns information about that directory; trailing // slashes aren't allowed. append * to retrieve directory entries. OsPath searchPath = path/"*"; // note: we could store search_path and defer FindFirstFileW until // wreaddir. 
this way is a bit more complex but required for // correctness (we must return a valid DIR iff <path> is valid). d->hFind = FindFirstFileW(OsString(searchPath).c_str(), &d->fd); if(d->hFind == INVALID_HANDLE_VALUE) { // not an error - the directory is just empty. if(GetLastError() == ERROR_NO_MORE_FILES) return d; // translate Win32 error to errno. LibError err = LibError_from_win32(FALSE); LibError_set_errno(err); // release the WDIR allocated above. // unfortunately there's no way around this; we need to allocate // d before FindFirstFile because it uses d->fd. copying from a // temporary isn't nice either (this free doesn't happen often) wdir_free(d); return 0; } return d; } struct wdirent* wreaddir(WDIR* d) { // avoid polluting the last error. DWORD prev_err = GetLastError(); // first call - skip FindNextFileW (see wopendir). if(d->num_entries_scanned == 0) { // this directory is empty. if(d->hFind == INVALID_HANDLE_VALUE) return 0; goto already_have_file; } // until end of directory or a valid entry was found: for(;;) { if(!FindNextFileW(d->hFind, &d->fd)) goto fail; already_have_file: d->num_entries_scanned++; // not a hidden or system entry -> it's valid. if((d->fd.dwFileAttributes & hs) == 0) break; } // this entry has passed all checks; return information about it. // (note: d_name is a pointer; see struct dirent definition) d->ent.d_name = d->fd.cFileName; return &d->ent; fail: // FindNextFileW failed; determine why and bail. // .. legit, end of dir reached. don't pollute last error code. if(GetLastError() == ERROR_NO_MORE_FILES) SetLastError(prev_err); else WARN_ERR(LibError_from_GLE()); return 0; } int wreaddir_stat_np(WDIR* d, struct stat* s) { memset(s, 0, sizeof(*s)); s->st_size = (off_t)u64_from_u32(d->fd.nFileSizeHigh, d->fd.nFileSizeLow); s->st_mode = (unsigned short)((d->fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)? 
S_IFDIR : S_IFREG); s->st_mtime = filetime_to_time_t(&d->fd.ftLastWriteTime); return 0; } int wclosedir(WDIR* d) { FindClose(d->hFind); wdir_free(d); return 0; } //----------------------------------------------------------------------------- // fcntl.h //----------------------------------------------------------------------------- int wopen(const OsPath& pathname, int oflag) { debug_assert(!(oflag & O_CREAT)); return wopen(OsString(pathname).c_str(), oflag, _S_IREAD|_S_IWRITE); } int wopen(const OsPath& pathname, int oflag, mode_t mode_arg) { mode_t mode = _S_IREAD|_S_IWRITE; if(oflag & O_CREAT) mode = mode_arg; WinScopedPreserveLastError s; // _wsopen_s's CreateFileW int fd; errno_t ret = _wsopen_s(&fd, OsString(pathname).c_str(), oflag, _SH_DENYNO, mode); if(ret != 0) { errno = ret; return -1; // NOWARN } if(waio_reopen(fd, pathname, oflag) != INFO::OK) return -1; // CRT doesn't like more than 255 files open. // warn now, so that we notice why so many are open. #ifndef NDEBUG if(fd > 256) WARN_ERR(ERR::LIMIT); #endif return fd; } int wclose(int fd) { debug_assert(3 <= fd && fd < 256); (void)waio_close(fd); // no-op if fd wasn't opened for aio return _close(fd); } //----------------------------------------------------------------------------- // unistd.h //----------------------------------------------------------------------------- +// we don't want to #define read to _read, since that's a fairly common +// identifier. therefore, translate from MS CRT names via thunk functions. +// efficiency is less important, and the overhead could be optimized away. 
+ +int read(int fd, void* buf, size_t nbytes) +{ + return _read(fd, buf, (int)nbytes); +} + +int write(int fd, void* buf, size_t nbytes) +{ + return _write(fd, buf, (int)nbytes); +} + +off_t lseek(int fd, off_t ofs, int whence) +{ + return _lseeki64(fd, ofs, whence); +} + + int wtruncate(const OsPath& pathname, off_t length) { HANDLE hFile = CreateFileW(OsString(pathname).c_str(), GENERIC_WRITE, 0, 0, OPEN_EXISTING, 0, 0); debug_assert(hFile != INVALID_HANDLE_VALUE); LARGE_INTEGER ofs; ofs.QuadPart = length; WARN_IF_FALSE(SetFilePointerEx(hFile, ofs, 0, FILE_BEGIN)); WARN_IF_FALSE(SetEndOfFile(hFile)); WARN_IF_FALSE(CloseHandle(hFile)); return 0; } int wunlink(const OsPath& pathname) { return _wunlink(OsString(pathname).c_str()); } int wrmdir(const OsPath& path) { return _wrmdir(OsString(path).c_str()); } int wrename(const OsPath& pathnameOld, const OsPath& pathnameNew) { return _wrename(OsString(pathnameOld).c_str(), OsString(pathnameNew).c_str()); } OsPath wrealpath(const OsPath& pathname) { wchar_t resolved[PATH_MAX]; if(!GetFullPathNameW(OsString(pathname).c_str(), PATH_MAX, resolved, 0)) return OsPath(); return resolved; } static int ErrnoFromCreateDirectory() { switch(GetLastError()) { case ERROR_ALREADY_EXISTS: return EEXIST; case ERROR_PATH_NOT_FOUND: return ENOENT; case ERROR_ACCESS_DENIED: return EACCES; case ERROR_WRITE_PROTECT: return EROFS; case ERROR_DIRECTORY: return ENOTDIR; default: return 0; } } int wmkdir(const OsPath& path, mode_t UNUSED(mode)) { if(!CreateDirectoryW(OsString(path).c_str(), (LPSECURITY_ATTRIBUTES)NULL)) { errno = ErrnoFromCreateDirectory(); return -1; } return 0; } int wstat(const OsPath& pathname, struct stat* buf) { return _wstat64(OsString(pathname).c_str(), buf); } Index: ps/trunk/source/lib/sysdep/os/win/wposix/wfilesystem.h =================================================================== --- ps/trunk/source/lib/sysdep/os/win/wposix/wfilesystem.h (revision 9349) +++ ps/trunk/source/lib/sysdep/os/win/wposix/wfilesystem.h 
(revision 9350) @@ -1,55 +1,64 @@ /* Copyright (c) 2010 Wildfire Games * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef INCLUDED_WFILESYSTEM #define INCLUDED_WFILESYSTEM // // sys/stat.h // #include <sys/stat.h> // for S_IFREG etc. 
#if MSC_VERSION typedef unsigned int mode_t; // defined by MinGW but not VC #define stat _stat64 // we need 64-bit st_size and time_t #endif // permission masks when creating files (_wsopen_s doesn't distinguish // between owner/user/group) #define S_IRUSR _S_IREAD #define S_IRGRP _S_IREAD #define S_IROTH _S_IREAD #define S_IWUSR _S_IWRITE #define S_IWGRP _S_IWRITE #define S_IWOTH _S_IWRITE #define S_IXUSR 0 #define S_IXGRP 0 #define S_IXOTH 0 #define S_IRWXU (S_IRUSR|S_IWUSR|S_IXUSR) #define S_IRWXG (S_IRGRP|S_IWGRP|S_IXGRP) #define S_IRWXO (S_IROTH|S_IWOTH|S_IXOTH) #define S_ISDIR(m) (m & S_IFDIR) #define S_ISREG(m) (m & S_IFREG) + +// +// <unistd.h> +// + +extern int read (int fd, void* buf, size_t nbytes); // thunk +extern int write(int fd, void* buf, size_t nbytes); // thunk +extern off_t lseek(int fd, off_t ofs, int whence); // thunk + #endif // #ifndef INCLUDED_WFILESYSTEM Index: ps/trunk/source/lib/sysdep/os/win/wiocp.cpp =================================================================== --- ps/trunk/source/lib/sysdep/os/win/wiocp.cpp (nonexistent) +++ ps/trunk/source/lib/sysdep/os/win/wiocp.cpp (revision 9350) @@ -0,0 +1,32 @@ +#include "precompiled.h" +#include "lib/sysdep/os/win/wiocp.h" + +#include "lib/file/file.h" // ERR::IO +#include "lib/sysdep/os/win/wutil.h" + + +void AttachToCompletionPort(HANDLE hFile, HANDLE& hIOCP, ULONG_PTR key, DWORD numConcurrentThreads) +{ + WinScopedPreserveLastError s; // CreateIoCompletionPort + + // (when called for the first time, ends up creating hIOCP) + hIOCP = CreateIoCompletionPort(hFile, hIOCP, key, numConcurrentThreads); + debug_assert(wutil_IsValidHandle(hIOCP)); +} + + +LibError PollCompletionPort(HANDLE hIOCP, DWORD timeout, DWORD& bytesTransferred, ULONG_PTR& key, OVERLAPPED*& ovl) +{ + if(hIOCP == 0) + return ERR::INVALID_HANDLE; // NOWARN (happens if called before the first Attach) + + bytesTransferred = 0; + key = 0; + ovl = 0; + if(GetQueuedCompletionStatus(hIOCP, &bytesTransferred, &key, &ovl, timeout)) + return 
INFO::OK; + if(GetLastError() == WAIT_TIMEOUT) + return ERR::AGAIN; // NOWARN (nothing pending) + else + return ERR::FAIL; // NOWARN (let caller decide what to do) +} Index: ps/trunk/source/lib/sysdep/os/win/whrt/counter.cpp =================================================================== --- ps/trunk/source/lib/sysdep/os/win/whrt/counter.cpp (revision 9349) +++ ps/trunk/source/lib/sysdep/os/win/whrt/counter.cpp (revision 9350) @@ -1,117 +1,117 @@ /* Copyright (c) 2010 Wildfire Games * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ /* * Interface for counter implementations */ #include "precompiled.h" #include "lib/sysdep/os/win/whrt/counter.h" -#include "lib/bits.h" +#include "lib/alignment.h" #include "lib/sysdep/cpu.h" // cpu_CAS #include "lib/sysdep/os/win/whrt/tsc.h" #include "lib/sysdep/os/win/whrt/hpet.h" #include "lib/sysdep/os/win/whrt/pmt.h" #include "lib/sysdep/os/win/whrt/qpc.h" #include "lib/sysdep/os/win/whrt/tgt.h" // to add a new counter type, simply include its header here and // insert a case in ConstructCounterAt's switch statement. //----------------------------------------------------------------------------- // create/destroy counters /** * @param id * @param address * @param size Maximum allowable size [bytes] of the subclass instance * @return pointer to a newly constructed ICounter subclass of type \<id\> at * the given address, or 0 iff the ID is invalid. **/ static ICounter* ConstructCounterAt(size_t id, void* address, size_t size) { // rationale for placement new: see call site. // counters are chosen according to the following order. rationale: // - TSC must come before QPC and PMT to make sure a bug in the latter on // Pentium systems doesn't come up. // - PMT works, but is inexplicably slower than QPC on a PIII Mobile. // - TGT really isn't as safe as the others, so it should be last. // - low-overhead and high-resolution counters are preferred. switch(id) { case 0: return CreateCounterHPET(address, size); case 1: return CreateCounterTSC(address, size); case 2: return CreateCounterQPC(address, size); case 3: return CreateCounterPMT(address, size); case 4: return CreateCounterTGT(address, size); default: return 0; } } static volatile intptr_t isCounterAllocated; ICounter* CreateCounter(size_t id) { // we placement-new the Counter classes in a static buffer. // this is dangerous, but we are careful to ensure alignment. it is // unusual and thus bad, but there's also one advantage: we avoid // using global operator new before the CRT is initialized (risky). 
// // alternatives: // - defining as static doesn't work because the ctors (necessary for // vptr initialization) run during _cinit, which comes after our // first use of them. // - using static_calloc isn't possible because we don't know the // size until after the alloc / placement new. if(!cpu_CAS(&isCounterAllocated, 0, 1)) debug_assert(0); // static counter memory is already in use! static const size_t memSize = 200; static u8 mem[memSize]; - u8* alignedMem = (u8*)round_up((uintptr_t)mem, (uintptr_t)16u); + u8* alignedMem = (u8*)Align<16>((uintptr_t)mem); const size_t bytesLeft = mem+memSize - alignedMem; ICounter* counter = ConstructCounterAt(id, alignedMem, bytesLeft); return counter; } void DestroyCounter(ICounter*& counter) { debug_assert(counter); counter->Shutdown(); counter->~ICounter(); // must be called due to placement new counter = 0; isCounterAllocated = 0; } Index: ps/trunk/source/lib/sysdep/os/win/wfirmware.cpp =================================================================== --- ps/trunk/source/lib/sysdep/os/win/wfirmware.cpp (revision 9349) +++ ps/trunk/source/lib/sysdep/os/win/wfirmware.cpp (revision 9350) @@ -1,48 +1,48 @@ #include "precompiled.h" #include "lib/sysdep/os/win/wfirmware.h" #include "lib/sysdep/os/win/wutil.h" namespace wfirmware { TableIds GetTableIDs(Provider provider) { WUTIL_FUNC(pEnumSystemFirmwareTables, UINT, (DWORD, PVOID, DWORD)); WUTIL_IMPORT_KERNEL32(EnumSystemFirmwareTables, pEnumSystemFirmwareTables); if(!pEnumSystemFirmwareTables) return TableIds(); const size_t tableIdsSize = pEnumSystemFirmwareTables(provider, 0, 0); debug_assert(tableIdsSize != 0); debug_assert(tableIdsSize % sizeof(TableId) == 0); TableIds tableIDs(DivideRoundUp(tableIdsSize, sizeof(TableId)), 0); - const size_t bytesWritten = pEnumSystemFirmwareTables(provider, &tableIDs[0], tableIdsSize); + const size_t bytesWritten = pEnumSystemFirmwareTables(provider, &tableIDs[0], (DWORD)tableIdsSize); debug_assert(bytesWritten == tableIdsSize); return 
tableIDs; } Table GetTable(Provider provider, TableId tableId) { WUTIL_FUNC(pGetSystemFirmwareTable, UINT, (DWORD, DWORD, PVOID, DWORD)); WUTIL_IMPORT_KERNEL32(GetSystemFirmwareTable, pGetSystemFirmwareTable); if(!pGetSystemFirmwareTable) return Table(); const size_t tableSize = pGetSystemFirmwareTable(provider, tableId, 0, 0); if(tableSize == 0) { debug_assert(0); return Table(); } Table table(tableSize, 0); - const size_t bytesWritten = pGetSystemFirmwareTable(provider, tableId, &table[0], tableSize); + const size_t bytesWritten = pGetSystemFirmwareTable(provider, tableId, &table[0], (DWORD)tableSize); debug_assert(bytesWritten == tableSize); return table; } } // namespace wfirmware Index: ps/trunk/source/lib/sysdep/os/win/wiocp.h =================================================================== --- ps/trunk/source/lib/sysdep/os/win/wiocp.h (nonexistent) +++ ps/trunk/source/lib/sysdep/os/win/wiocp.h (revision 9350) @@ -0,0 +1,50 @@ +/* Copyright (c) 2010 Wildfire Games + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * I/O completion port + */ + +#ifndef INCLUDED_WIOCP +#define INCLUDED_WIOCP + +#include "lib/sysdep/os/win/win.h" + +// this appears to be the best solution for IO notification. +// there are three alternatives: +// - multiple threads with blocking I/O. this is rather inefficient when +// many directories (e.g. mods) are being watched. +// - normal overlapped I/O: build a contiguous array of the hEvents +// in all OVERLAPPED structures, and WaitForMultipleObjects. +// it would be cumbersome to update this array when adding/removing watches. +// - callback notification: a notification function is called when the thread +// that initiated the I/O (ReadDirectoryChangesW) enters an alertable +// wait state. it is desirable for notifications to arrive at a single +// known point - see dir_watch_Poll. however, other APIs might also +// trigger APC delivery. 
+ +// @param hIOCP 0 to create a new port +extern void AttachToCompletionPort(HANDLE hFile, HANDLE& hIOCP, ULONG_PTR key, DWORD numConcurrentThreads = 0); + +extern LibError PollCompletionPort(HANDLE hIOCP, DWORD timeout, DWORD& bytesTransferred, ULONG_PTR& key, OVERLAPPED*& ovl); + +#endif // #ifndef INCLUDED_WIOCP Index: ps/trunk/source/lib/sysdep/compiler.h =================================================================== --- ps/trunk/source/lib/sysdep/compiler.h (revision 9349) +++ ps/trunk/source/lib/sysdep/compiler.h (revision 9350) @@ -1,230 +1,101 @@ /* Copyright (c) 2010 Wildfire Games * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* * compiler-specific macros and fixes */ #ifndef INCLUDED_COMPILER #define INCLUDED_COMPILER -#include "lib/sysdep/arch.h" // ARCH_AMD64 -#include "lib/config.h" // CONFIG_OMIT_FP - - // detect compiler and its version (0 if not present, otherwise // major*100 + minor). 
note that more than one *_VERSION may be // non-zero due to interoperability (e.g. ICC with MSC). // .. VC #ifdef _MSC_VER # define MSC_VERSION _MSC_VER #else # define MSC_VERSION 0 #endif // .. ICC (VC-compatible) #if defined(__INTEL_COMPILER) # define ICC_VERSION __INTEL_COMPILER #else # define ICC_VERSION 0 #endif // .. LCC (VC-compatible) #if defined(__LCC__) # define LCC_VERSION __LCC__ #else # define LCC_VERSION 0 #endif // .. GCC #ifdef __GNUC__ # define GCC_VERSION (__GNUC__*100 + __GNUC_MINOR__) #else # define GCC_VERSION 0 #endif -// pass "omit frame pointer" setting on to the compiler -#if MSC_VERSION && !ARCH_AMD64 -# if CONFIG_OMIT_FP -# pragma optimize("y", on) -# else -# pragma optimize("y", off) -# endif -#elif GCC_VERSION -// TODO -#endif - - // are PreCompiled Headers supported? #if MSC_VERSION # define HAVE_PCH 1 #elif defined(USING_PCH) # define HAVE_PCH 1 #else # define HAVE_PCH 0 #endif -// try to define _W64, if not already done -// (this is useful for catching pointer size bugs) -#ifndef _W64 -# if MSC_VERSION -# define _W64 __w64 -# elif GCC_VERSION -# define _W64 __attribute__((mode (__pointer__))) -# else -# define _W64 -# endif -#endif - - // check if compiling in pure C mode (not C++) with support for C99. // (this is more convenient than testing __STDC_VERSION__ directly) // // note: C99 provides several useful but disjunct bits of functionality. // unfortunately, most C++ compilers do not offer a complete implementation. // however, many of these features are likely to be added to C++, and/or are // already available as extensions. what we'll do is add a HAVE_ macro for // each feature and test those instead. they are set if HAVE_C99, or also if // the compiler happens to support something compatible. // // rationale: lying about __STDC_VERSION__ via Premake so as to enable support // for some C99 functions doesn't work. 
Mac OS X headers would then use the // restrict keyword, which is never supported by g++ (because that might // end up breaking valid C++98 programs). #define HAVE_C99 0 #ifdef __STDC_VERSION__ # if __STDC_VERSION__ >= 199901L # undef HAVE_C99 # define HAVE_C99 1 # endif #endif -// (at least rudimentary) support for C++0x +// do we have (at least rudimentary) support for C++0x? #ifndef HAVE_CPP0X # if defined(__GXX_EXPERIMENTAL_CPP0X__) || MSC_VERSION >= 1600 || ICC_VERSION >= 1200 # define HAVE_CPP0X 1 # else # define HAVE_CPP0X 0 # endif #endif - -// C99-like restrict (non-standard in C++, but widely supported in various forms). -// -// May be used on pointers. May also be used on member functions to indicate -// that 'this' is unaliased (e.g. "void C::m() RESTRICT { ... }"). -// Must not be used on references - GCC supports that but VC doesn't. -// -// We call this "RESTRICT" to avoid conflicts with VC's __declspec(restrict), -// and because it's not really the same as C99's restrict. -// -// To be safe and satisfy the compilers' stated requirements: an object accessed -// by a restricted pointer must not be accessed by any other pointer within the -// lifetime of the restricted pointer, if the object is modified. -// To maximise the chance of optimisation, any pointers that could potentially -// alias with the restricted one should be marked as restricted too. -// -// It would probably be a good idea to write test cases for any code that uses -// this in an even very slightly unclear way, in case it causes obscure problems -// in a rare compiler due to differing semantics. -// -// .. GCC -#if GCC_VERSION -# define RESTRICT __restrict__ -// .. VC8 provides __restrict -#elif MSC_VERSION >= 1400 -# define RESTRICT __restrict -// .. ICC supports the keyword 'restrict' when run with the /Qrestrict option, -// but it always also supports __restrict__ or __restrict to be compatible -// with GCC/MSVC, so we'll use the underscored version. 
One of {GCC,MSC}_VERSION -// should have been defined in addition to ICC_VERSION, so we should be using -// one of the above cases (unless it's an old VS7.1-emulating ICC). -#elif ICC_VERSION -# error ICC_VERSION defined without either GCC_VERSION or an adequate MSC_VERSION -// .. unsupported; remove it from code -#else -# define RESTRICT -#endif - - -// C99-style __func__ -// .. newer GCC already have it -#if GCC_VERSION >= 300 - // nothing need be done -// .. old GCC and MSVC have __FUNCTION__ -#elif GCC_VERSION >= 200 || MSC_VERSION -# define __func__ __FUNCTION__ -// .. unsupported -#else -# define __func__ "(unknown)" -#endif - - -// tell the compiler that the code at/following this macro invocation is -// unreachable. this can improve optimization and avoid warnings. -// -// this macro should not generate any fallback code; it is merely the -// compiler-specific backend for lib.h's UNREACHABLE. -// #define it to nothing if the compiler doesn't support such a hint. -#define HAVE_ASSUME_UNREACHABLE 1 -#if MSC_VERSION && !ICC_VERSION // (ICC ignores this) -# define ASSUME_UNREACHABLE __assume(0) -#elif GCC_VERSION >= 450 -# define ASSUME_UNREACHABLE __builtin_unreachable() -#else -# define ASSUME_UNREACHABLE -# undef HAVE_ASSUME_UNREACHABLE -# define HAVE_ASSUME_UNREACHABLE 0 -#endif - - -// extern "C", but does the right thing in pure-C mode -#if defined(__cplusplus) -# define EXTERN_C extern "C" -#else -# define EXTERN_C extern -#endif - -#if MSC_VERSION -# define INLINE __forceinline -#else -# define INLINE inline -#endif - -#if MSC_VERSION -# define CALL_CONV __cdecl -#else -# define CALL_CONV -#endif - -#if MSC_VERSION && !ARCH_AMD64 -# define DECORATED_NAME(name) _##name -#else -# define DECORATED_NAME(name) name -#endif - -// workaround for preprocessor limitation: macro args aren't expanded -// before being pasted. 
-#define STRINGIZE2(id) # id -#define STRINGIZE(id) STRINGIZE2(id) - #endif // #ifndef INCLUDED_COMPILER Index: ps/trunk/source/lib/code_annotation.h =================================================================== --- ps/trunk/source/lib/code_annotation.h (revision 9349) +++ ps/trunk/source/lib/code_annotation.h (revision 9350) @@ -1,217 +1,378 @@ /* Copyright (c) 2010 Wildfire Games * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* * macros for code annotation. */ #ifndef INCLUDED_CODE_ANNOTATION #define INCLUDED_CODE_ANNOTATION #include "lib/sysdep/compiler.h" +#include "lib/sysdep/arch.h" // ARCH_AMD64 /** * mark a function local variable or parameter as unused and avoid * the corresponding compiler warning. * use inside the function body, e.g. 
void f(int x) { UNUSED2(x); } **/ #if ICC_VERSION // NB: #pragma unused is documented but "unrecognized" when used; // casting to void isn't sufficient, but the following is: # define UNUSED2(param) param = param #else # define UNUSED2(param) (void)param #endif /** * mark a function parameter as unused and avoid * the corresponding compiler warning. * wrap around the parameter name, e.g. void f(int UNUSED(x)) **/ #define UNUSED(param) /** * "unreachable code" helpers * * unreachable lines of code are often the source or symptom of subtle bugs. * they are flagged by compiler warnings; however, the opposite problem - * erroneously reaching certain spots (e.g. due to missing return statement) * is worse and not detected automatically. * * to defend against this, the programmer can annotate their code to * indicate to humans that a particular spot should never be reached. * however, that isn't much help; better is a sentinel that raises an * error if it is actually reached. hence, the UNREACHABLE macro. * * ironically, if the code guarded by UNREACHABLE works as it should, * compilers may flag the macro's code as unreachable. this would * distract from genuine warnings, which is unacceptable. * * even worse, compilers differ in their code checking: GCC only complains if * non-void functions end without returning a value (i.e. missing return * statement), while VC checks if lines are unreachable (e.g. if they are * preceded by a return on all paths). * * the implementation below enables optimization and automated checking * without raising warnings. **/ #define UNREACHABLE // actually defined below.. this is for # undef UNREACHABLE // CppDoc's benefit only. +// this macro should not generate any fallback code; it is merely the +// compiler-specific backend for UNREACHABLE. +// #define it to nothing if the compiler doesn't support such a hint. 
+#define HAVE_ASSUME_UNREACHABLE 1 +#if MSC_VERSION && !ICC_VERSION // (ICC ignores this) +# define ASSUME_UNREACHABLE __assume(0) +#elif GCC_VERSION >= 450 +# define ASSUME_UNREACHABLE __builtin_unreachable() +#else +# define ASSUME_UNREACHABLE +# undef HAVE_ASSUME_UNREACHABLE +# define HAVE_ASSUME_UNREACHABLE 0 +#endif + // compiler supports ASSUME_UNREACHABLE => allow it to assume the code is // never reached (improves optimization at the cost of undefined behavior // if the annotation turns out to be incorrect). #if HAVE_ASSUME_UNREACHABLE && !CONFIG_PARANOIA # define UNREACHABLE ASSUME_UNREACHABLE // otherwise (or if CONFIG_PARANOIA is set), add a user-visible // warning if the code is reached. note that abort() fails to stop // ICC from warning about the lack of a return statement, so we // use an infinite loop instead. #else # define UNREACHABLE\ STMT(\ debug_assert(0); /* hit supposedly unreachable code */\ for(;;){};\ ) #endif /** convenient specialization of UNREACHABLE for switch statements whose default can never be reached. example usage: int x; switch(x % 2) { case 0: break; case 1: break; NODEFAULT; } **/ #define NODEFAULT default: UNREACHABLE // generate a symbol containing the line number of the macro invocation. // used to give a unique name (per file) to types made by cassert. // we can't prepend __FILE__ to make it globally unique - the filename // may be enclosed in quotes. PASTE3_HIDDEN__ is needed to make sure // __LINE__ is expanded correctly. #define PASTE3_HIDDEN__(a, b, c) a ## b ## c #define PASTE3__(a, b, c) PASTE3_HIDDEN__(a, b, c) #define UID__ PASTE3__(LINE_, __LINE__, _) #define UID2__ PASTE3__(LINE_, __LINE__, _2) /** * Compile-time debug_assert. Causes a compile error if the expression * evaluates to zero/false. * * No runtime overhead; may be used anywhere, including file scope. * Especially useful for testing sizeof types. * * @param expr Expression that is expected to evaluate to non-zero at compile-time. 
**/ #define cassert(expr) typedef static_assert_<(expr)>::type UID__ template<bool> struct static_assert_; template<> struct static_assert_<true> { typedef int type; }; /** * @copydoc cassert(expr) * * This version must be used if expr uses a dependent type (e.g. depends on * a template parameter). **/ #define cassert_dependent(expr) typedef typename static_assert_<(expr)>::type UID__ /** * @copydoc cassert(expr) * * This version has a less helpful error message, but redefinition doesn't * trigger warnings. **/ #define cassert2(expr) extern u8 CASSERT_FAILURE[1][(expr)] // indicate a class is noncopyable (usually due to const or reference members). // example: // class C { // NONCOPYABLE(C); // public: // etc. // }; // this is preferable to inheritance from boost::noncopyable because it // avoids ICC 11 W4 warnings about non-virtual dtors and suppression of // the copy assignment operator. #define NONCOPYABLE(className)\ private:\ className(const className&);\ const className& operator=(const className&) #if ICC_VERSION # define ASSUME_ALIGNED(ptr, multiple) __assume_aligned(ptr, multiple) #else # define ASSUME_ALIGNED(ptr, multiple) #endif // annotate printf-style functions for compile-time type checking. // fmtpos is the index of the format argument, counting from 1 or // (if it's a non-static class function) 2; the '...' is assumed // to come directly after it. 
#if GCC_VERSION # define PRINTF_ARGS(fmtpos) __attribute__ ((format (printf, fmtpos, fmtpos+1))) # define VPRINTF_ARGS(fmtpos) __attribute__ ((format (printf, fmtpos, 0))) # if CONFIG_DEHYDRA # define WPRINTF_ARGS(fmtpos) __attribute__ ((user("format, w, printf, " #fmtpos ", +1"))) # else # define WPRINTF_ARGS(fmtpos) /* not currently supported in GCC */ # endif # define VWPRINTF_ARGS(fmtpos) /* not currently supported in GCC */ #else # define PRINTF_ARGS(fmtpos) # define VPRINTF_ARGS(fmtpos) # define WPRINTF_ARGS(fmtpos) # define VWPRINTF_ARGS(fmtpos) // TODO: support _Printf_format_string_ for VC9+ #endif // annotate vararg functions that expect to end with an explicit NULL #if GCC_VERSION # define SENTINEL_ARG __attribute__ ((sentinel)) #else # define SENTINEL_ARG #endif /** * prevent the compiler from reordering loads or stores across this point. **/ #if ICC_VERSION # define COMPILER_FENCE __memory_barrier() #elif MSC_VERSION # include # pragma intrinsic(_ReadWriteBarrier) # define COMPILER_FENCE _ReadWriteBarrier() #elif GCC_VERSION # define COMPILER_FENCE asm volatile("" : : : "memory") #else # define COMPILER_FENCE #endif + +// try to define _W64, if not already done +// (this is useful for catching pointer size bugs) +#ifndef _W64 +# if MSC_VERSION +# define _W64 __w64 +# elif GCC_VERSION +# define _W64 __attribute__((mode (__pointer__))) +# else +# define _W64 +# endif +#endif + + +// C99-like restrict (non-standard in C++, but widely supported in various forms). +// +// May be used on pointers. May also be used on member functions to indicate +// that 'this' is unaliased (e.g. "void C::m() RESTRICT { ... }"). +// Must not be used on references - GCC supports that but VC doesn't. +// +// We call this "RESTRICT" to avoid conflicts with VC's __declspec(restrict), +// and because it's not really the same as C99's restrict. 
+// +// To be safe and satisfy the compilers' stated requirements: an object accessed +// by a restricted pointer must not be accessed by any other pointer within the +// lifetime of the restricted pointer, if the object is modified. +// To maximise the chance of optimisation, any pointers that could potentially +// alias with the restricted one should be marked as restricted too. +// +// It would probably be a good idea to write test cases for any code that uses +// this in an even very slightly unclear way, in case it causes obscure problems +// in a rare compiler due to differing semantics. +// +// .. GCC +#if GCC_VERSION +# define RESTRICT __restrict__ +// .. VC8 provides __restrict +#elif MSC_VERSION >= 1400 +# define RESTRICT __restrict +// .. ICC supports the keyword 'restrict' when run with the /Qrestrict option, +// but it always also supports __restrict__ or __restrict to be compatible +// with GCC/MSVC, so we'll use the underscored version. One of {GCC,MSC}_VERSION +// should have been defined in addition to ICC_VERSION, so we should be using +// one of the above cases (unless it's an old VS7.1-emulating ICC). +#elif ICC_VERSION +# error ICC_VERSION defined without either GCC_VERSION or an adequate MSC_VERSION +// .. unsupported; remove it from code +#else +# define RESTRICT +#endif + + +// C99-style __func__ +// .. newer GCC already have it +#if GCC_VERSION >= 300 +// nothing need be done +// .. old GCC and MSVC have __FUNCTION__ +#elif GCC_VERSION >= 200 || MSC_VERSION +# define __func__ __FUNCTION__ +// .. 
unsupported +#else +# define __func__ "(unknown)" +#endif + + +// extern "C", but does the right thing in pure-C mode +#if defined(__cplusplus) +# define EXTERN_C extern "C" +#else +# define EXTERN_C extern +#endif + + +#if MSC_VERSION +# define INLINE __forceinline +#else +# define INLINE inline +#endif + + +#if MSC_VERSION +# define CALL_CONV __cdecl +#else +# define CALL_CONV +#endif + + +#if MSC_VERSION && !ARCH_AMD64 +# define DECORATED_NAME(name) _##name +#else +# define DECORATED_NAME(name) name +#endif + + +// workaround for preprocessor limitation: macro args aren't expanded +// before being pasted. +#define STRINGIZE2(id) # id +#define STRINGIZE(id) STRINGIZE2(id) + + +//----------------------------------------------------------------------------- +// partial emulation of C++0x rvalue references (required for UniqueRange) + +#if HAVE_CPP0X + +#define RVREF(T) T&& // the type of an rvalue reference +#define LVALUE(rvalue) rvalue // (a named rvalue reference is an lvalue) +#define RVALUE(lvalue) std::move(lvalue) +#define RVALUE_FROM_R(rvalue) RVALUE(rvalue) // (see above) + +#else + +// RVALUE wraps an lvalue reference in this class for later use by a +// "move ctor" that takes an RValue. 
+template +class RValue +{ +public: + explicit RValue(T& lvalue): lvalue(lvalue) {} + T& LValue() const { return lvalue; } + +private: + T& lvalue; +}; + +// from rvalue or const lvalue +template +static inline RValue ToRValue(const T& t) +{ + return RValue((T&)t); +} + +// from lvalue +template +static inline RValue ToRValue(T& t) +{ + return RValue(t); +} + +#define RVREF(T) const RValue& // the type of an rvalue reference +#define LVALUE(rvalue) rvalue.LValue() +#define RVALUE(lvalue) ToRValue(lvalue) +#define RVALUE_FROM_R(rvalue) rvalue + +#endif // #if !HAVE_CPP0X + #endif // #ifndef INCLUDED_CODE_ANNOTATION Index: ps/trunk/source/lib/tex/tex.cpp =================================================================== --- ps/trunk/source/lib/tex/tex.cpp (revision 9349) +++ ps/trunk/source/lib/tex/tex.cpp (revision 9350) @@ -1,778 +1,781 @@ /* Copyright (c) 2010 Wildfire Games * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ /* * support routines for 2d texture access/writing. */ #include "precompiled.h" #include "tex.h" #include #include #include #include "lib/timer.h" #include "lib/bits.h" +#include "lib/allocators/shared_ptr.h" #include "lib/sysdep/cpu.h" #include "tex_codec.h" ERROR_ASSOCIATE(ERR::TEX_FMT_INVALID, L"Invalid/unsupported texture format", -1); ERROR_ASSOCIATE(ERR::TEX_INVALID_COLOR_TYPE, L"Invalid color type", -1); ERROR_ASSOCIATE(ERR::TEX_NOT_8BIT_PRECISION, L"Not 8-bit channel precision", -1); ERROR_ASSOCIATE(ERR::TEX_INVALID_LAYOUT, L"Unsupported texel layout, e.g. right-to-left", -1); ERROR_ASSOCIATE(ERR::TEX_COMPRESSED, L"Unsupported texture compression", -1); ERROR_ASSOCIATE(WARN::TEX_INVALID_DATA, L"Warning: invalid texel data encountered", -1); ERROR_ASSOCIATE(ERR::TEX_INVALID_SIZE, L"Texture size is incorrect", -1); ERROR_ASSOCIATE(INFO::TEX_CODEC_CANNOT_HANDLE, L"Texture codec cannot handle the given format", -1); //----------------------------------------------------------------------------- // validation //----------------------------------------------------------------------------- // be careful not to use other tex_* APIs here because they call us. LibError tex_validate(const Tex* t) { if(t->flags & TEX_UNDEFINED_FLAGS) WARN_RETURN(ERR::_1); // pixel data (only check validity if the image is still in memory; // ogl_tex frees the data after uploading to GL) if(t->data) { // file size smaller than header+pixels. // possible causes: texture file header is invalid, // or file wasn't loaded completely. if(t->dataSize < t->ofs + t->w*t->h*t->bpp/8) WARN_RETURN(ERR::_2); } // bits per pixel // (we don't bother checking all values; a sanity check is enough) if(t->bpp % 4 || t->bpp > 32) WARN_RETURN(ERR::_3); // flags // .. DXT value const size_t dxt = t->flags & TEX_DXT; if(dxt != 0 && dxt != 1 && dxt != DXT1A && dxt != 3 && dxt != 5) WARN_RETURN(ERR::_4); // .. 
orientation const size_t orientation = t->flags & TEX_ORIENTATION; if(orientation == (TEX_BOTTOM_UP|TEX_TOP_DOWN)) WARN_RETURN(ERR::_5); return INFO::OK; } #define CHECK_TEX(t) RETURN_ERR(tex_validate(t)) // check if the given texture format is acceptable: 8bpp grey, // 24bpp color or 32bpp color+alpha (BGR / upside down are permitted). // basically, this is the "plain" format understood by all codecs and // tex_codec_plain_transform. LibError tex_validate_plain_format(size_t bpp, size_t flags) { const bool alpha = (flags & TEX_ALPHA ) != 0; const bool grey = (flags & TEX_GREY ) != 0; const bool dxt = (flags & TEX_DXT ) != 0; const bool mipmaps = (flags & TEX_MIPMAPS) != 0; if(dxt || mipmaps) WARN_RETURN(ERR::TEX_FMT_INVALID); // grey must be 8bpp without alpha, or it's invalid. if(grey) { if(bpp == 8 && !alpha) return INFO::OK; WARN_RETURN(ERR::TEX_FMT_INVALID); } if(bpp == 24 && !alpha) return INFO::OK; if(bpp == 32 && alpha) return INFO::OK; WARN_RETURN(ERR::TEX_FMT_INVALID); } //----------------------------------------------------------------------------- // mipmaps //----------------------------------------------------------------------------- void tex_util_foreach_mipmap(size_t w, size_t h, size_t bpp, const u8* pixels, int levels_to_skip, size_t data_padding, MipmapCB cb, void* RESTRICT cbData) { debug_assert(levels_to_skip >= 0 || levels_to_skip == TEX_BASE_LEVEL_ONLY); size_t level_w = w, level_h = h; const u8* level_data = pixels; // we iterate through the loop (necessary to skip over image data), // but do not actually call back until the requisite number of // levels have been skipped (i.e. level == 0). int level = (levels_to_skip == TEX_BASE_LEVEL_ONLY)? 
0 : -levels_to_skip; // until at level 1x1: for(;;) { // used to skip past this mip level in level_data const size_t level_data_size = (size_t)(round_up(level_w, data_padding) * round_up(level_h, data_padding) * bpp/8); if(level >= 0) cb((size_t)level, level_w, level_h, level_data, level_data_size, cbData); level_data += level_data_size; // 1x1 reached - done if(level_w == 1 && level_h == 1) break; level_w /= 2; level_h /= 2; // if the texture is non-square, one of the dimensions will become // 0 before the other. to satisfy OpenGL's expectations, change it // back to 1. if(level_w == 0) level_w = 1; if(level_h == 0) level_h = 1; level++; // special case: no mipmaps, we were only supposed to call for // the base level if(levels_to_skip == TEX_BASE_LEVEL_ONLY) break; } } struct CreateLevelData { size_t num_components; size_t prev_level_w; size_t prev_level_h; const u8* prev_level_data; size_t prev_level_data_size; }; // uses 2x2 box filter static void create_level(size_t level, size_t level_w, size_t level_h, const u8* RESTRICT level_data, size_t level_data_size, void* RESTRICT cbData) { CreateLevelData* cld = (CreateLevelData*)cbData; const size_t src_w = cld->prev_level_w; const size_t src_h = cld->prev_level_h; const u8* src = cld->prev_level_data; u8* dst = (u8*)level_data; // base level - must be copied over from source buffer if(level == 0) { debug_assert(level_data_size == cld->prev_level_data_size); memcpy(dst, src, level_data_size); } else { const size_t num_components = cld->num_components; const size_t dx = num_components, dy = dx*src_w; // special case: image is too small for 2x2 filter if(cld->prev_level_w == 1 || cld->prev_level_h == 1) { // image is either a horizontal or vertical line. // their memory layout is the same (packed pixels), so no special // handling is needed; just pick max dimension. 
for(size_t y = 0; y < std::max(src_w, src_h); y += 2) { for(size_t i = 0; i < num_components; i++) { *dst++ = (src[0]+src[dx]+1)/2; src += 1; } src += dx; // skip to next pixel (since box is 2x2) } } // normal else { for(size_t y = 0; y < src_h; y += 2) { for(size_t x = 0; x < src_w; x += 2) { for(size_t i = 0; i < num_components; i++) { *dst++ = (src[0]+src[dx]+src[dy]+src[dx+dy]+2)/4; src += 1; } src += dx; // skip to next pixel (since box is 2x2) } src += dy; // skip to next row (since box is 2x2) } } debug_assert(dst == level_data + level_data_size); debug_assert(src == cld->prev_level_data + cld->prev_level_data_size); } cld->prev_level_data = level_data; cld->prev_level_data_size = level_data_size; cld->prev_level_w = level_w; cld->prev_level_h = level_h; } static LibError add_mipmaps(Tex* t, size_t w, size_t h, size_t bpp, void* newData, size_t data_size) { // this code assumes the image is of POT dimension; we don't // go to the trouble of implementing image scaling because // the only place this is used (ogl_tex_upload) requires POT anyway. if(!is_pow2(w) || !is_pow2(h)) WARN_RETURN(ERR::TEX_INVALID_SIZE); t->flags |= TEX_MIPMAPS; // must come before tex_img_size! const size_t mipmap_size = tex_img_size(t); - shared_ptr mipmapData = io_Allocate(mipmap_size, 0); + shared_ptr mipmapData; + AllocateAligned(mipmapData, mipmap_size); CreateLevelData cld = { bpp/8, w, h, (const u8*)newData, data_size }; tex_util_foreach_mipmap(w, h, bpp, mipmapData.get(), 0, 1, create_level, &cld); t->data = mipmapData; t->dataSize = mipmap_size; t->ofs = 0; return INFO::OK; } //----------------------------------------------------------------------------- // pixel format conversion (transformation) //----------------------------------------------------------------------------- TIMER_ADD_CLIENT(tc_plain_transform); // handles BGR and row flipping in "plain" format (see below). // // called by codecs after they get their format-specific transforms out of // the way. 
note that this approach requires several passes over the image, // but is much easier to maintain than providing all<->all conversion paths. // // somewhat optimized (loops are hoisted, cache associativity accounted for) static LibError plain_transform(Tex* t, size_t transforms) { TIMER_ACCRUE(tc_plain_transform); // (this is also called directly instead of through ogl_tex, so // we need to validate) CHECK_TEX(t); // extract texture info const size_t w = t->w, h = t->h, bpp = t->bpp; const size_t flags = t->flags; u8* const data = tex_get_data(t); // sanity checks (not errors, we just can't handle these cases) // .. unknown transform if(transforms & ~(TEX_BGR|TEX_ORIENTATION|TEX_MIPMAPS|TEX_ALPHA)) return INFO::TEX_CODEC_CANNOT_HANDLE; // .. data is not in "plain" format RETURN_ERR(tex_validate_plain_format(bpp, flags)); // .. nothing to do if(!transforms) return INFO::OK; const size_t data_size = tex_img_size(t); // size of source size_t new_data_size = data_size; // size of destination if(transforms & TEX_ALPHA) { // add alpha channel if(bpp == 24) { new_data_size = (data_size / 3) * 4; t->bpp = 32; } // remove alpha channel else if(bpp == 32) { return INFO::TEX_CODEC_CANNOT_HANDLE; } // can't have alpha with greyscale else { return INFO::TEX_CODEC_CANNOT_HANDLE; } } // allocate copy of the image data. // rationale: L1 cache is typically A2 => swapping in-place with a // line buffer leads to thrashing. we'll assume the whole texture*2 // fits in cache, allocate a copy, and transfer directly from there. // // this is necessary even when not flipping because the initial data // is read-only. - shared_ptr newData = io_Allocate(new_data_size); + shared_ptr newData; + AllocateAligned(newData, new_data_size); // setup row source/destination pointers (simplifies outer loop) u8* dst = (u8*)newData.get(); const u8* src; const size_t pitch = w * bpp/8; // source bpp (not necessarily dest bpp) // .. avoid y*pitch multiply in row loop; instead, add row_ofs. 
ssize_t row_ofs = (ssize_t)pitch; // flipping rows (0,1,2 -> 2,1,0) if(transforms & TEX_ORIENTATION) { src = (const u8*)data+data_size-pitch; // last row row_ofs = -(ssize_t)pitch; } // adding/removing alpha channel (can't convert in-place) else if(transforms & TEX_ALPHA) { src = (const u8*)data; } // do other transforms in-place else { src = (const u8*)newData.get(); memcpy(newData.get(), data, data_size); } // no conversion necessary if(!(transforms & (TEX_BGR | TEX_ALPHA))) { if(src != dst) // avoid overlapping memcpy if not flipping rows { for(size_t y = 0; y < h; y++) { memcpy(dst, src, pitch); dst += pitch; src += row_ofs; } } } // RGB -> BGRA, BGR -> RGBA else if(bpp == 24 && (transforms & TEX_ALPHA) && (transforms & TEX_BGR)) { for(size_t y = 0; y < h; y++) { for(size_t x = 0; x < w; x++) { // need temporaries in case src == dst (i.e. not flipping) const u8 b = src[0], g = src[1], r = src[2]; dst[0] = r; dst[1] = g; dst[2] = b; dst[3] = 0xFF; dst += 4; src += 3; } src += row_ofs - pitch; // flip? previous row : stay } } // RGB -> RGBA, BGR -> BGRA else if(bpp == 24 && (transforms & TEX_ALPHA) && !(transforms & TEX_BGR)) { for(size_t y = 0; y < h; y++) { for(size_t x = 0; x < w; x++) { // need temporaries in case src == dst (i.e. not flipping) const u8 r = src[0], g = src[1], b = src[2]; dst[0] = r; dst[1] = g; dst[2] = b; dst[3] = 0xFF; dst += 4; src += 3; } src += row_ofs - pitch; // flip? previous row : stay } } // RGB <-> BGR else if(bpp == 24 && !(transforms & TEX_ALPHA)) { for(size_t y = 0; y < h; y++) { for(size_t x = 0; x < w; x++) { // need temporaries in case src == dst (i.e. not flipping) const u8 b = src[0], g = src[1], r = src[2]; dst[0] = r; dst[1] = g; dst[2] = b; dst += 3; src += 3; } src += row_ofs - pitch; // flip? previous row : stay } } // RGBA <-> BGRA else if(bpp == 32 && !(transforms & TEX_ALPHA)) { for(size_t y = 0; y < h; y++) { for(size_t x = 0; x < w; x++) { // need temporaries in case src == dst (i.e. 
not flipping) const u8 b = src[0], g = src[1], r = src[2], a = src[3]; dst[0] = r; dst[1] = g; dst[2] = b; dst[3] = a; dst += 4; src += 4; } src += row_ofs - pitch; // flip? previous row : stay } } else { debug_warn(L"unsupported transform"); return INFO::TEX_CODEC_CANNOT_HANDLE; } t->data = newData; t->dataSize = new_data_size; t->ofs = 0; if(!(t->flags & TEX_MIPMAPS) && transforms & TEX_MIPMAPS) RETURN_ERR(add_mipmaps(t, w, h, bpp, newData.get(), new_data_size)); CHECK_TEX(t); return INFO::OK; } TIMER_ADD_CLIENT(tc_transform); // change t's pixel format by flipping the state of all TEX_* flags // that are set in transforms. LibError tex_transform(Tex* t, size_t transforms) { TIMER_ACCRUE(tc_transform); CHECK_TEX(t); const size_t target_flags = t->flags ^ transforms; size_t remaining_transforms; for(;;) { remaining_transforms = target_flags ^ t->flags; // we're finished (all required transforms have been done) if(remaining_transforms == 0) return INFO::OK; LibError ret = tex_codec_transform(t, remaining_transforms); if(ret != INFO::OK) break; } // last chance RETURN_ERR(plain_transform(t, remaining_transforms)); return INFO::OK; } // change t's pixel format to the new format specified by new_flags. // (note: this is equivalent to tex_transform(t, t->flags^new_flags).) LibError tex_transform_to(Tex* t, size_t new_flags) { // tex_transform takes care of validating t const size_t transforms = t->flags ^ new_flags; return tex_transform(t, transforms); } //----------------------------------------------------------------------------- // image orientation //----------------------------------------------------------------------------- // see "Default Orientation" in docs. static int global_orientation = TEX_TOP_DOWN; // set the orientation (either TEX_BOTTOM_UP or TEX_TOP_DOWN) to which // all loaded images will automatically be converted // (excepting file formats that don't specify their orientation, i.e. DDS). 
void tex_set_global_orientation(int o) { debug_assert(o == TEX_TOP_DOWN || o == TEX_BOTTOM_UP); global_orientation = o; } static void flip_to_global_orientation(Tex* t) { // (can't use normal CHECK_TEX due to void return) WARN_ERR(tex_validate(t)); size_t orientation = t->flags & TEX_ORIENTATION; // if codec knows which way around the image is (i.e. not DDS): if(orientation) { // flip image if necessary size_t transforms = orientation ^ global_orientation; WARN_ERR(plain_transform(t, transforms)); } // indicate image is at global orientation. this is still done even // if the codec doesn't know: the default orientation should be chosen // to make that work correctly (see "Default Orientation" in docs). t->flags = (t->flags & ~TEX_ORIENTATION) | global_orientation; // (can't use normal CHECK_TEX due to void return) WARN_ERR(tex_validate(t)); } // indicate if the orientation specified by src_flags matches // dst_orientation (if the latter is 0, then the global_orientation). // (we ask for src_flags instead of src_orientation so callers don't // have to mask off TEX_ORIENTATION) bool tex_orientations_match(size_t src_flags, size_t dst_orientation) { const size_t src_orientation = src_flags & TEX_ORIENTATION; if(dst_orientation == 0) dst_orientation = global_orientation; return (src_orientation == dst_orientation); } //----------------------------------------------------------------------------- // misc. API //----------------------------------------------------------------------------- // indicate if the given pathname's extension is that of a texture format // supported by tex_load. case-insensitive. // // rationale: tex_load complains if the given file is of an // unsupported type. this API allows users to preempt that warning // (by checking the filename themselves), and also provides for e.g. // enumerating only images in a file picker. // an alternative might be a flag to suppress warning about invalid files, // but this is open to misuse. 
bool tex_is_known_extension(const VfsPath& pathname) { const TexCodecVTbl* dummy; // found codec for it => known extension const OsPath extension = pathname.Extension(); if(tex_codec_for_filename(extension, &dummy) == INFO::OK) return true; return false; } // store the given image data into a Tex object; this will be as if // it had been loaded via tex_load. // // rationale: support for in-memory images is necessary for // emulation of glCompressedTexImage2D and useful overall. // however, we don't want to provide an alternate interface for each API; // these would have to be changed whenever fields are added to Tex. // instead, provide one entry point for specifying images. // // we need only add bookkeeping information and "wrap" it in // our Tex struct, hence the name. LibError tex_wrap(size_t w, size_t h, size_t bpp, size_t flags, const shared_ptr& data, size_t ofs, Tex* t) { t->w = w; t->h = h; t->bpp = bpp; t->flags = flags; t->data = data; t->dataSize = ofs + w*h*bpp/8; t->ofs = ofs; CHECK_TEX(t); return INFO::OK; } // free all resources associated with the image and make further // use of it impossible. void tex_free(Tex* t) { // do not validate - this is called from tex_load if loading // failed, so not all fields may be valid. t->data.reset(); // do not zero out the fields! that could lead to trouble since // ogl_tex_upload followed by ogl_tex_free is legit, but would // cause OglTex_validate to fail (since its Tex.w is == 0). } //----------------------------------------------------------------------------- // getters //----------------------------------------------------------------------------- // returns a pointer to the image data (pixels), taking into account any // header(s) that may come before it. 
u8* tex_get_data(const Tex* t) { // (can't use normal CHECK_TEX due to u8* return value) WARN_ERR(tex_validate(t)); u8* p = t->data.get(); if(!p) return 0; return p + t->ofs; } // returns colour of 1x1 mipmap level u32 tex_get_average_colour(const Tex* t) { // require mipmaps if(!(t->flags & TEX_MIPMAPS)) return 0; // find the total size of image data size_t size = tex_img_size(t); // compute the size of the last (1x1) mipmap level const size_t data_padding = (t->flags & TEX_DXT)? 4 : 1; size_t last_level_size = (size_t)(data_padding * data_padding * t->bpp/8); // construct a new texture based on the current one, // but set its data pointer offset to the last mipmap level's data Tex basetex = *t; basetex.w = 1; basetex.h = 1; basetex.ofs += size - last_level_size; // convert to BGRA WARN_ERR(tex_transform_to(&basetex, TEX_BGR | TEX_ALPHA)); // extract components into u32 debug_assert(basetex.dataSize >= basetex.ofs+4); u8 b = basetex.data.get()[basetex.ofs]; u8 g = basetex.data.get()[basetex.ofs+1]; u8 r = basetex.data.get()[basetex.ofs+2]; u8 a = basetex.data.get()[basetex.ofs+3]; return b + (g << 8) + (r << 16) + (a << 24); } static void add_level_size(size_t UNUSED(level), size_t UNUSED(level_w), size_t UNUSED(level_h), const u8* RESTRICT UNUSED(level_data), size_t level_data_size, void* RESTRICT cbData) { size_t* ptotal_size = (size_t*)cbData; *ptotal_size += level_data_size; } // return total byte size of the image pixels. (including mipmaps!) // this is preferable to calculating manually because it's // less error-prone (e.g. confusing bits_per_pixel with bytes). size_t tex_img_size(const Tex* t) { // (can't use normal CHECK_TEX due to size_t return value) WARN_ERR(tex_validate(t)); const int levels_to_skip = (t->flags & TEX_MIPMAPS)? 0 : TEX_BASE_LEVEL_ONLY; const size_t data_padding = (t->flags & TEX_DXT)? 
4 : 1; size_t out_size = 0; tex_util_foreach_mipmap(t->w, t->h, t->bpp, 0, levels_to_skip, data_padding, add_level_size, &out_size); return out_size; } // return the minimum header size (i.e. offset to pixel data) of the // file format indicated by filename's extension (that is all it need contain: // e.g. ".bmp"). returns 0 on error (i.e. no codec found). // this can be used to optimize calls to tex_write: when allocating the // buffer that will hold the image, allocate this much extra and // pass the pointer as base+hdr_size. this allows writing the header // directly into the output buffer and makes for zero-copy IO. size_t tex_hdr_size(const VfsPath& filename) { const TexCodecVTbl* c; const OsPath extension = filename.Extension(); CHECK_ERR(tex_codec_for_filename(extension, &c)); return c->hdr_size(0); } //----------------------------------------------------------------------------- // read/write from memory and disk //----------------------------------------------------------------------------- LibError tex_decode(const shared_ptr& data, size_t data_size, Tex* t) { const TexCodecVTbl* c; RETURN_ERR(tex_codec_for_header(data.get(), data_size, &c)); // make sure the entire header is available const size_t min_hdr_size = c->hdr_size(0); if(data_size < min_hdr_size) WARN_RETURN(ERR::TEX_INCOMPLETE_HEADER); const size_t hdr_size = c->hdr_size(data.get()); if(data_size < hdr_size) WARN_RETURN(ERR::TEX_INCOMPLETE_HEADER); t->data = data; t->dataSize = data_size; t->ofs = hdr_size; // for orthogonality, encode and decode both receive the memory as a // DynArray. package data into one and free it again after decoding: DynArray da; RETURN_ERR(da_wrap_fixed(&da, data.get(), data_size)); RETURN_ERR(c->decode(&da, t)); // note: not reached if decode fails. that's not a problem; // this call just zeroes da and could be left out. 
(void)da_free(&da); // sanity checks if(!t->w || !t->h || t->bpp > 32) WARN_RETURN(ERR::TEX_FMT_INVALID); if(t->dataSize < t->ofs + tex_img_size(t)) WARN_RETURN(ERR::TEX_INVALID_SIZE); flip_to_global_orientation(t); CHECK_TEX(t); return INFO::OK; } LibError tex_encode(Tex* t, const OsPath& extension, DynArray* da) { CHECK_TEX(t); CHECK_ERR(tex_validate_plain_format(t->bpp, t->flags)); // we could be clever here and avoid the extra alloc if our current // memory block ensued from the same kind of texture file. this is // most likely the case if in_img == tex_get_data() + c->hdr_size(0). // this would make for zero-copy IO. const size_t max_out_size = tex_img_size(t)*4 + 256*KiB; RETURN_ERR(da_alloc(da, max_out_size)); const TexCodecVTbl* c; CHECK_ERR(tex_codec_for_filename(extension, &c)); // encode into LibError err = c->encode(t, da); if(err < 0) { (void)da_free(da); WARN_RETURN(err); } return INFO::OK; } Index: ps/trunk/source/lib/tex/tex_png.cpp =================================================================== --- ps/trunk/source/lib/tex/tex_png.cpp (revision 9349) +++ ps/trunk/source/lib/tex/tex_png.cpp (revision 9350) @@ -1,260 +1,262 @@ /* Copyright (c) 2010 Wildfire Games * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* * PNG codec using libpng. */ #include "precompiled.h" #include "lib/external_libraries/png.h" #include "lib/byte_order.h" #include "tex_codec.h" +#include "lib/allocators/shared_ptr.h" #include "lib/timer.h" #if MSC_VERSION // squelch "dtor / setjmp interaction" warnings. // all attempts to resolve the underlying problem failed; apparently // the warning is generated if setjmp is used at all in C++ mode. // (png_*_impl have no code that would trigger ctors/dtors, nor are any // called in their prolog/epilog code). # pragma warning(disable: 4611) #endif // MSC_VERSION //----------------------------------------------------------------------------- // //----------------------------------------------------------------------------- // pass data from PNG file in memory to libpng static void io_read(png_struct* png_ptr, u8* data, png_size_t length) { DynArray* da = (DynArray*)png_get_io_ptr(png_ptr); if(da_read(da, data, length) != 0) png_error(png_ptr, "io_read failed"); } // write libpng output to PNG file static void io_write(png_struct* png_ptr, u8* data, png_size_t length) { DynArray* da = (DynArray*)png_get_io_ptr(png_ptr); if(da_append(da, data, length) != 0) png_error(png_ptr, "io_write failed"); } static void io_flush(png_structp UNUSED(png_ptr)) { } //----------------------------------------------------------------------------- static LibError png_transform(Tex* UNUSED(t), size_t UNUSED(transforms)) { return INFO::TEX_CODEC_CANNOT_HANDLE; } // note: it's not worth combining png_encode and png_decode, due to // libpng read/write interface differences (grr). // split out of png_decode to simplify resource cleanup and avoid // "dtor / setjmp interaction" warning. 
static LibError png_decode_impl(DynArray* da, png_structp png_ptr, png_infop info_ptr, Tex* t) { png_set_read_fn(png_ptr, da, io_read); // read header and determine format png_read_info(png_ptr, info_ptr); png_uint_32 w, h; int bit_depth, colour_type; png_get_IHDR(png_ptr, info_ptr, &w, &h, &bit_depth, &colour_type, 0, 0, 0); const size_t pitch = png_get_rowbytes(png_ptr, info_ptr); const u32 bpp = (u32)(pitch/w * 8); size_t flags = 0; if(bpp == 32) flags |= TEX_ALPHA; if(colour_type == PNG_COLOR_TYPE_GRAY) flags |= TEX_GREY; // make sure format is acceptable if(bit_depth != 8) WARN_RETURN(ERR::TEX_NOT_8BIT_PRECISION); if(colour_type & PNG_COLOR_MASK_PALETTE) WARN_RETURN(ERR::TEX_INVALID_COLOR_TYPE); const size_t img_size = pitch * h; - shared_ptr data = io_Allocate(img_size); + shared_ptr data; + AllocateAligned(data, img_size, pageSize); shared_ptr rows = tex_codec_alloc_rows(data.get(), h, pitch, TEX_TOP_DOWN, 0); png_read_image(png_ptr, (png_bytepp)rows.get()); png_read_end(png_ptr, info_ptr); // success; make sure all data was consumed. debug_assert(da->pos == da->cur_size); // store image info t->data = data; t->dataSize = img_size; t->ofs = 0; t->w = w; t->h = h; t->bpp = bpp; t->flags = flags; return INFO::OK; } // split out of png_encode to simplify resource cleanup and avoid // "dtor / setjmp interaction" warning. 
// encode the texture <t> as a PNG file appended to <da>, using the libpng
// structures created by the caller. returns INFO::OK once libpng has
// written the image.
static LibError png_encode_impl(Tex* t, png_structp png_ptr, png_infop info_ptr, DynArray* da)
{
	const png_uint_32 w = (png_uint_32)t->w, h = (png_uint_32)t->h;
	// bytes per source row.
	const size_t pitch = w * t->bpp / 8;

	// map the texture's grey/alpha flags onto the PNG colour type.
	int colour_type;
	switch(t->flags & (TEX_GREY|TEX_ALPHA))
	{
	case TEX_GREY|TEX_ALPHA:
		colour_type = PNG_COLOR_TYPE_GRAY_ALPHA;
		break;
	case TEX_GREY:
		colour_type = PNG_COLOR_TYPE_GRAY;
		break;
	case TEX_ALPHA:
		colour_type = PNG_COLOR_TYPE_RGB_ALPHA;
		break;
	default:
		colour_type = PNG_COLOR_TYPE_RGB;
		break;
	}

	// have libpng push its output into <da> through io_write / io_flush.
	png_set_write_fn(png_ptr, da, io_write, io_flush);
	// header: 8 bits per channel, non-interlaced, default compression/filter.
	png_set_IHDR(png_ptr, info_ptr, w, h, 8, colour_type, PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT);

	// row pointer table into the texture's pixel data.
	// NOTE(review): t->flags and TEX_TOP_DOWN are presumably the source and
	// destination row orientation - confirm against tex_codec_alloc_rows.
	u8* data = tex_get_data(t);
	shared_ptr rows = tex_codec_alloc_rows(data, h, pitch, t->flags, TEX_TOP_DOWN);

	// PNG is native RGB.
	const int png_transforms = (t->flags & TEX_BGR)? PNG_TRANSFORM_BGR : PNG_TRANSFORM_IDENTITY;

	png_set_rows(png_ptr, info_ptr, (png_bytepp)rows.get());
	png_write_png(png_ptr, info_ptr, png_transforms, 0);

	return INFO::OK;
}



// return true if <file> starts with the PNG signature's first four bytes.
// NOTE(review): reads through a u32* - assumes at least 4 readable bytes and
// an address the platform can load a u32 from; confirm with callers.
static bool png_is_hdr(const u8* file)
{
	// don't use png_sig_cmp, so we don't pull in libpng for
	// this check alone (it might not actually be used).
	return *(u32*)file == FOURCC('\x89','P','N','G');
}


// return true if <extension> is exactly L".png".
static bool png_is_ext(const OsPath& extension)
{
	return extension == L".png";
}


// size of the header preceding the image data: always 0 for this codec.
static size_t png_hdr_size(const u8* UNUSED(file))
{
	return 0;	// libpng returns decoded image data; no header
}


TIMER_ADD_CLIENT(tc_png_decode);

// limitation: palette images aren't supported
// decode entry point: creates the libpng read/info structures, installs the
// setjmp error handler, delegates to png_decode_impl and always destroys the
// libpng structures before returning.
static LibError png_decode(DynArray* RESTRICT da, Tex* RESTRICT t)
{
TIMER_ACCRUE(tc_png_decode);

	// stays ERR::FAIL unless png_decode_impl returns normally (it is only
	// overwritten after that call), so a longjmp out of libpng reports failure.
	LibError ret = ERR::FAIL;
	png_infop info_ptr = 0;

	// allocate PNG structures; use default stderr and longjmp error handlers
	png_structp png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, 0, 0, 0);
	if(!png_ptr)
		WARN_RETURN(ERR::FAIL);
	info_ptr = png_create_info_struct(png_ptr);
	if(!info_ptr)
		goto fail;
	// setup error handling
	if(setjmp(png_jmpbuf(png_ptr)))
	{
		// libpng longjmps here after an error
		goto fail;
	}

	ret = png_decode_impl(da, png_ptr, info_ptr, t);

	// shared cleanup for both the success and the error path.
fail:
	png_destroy_read_struct(&png_ptr, &info_ptr, 0);

	return ret;
}


// limitation: palette images aren't supported
// encode entry point: creates the libpng write/info structures, installs the
// setjmp error handler, delegates to png_encode_impl and always destroys the
// libpng structures before returning.
static LibError png_encode(Tex* RESTRICT t, DynArray* RESTRICT da)
{
	// stays ERR::FAIL unless png_encode_impl returns normally.
	LibError ret = ERR::FAIL;
	png_infop info_ptr = 0;

	// allocate PNG structures; use default stderr and longjmp error handlers
	png_structp png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, 0, 0, 0);
	if(!png_ptr)
		WARN_RETURN(ERR::FAIL);
	info_ptr = png_create_info_struct(png_ptr);
	if(!info_ptr)
		goto fail;
	// setup error handling
	if(setjmp(png_jmpbuf(png_ptr)))
	{
		// libpng longjmps here after an error
		goto fail;
	}

	ret = png_encode_impl(t, png_ptr, info_ptr, da);

	// shared cleanup
fail:
	png_destroy_write_struct(&png_ptr, &info_ptr);
	return ret;
}

TEX_CODEC_REGISTER(png);
Index: ps/trunk/source/lib/allocators/allocators.cpp
===================================================================
--- ps/trunk/source/lib/allocators/allocators.cpp (revision 9349)
+++ ps/trunk/source/lib/allocators/allocators.cpp (revision 9350)
@@ -1,164 +1,163 @@
/* Copyright (c) 2010 Wildfire Games
 *
 * Permission is hereby granted,
free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* * memory suballocators. 
*/ #include "precompiled.h" #include "lib/allocators/allocators.h" +#include "lib/alignment.h" #include "lib/sysdep/cpu.h" // cpu_CAS -#include "lib/bits.h" - #include "lib/allocators/mem_util.h" //----------------------------------------------------------------------------- // page aligned allocator //----------------------------------------------------------------------------- void* page_aligned_alloc(size_t unaligned_size) { const size_t size_pa = mem_RoundUpToPage(unaligned_size); u8* p = 0; RETURN0_IF_ERR(mem_Reserve(size_pa, &p)); RETURN0_IF_ERR(mem_Commit(p, size_pa, PROT_READ|PROT_WRITE)); return p; } void page_aligned_free(void* p, size_t unaligned_size) { if(!p) return; debug_assert(mem_IsPageMultiple((uintptr_t)p)); const size_t size_pa = mem_RoundUpToPage(unaligned_size); (void)mem_Release((u8*)p, size_pa); } //----------------------------------------------------------------------------- // matrix allocator //----------------------------------------------------------------------------- void** matrix_alloc(size_t cols, size_t rows, size_t el_size) { const size_t initial_align = 64; // note: no provision for padding rows. this is a bit more work and // if el_size isn't a power-of-2, performance is going to suck anyway. // otherwise, the initial alignment will take care of it. 
const size_t ptr_array_size = cols*sizeof(void*); const size_t row_size = cols*el_size; const size_t data_size = rows*row_size; const size_t total_size = ptr_array_size + initial_align + data_size; void* p = malloc(total_size); if(!p) return 0; uintptr_t data_addr = (uintptr_t)p + ptr_array_size + initial_align; data_addr -= data_addr % initial_align; // alignment check didn't set address to before allocation debug_assert(data_addr >= (uintptr_t)p+ptr_array_size); void** ptr_array = (void**)p; for(size_t i = 0; i < cols; i++) { ptr_array[i] = (void*)data_addr; data_addr += row_size; } // didn't overrun total allocation debug_assert(data_addr <= (uintptr_t)p+total_size); return ptr_array; } void matrix_free(void** matrix) { free(matrix); } //----------------------------------------------------------------------------- // allocator optimized for single instances //----------------------------------------------------------------------------- void* single_calloc(void* storage, volatile intptr_t* in_use_flag, size_t size) { // sanity check debug_assert(*in_use_flag == 0 || *in_use_flag == 1); void* p; // successfully reserved the single instance if(cpu_CAS(in_use_flag, 0, 1)) p = storage; // already in use (rare) - allocate from heap else p = new u8[size]; memset(p, 0, size); return p; } void single_free(void* storage, volatile intptr_t* in_use_flag, void* p) { // sanity check debug_assert(*in_use_flag == 0 || *in_use_flag == 1); if(p == storage) { if(cpu_CAS(in_use_flag, 1, 0)) { // ok, flag has been reset to 0 } else debug_assert(0); // in_use_flag out of sync (double free?) } // was allocated from heap else { // single instance may have been freed by now - cannot assume // anything about in_use_flag. 
delete[] (u8*)p; } } //----------------------------------------------------------------------------- // static allocator //----------------------------------------------------------------------------- void* static_calloc(StaticStorage* ss, size_t size) { - void* p = (void*)round_up((uintptr_t)ss->pos, (uintptr_t)16u); + void* p = (void*)Align<16>((uintptr_t)ss->pos); ss->pos = (u8*)p+size; debug_assert(ss->pos <= ss->end); return p; } Index: ps/trunk/source/lib/allocators/mem_util.cpp =================================================================== --- ps/trunk/source/lib/allocators/mem_util.cpp (revision 9349) +++ ps/trunk/source/lib/allocators/mem_util.cpp (revision 9350) @@ -1,120 +1,120 @@ /* Copyright (c) 2010 Wildfire Games * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* * memory allocator helper routines. 
*/ #include "precompiled.h" #include "lib/allocators/mem_util.h" #include "lib/bits.h" // round_up +#include "lib/alignment.h" #include "lib/posix/posix_mman.h" #include "lib/sysdep/os_cpu.h" // os_cpu_PageSize bool mem_IsPageMultiple(uintptr_t x) { return (x & (os_cpu_PageSize()-1)) == 0; } size_t mem_RoundUpToPage(size_t size) { return round_up(size, os_cpu_PageSize()); } size_t mem_RoundUpToAlignment(size_t size) { // all allocators should align to at least this many bytes: - const size_t alignment = 8; - return round_up(size, alignment); + return Align<8>(size); } //----------------------------------------------------------------------------- static inline LibError LibError_from_mmap(void* ret, bool warn_if_failed = true) { if(ret != MAP_FAILED) return INFO::OK; return LibError_from_errno(warn_if_failed); } // "anonymous" effectively means mapping /dev/zero, but is more efficient. // MAP_ANONYMOUS is not in SUSv3, but is a very common extension. // unfortunately, MacOS X only defines MAP_ANON, which Solaris says is // deprecated. workaround there: define MAP_ANONYMOUS in terms of MAP_ANON. #ifndef MAP_ANONYMOUS # define MAP_ANONYMOUS MAP_ANON #endif static const int mmap_flags = MAP_PRIVATE|MAP_ANONYMOUS; LibError mem_Reserve(size_t size, u8** pp) { errno = 0; void* ret = mmap(0, size, PROT_NONE, mmap_flags|MAP_NORESERVE, -1, 0); *pp = (u8*)ret; return LibError_from_mmap(ret); } LibError mem_Release(u8* p, size_t size) { errno = 0; int ret = munmap(p, size); return LibError_from_posix(ret); } LibError mem_Commit(u8* p, size_t size, int prot) { // avoid misinterpretation by mmap. 
if(prot == PROT_NONE) WARN_RETURN(ERR::INVALID_PARAM); errno = 0; void* ret = mmap(p, size, prot, mmap_flags|MAP_FIXED, -1, 0); return LibError_from_mmap(ret); } LibError mem_Decommit(u8* p, size_t size) { errno = 0; void* ret = mmap(p, size, PROT_NONE, mmap_flags|MAP_NORESERVE|MAP_FIXED, -1, 0); return LibError_from_mmap(ret); } LibError mem_Protect(u8* p, size_t size, int prot) { errno = 0; int ret = mprotect(p, size, prot); return LibError_from_posix(ret); } void* mem_freelist_Sentinel() { // sentinel storing its own address static void* storageForPrevPtr; void* const storageAddress = &storageForPrevPtr; memcpy(&storageForPrevPtr, &storageAddress, sizeof(storageForPrevPtr)); return storageAddress; } Index: ps/trunk/source/simulation2/components/CCmpAIManager.cpp =================================================================== --- ps/trunk/source/simulation2/components/CCmpAIManager.cpp (revision 9349) +++ ps/trunk/source/simulation2/components/CCmpAIManager.cpp (revision 9350) @@ -1,655 +1,657 @@ /* Copyright (C) 2011 Wildfire Games. * This file is part of 0 A.D. * * 0 A.D. is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 2 of the License, or * (at your option) any later version. * * 0 A.D. is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with 0 A.D. If not, see . 
*/ #include "precompiled.h" #include "simulation2/system/Component.h" #include "ICmpAIManager.h" #include "simulation2/MessageTypes.h" #include "graphics/Terrain.h" #include "lib/timer.h" #include "lib/tex/tex.h" +#include "lib/allocators/shared_ptr.h" #include "ps/CLogger.h" #include "ps/Filesystem.h" #include "ps/Util.h" #include "simulation2/components/ICmpAIInterface.h" #include "simulation2/components/ICmpCommandQueue.h" #include "simulation2/components/ICmpObstructionManager.h" #include "simulation2/components/ICmpTemplateManager.h" #include "simulation2/helpers/Grid.h" #include "simulation2/serialization/DebugSerializer.h" #include "simulation2/serialization/StdDeserializer.h" #include "simulation2/serialization/StdSerializer.h" #include "simulation2/serialization/SerializeTemplates.h" /** * @file Player AI interface. * * AI is primarily scripted, and the CCmpAIManager component defined here * takes care of managing all the scripts. * * To avoid slow AI scripts causing jerky rendering, they are run in a background * thread (maintained by CAIWorker) so that it's okay if they take a whole simulation * turn before returning their results (though preferably they shouldn't use nearly * that much CPU). * * CCmpAIManager grabs the world state after each turn (making use of AIInterface.js * and AIProxy.js to decide what data to include) then passes it to CAIWorker. * The AI scripts will then run asynchronously and return a list of commands to execute. * Any attempts to read the command list (including indirectly via serialization) * will block until it's actually completed, so the rest of the engine should avoid * reading it for as long as possible. * * JS values are passed between the game and AI threads using ScriptInterface::StructuredClone. 
* * TODO: actually the thread isn't implemented yet, because performance hasn't been * sufficiently problematic to justify the complexity yet, but the CAIWorker interface * is designed to hopefully support threading when we want it. */ class CAIWorker { private: class CAIPlayer { NONCOPYABLE(CAIPlayer); public: CAIPlayer(CAIWorker& worker, const std::wstring& aiName, player_id_t player, const shared_ptr& runtime, boost::rand48& rng) : m_Worker(worker), m_AIName(aiName), m_Player(player), m_ScriptInterface("Engine", "AI", runtime) { m_ScriptInterface.SetCallbackData(static_cast (this)); m_ScriptInterface.ReplaceNondeterministicFunctions(rng); m_ScriptInterface.RegisterFunction("IncludeModule"); m_ScriptInterface.RegisterFunction("PostCommand"); m_ScriptInterface.RegisterFunction, u32, u32, u32, CAIPlayer::DumpImage>("DumpImage"); } ~CAIPlayer() { // Clean up rooted objects before destroying their script context m_Obj = CScriptValRooted(); } static void IncludeModule(void* cbdata, std::wstring name) { CAIPlayer* self = static_cast (cbdata); self->LoadScripts(name); } static void PostCommand(void* cbdata, CScriptValRooted cmd) { CAIPlayer* self = static_cast (cbdata); self->m_Commands.push_back(self->m_ScriptInterface.WriteStructuredClone(cmd.get())); } /** * Debug function for AI scripts to dump 2D array data (e.g. terrain tile weights). */ static void DumpImage(void* UNUSED(cbdata), std::wstring name, std::vector data, u32 w, u32 h, u32 max) { // TODO: this is totally not threadsafe. 
VfsPath filename = L"screenshots/aidump/" + name; if (data.size() != w*h) { debug_warn(L"DumpImage: data size doesn't match w*h"); return; } if (max == 0) { debug_warn(L"DumpImage: max must not be 0"); return; } const size_t bpp = 8; int flags = TEX_BOTTOM_UP|TEX_GREY; const size_t img_size = w * h * bpp/8; const size_t hdr_size = tex_hdr_size(filename); - shared_ptr buf = io_Allocate(hdr_size+img_size); + shared_ptr buf; + AllocateAligned(buf, hdr_size+img_size, maxSectorSize); Tex t; if (tex_wrap(w, h, bpp, flags, buf, hdr_size, &t) < 0) return; u8* img = buf.get() + hdr_size; for (size_t i = 0; i < data.size(); ++i) img[i] = (data[i] * 255) / max; tex_write(&t, filename); tex_free(&t); } bool LoadScripts(const std::wstring& moduleName) { // Ignore modules that are already loaded if (m_LoadedModules.find(moduleName) != m_LoadedModules.end()) return true; // Mark this as loaded, to prevent it recursively loading itself m_LoadedModules.insert(moduleName); // Load and execute *.js VfsPaths pathnames; fs_util::GetPathnames(g_VFS, L"simulation/ai/" + moduleName + L"/", L"*.js", pathnames); for (VfsPaths::iterator it = pathnames.begin(); it != pathnames.end(); ++it) { if (!m_ScriptInterface.LoadGlobalScriptFile(*it)) { LOGERROR(L"Failed to load script %ls", it->string().c_str()); return false; } } return true; } bool Initialise(bool callConstructor) { if (!LoadScripts(m_AIName)) return false; OsPath path = L"simulation/ai/" + m_AIName + L"/data.json"; CScriptValRooted metadata = m_Worker.LoadMetadata(path); if (metadata.uninitialised()) { LOGERROR(L"Failed to create AI player: can't find %ls", path.string().c_str()); return false; } // Get the constructor name from the metadata std::string constructor; if (!m_ScriptInterface.GetProperty(metadata.get(), "constructor", constructor)) { LOGERROR(L"Failed to create AI player: %ls: missing 'constructor'", path.string().c_str()); return false; } // Get the constructor function from the loaded scripts CScriptVal ctor; if 
(!m_ScriptInterface.GetProperty(m_ScriptInterface.GetGlobalObject(), constructor.c_str(), ctor) || ctor.undefined()) { LOGERROR(L"Failed to create AI player: %ls: can't find constructor '%hs'", path.string().c_str(), constructor.c_str()); return false; } CScriptVal obj; if (callConstructor) { // Set up the data to pass as the constructor argument CScriptVal settings; m_ScriptInterface.Eval(L"({})", settings); m_ScriptInterface.SetProperty(settings.get(), "player", m_Player, false); m_ScriptInterface.SetProperty(settings.get(), "templates", m_Worker.m_EntityTemplates, false); obj = m_ScriptInterface.CallConstructor(ctor.get(), settings.get()); } else { // For deserialization, we want to create the object with the correct prototype // but don't want to actually run the constructor again obj = m_ScriptInterface.NewObjectFromConstructor(ctor.get()); } if (obj.undefined()) { LOGERROR(L"Failed to create AI player: %ls: error calling constructor '%hs'", path.string().c_str(), constructor.c_str()); return false; } m_Obj = CScriptValRooted(m_ScriptInterface.GetContext(), obj); return true; } void Run(CScriptVal state) { m_Commands.clear(); m_ScriptInterface.CallFunctionVoid(m_Obj.get(), "HandleMessage", state); } CAIWorker& m_Worker; std::wstring m_AIName; player_id_t m_Player; ScriptInterface m_ScriptInterface; CScriptValRooted m_Obj; std::vector > m_Commands; std::set m_LoadedModules; }; public: struct SCommandSets { player_id_t player; std::vector > commands; }; CAIWorker() : m_ScriptRuntime(ScriptInterface::CreateRuntime()), m_ScriptInterface("Engine", "AI", m_ScriptRuntime), m_TurnNum(0), m_CommandsComputed(true) { m_ScriptInterface.SetCallbackData(static_cast (this)); // TODO: ought to seed the RNG (in a network-synchronised way) before we use it m_ScriptInterface.ReplaceNondeterministicFunctions(m_RNG); } ~CAIWorker() { // Clear rooted script values before destructing the script interface m_EntityTemplates = CScriptValRooted(); m_PlayerMetadata.clear(); 
m_Players.clear(); } bool AddPlayer(const std::wstring& aiName, player_id_t player, bool callConstructor) { shared_ptr ai(new CAIPlayer(*this, aiName, player, m_ScriptRuntime, m_RNG)); if (!ai->Initialise(callConstructor)) return false; m_Players.push_back(ai); return true; } void StartComputation(const shared_ptr& gameState, const Grid& map) { debug_assert(m_CommandsComputed); m_GameState = gameState; if (map.m_DirtyID != m_GameStateMap.m_DirtyID) { m_GameStateMap = map; JSContext* cx = m_ScriptInterface.GetContext(); m_GameStateMapVal = CScriptValRooted(cx, ScriptInterface::ToJSVal(cx, m_GameStateMap)); } m_CommandsComputed = false; } void WaitToFinishComputation() { if (!m_CommandsComputed) { PerformComputation(); m_CommandsComputed = true; } } void GetCommands(std::vector& commands) { WaitToFinishComputation(); commands.clear(); commands.resize(m_Players.size()); for (size_t i = 0; i < m_Players.size(); ++i) { commands[i].player = m_Players[i]->m_Player; commands[i].commands = m_Players[i]->m_Commands; } } void LoadEntityTemplates(const std::vector >& templates) { m_ScriptInterface.Eval("({})", m_EntityTemplates); for (size_t i = 0; i < templates.size(); ++i) { jsval val = templates[i].second->ToJSVal(m_ScriptInterface.GetContext(), false); m_ScriptInterface.SetProperty(m_EntityTemplates.get(), templates[i].first.c_str(), CScriptVal(val), true); } // Since the template data is shared between AI players, freeze it // to stop any of them changing it and confusing the other players m_ScriptInterface.FreezeObject(m_EntityTemplates.get(), true); } void Serialize(std::ostream& stream, bool isDebug) { WaitToFinishComputation(); if (isDebug) { CDebugSerializer serializer(m_ScriptInterface, stream); serializer.Indent(4); SerializeState(serializer); } else { CStdSerializer serializer(m_ScriptInterface, stream); SerializeState(serializer); } } void SerializeState(ISerializer& serializer) { serializer.NumberU32_Unbounded("num ais", m_Players.size()); for (size_t i = 0; i < 
m_Players.size(); ++i) { serializer.String("name", m_Players[i]->m_AIName, 0, 256); serializer.NumberI32_Unbounded("player", m_Players[i]->m_Player); serializer.ScriptVal("data", m_Players[i]->m_Obj); serializer.NumberU32_Unbounded("num commands", m_Players[i]->m_Commands.size()); for (size_t j = 0; j < m_Players[i]->m_Commands.size(); ++j) { CScriptVal val = m_ScriptInterface.ReadStructuredClone(m_Players[i]->m_Commands[j]); serializer.ScriptVal("command", val); } } } void Deserialize(std::istream& stream) { debug_assert(m_CommandsComputed); // deserializing while we're still actively computing would be bad CStdDeserializer deserializer(m_ScriptInterface, stream); m_PlayerMetadata.clear(); m_Players.clear(); uint32_t numAis; deserializer.NumberU32_Unbounded("num ais", numAis); for (size_t i = 0; i < numAis; ++i) { std::wstring name; player_id_t player; deserializer.String("name", name, 0, 256); deserializer.NumberI32_Unbounded("player", player); if (!AddPlayer(name, player, false)) throw PSERROR_Deserialize_ScriptError(); // Use ScriptObjectAppend so we don't lose the carefully-constructed // prototype/parent of this object deserializer.ScriptObjectAppend("data", m_Players.back()->m_Obj.getRef()); uint32_t numCommands; deserializer.NumberU32_Unbounded("num commands", numCommands); m_Players.back()->m_Commands.reserve(numCommands); for (size_t j = 0; j < numCommands; ++j) { CScriptVal val; deserializer.ScriptVal("command", val); m_Players.back()->m_Commands.push_back(m_ScriptInterface.WriteStructuredClone(val.get())); } } } private: CScriptValRooted LoadMetadata(const VfsPath& path) { if (m_PlayerMetadata.find(path) == m_PlayerMetadata.end()) { // Load and cache the AI player metadata m_PlayerMetadata[path] = m_ScriptInterface.ReadJSONFile(path); } return m_PlayerMetadata[path]; } void PerformComputation() { // Deserialize the game state, to pass to the AI's HandleMessage CScriptVal state; { PROFILE("AI compute read state"); state = 
m_ScriptInterface.ReadStructuredClone(m_GameState); m_ScriptInterface.SetProperty(state.get(), "map", m_GameStateMapVal, true); } // It would be nice to do // m_ScriptInterface.FreezeObject(state.get(), true); // to prevent AI scripts accidentally modifying the state and // affecting other AI scripts they share it with. But the performance // cost is far too high, so we won't do that. { PROFILE("AI compute scripts"); for (size_t i = 0; i < m_Players.size(); ++i) m_Players[i]->Run(state); } // Run the GC every so often. // (This isn't particularly necessary, but it makes profiling clearer // since it avoids random GC delays while running other scripts) if (m_TurnNum++ % 25 == 0) { PROFILE("AI compute GC"); m_ScriptInterface.MaybeGC(); } } shared_ptr m_ScriptRuntime; ScriptInterface m_ScriptInterface; boost::rand48 m_RNG; size_t m_TurnNum; CScriptValRooted m_EntityTemplates; std::map m_PlayerMetadata; std::vector > m_Players; // use shared_ptr just to avoid copying shared_ptr m_GameState; Grid m_GameStateMap; CScriptValRooted m_GameStateMapVal; bool m_CommandsComputed; }; class CCmpAIManager : public ICmpAIManager { public: static void ClassInit(CComponentManager& componentManager) { componentManager.SubscribeToMessageType(MT_ProgressiveLoad); } DEFAULT_COMPONENT_ALLOCATOR(AIManager) static std::string GetSchema() { return ""; } virtual void Init(const CParamNode& UNUSED(paramNode)) { StartLoadEntityTemplates(); } virtual void Deinit() { } virtual void Serialize(ISerializer& serialize) { // Because the AI worker uses its own ScriptInterface, we can't use the // ISerializer (which was initialised with the simulation ScriptInterface) // directly. So we'll just grab the ISerializer's stream and write to it // with an independent serializer. 
// TODO: make the serialization/deserialization actually work, and not really slowly // m_Worker.Serialize(serialize.GetStream(), serialize.IsDebug()); UNUSED2(serialize); } virtual void Deserialize(const CParamNode& paramNode, IDeserializer& deserialize) { Init(paramNode); // m_Worker.Deserialize(deserialize.GetStream()); UNUSED2(deserialize); } virtual void HandleMessage(const CMessage& msg, bool UNUSED(global)) { switch (msg.GetType()) { case MT_ProgressiveLoad: { const CMessageProgressiveLoad& msgData = static_cast (msg); *msgData.total += m_TemplateNames.size(); if (*msgData.progressed) break; if (ContinueLoadEntityTemplates()) *msgData.progressed = true; *msgData.progress += m_TemplateLoadedIdx; break; } } } virtual void AddPlayer(std::wstring id, player_id_t player) { m_Worker.AddPlayer(id, player, true); } virtual void StartComputation() { PROFILE("AI setup"); ForceLoadEntityTemplates(); ScriptInterface& scriptInterface = GetSimContext().GetScriptInterface(); CmpPtr cmpAIInterface(GetSimContext(), SYSTEM_ENTITY); debug_assert(!cmpAIInterface.null()); // Get the game state from AIInterface CScriptVal state = cmpAIInterface->GetRepresentation(); // Get the map data Grid dummyGrid; const Grid* map = &dummyGrid; CmpPtr cmpPathfinder(GetSimContext(), SYSTEM_ENTITY); if (!cmpPathfinder.null()) map = &cmpPathfinder->GetPassabilityGrid(); LoadPathfinderClasses(state); m_Worker.StartComputation(scriptInterface.WriteStructuredClone(state.get()), *map); } virtual void PushCommands() { ScriptInterface& scriptInterface = GetSimContext().GetScriptInterface(); std::vector commands; m_Worker.GetCommands(commands); CmpPtr cmpCommandQueue(GetSimContext(), SYSTEM_ENTITY); if (cmpCommandQueue.null()) return; for (size_t i = 0; i < commands.size(); ++i) { for (size_t j = 0; j < commands[i].commands.size(); ++j) { cmpCommandQueue->PushLocalCommand(commands[i].player, scriptInterface.ReadStructuredClone(commands[i].commands[j])); } } } private: std::vector m_TemplateNames; size_t 
m_TemplateLoadedIdx; std::vector > m_Templates; void StartLoadEntityTemplates() { CmpPtr cmpTemplateManager(GetSimContext(), SYSTEM_ENTITY); debug_assert(!cmpTemplateManager.null()); m_TemplateNames = cmpTemplateManager->FindAllTemplates(false); m_TemplateLoadedIdx = 0; m_Templates.reserve(m_TemplateNames.size()); } // Tries to load the next entity template. Returns true if we did some work. bool ContinueLoadEntityTemplates() { if (m_TemplateLoadedIdx >= m_TemplateNames.size()) return false; CmpPtr cmpTemplateManager(GetSimContext(), SYSTEM_ENTITY); debug_assert(!cmpTemplateManager.null()); const CParamNode* node = cmpTemplateManager->GetTemplateWithoutValidation(m_TemplateNames[m_TemplateLoadedIdx]); if (node) m_Templates.push_back(std::make_pair(m_TemplateNames[m_TemplateLoadedIdx], node)); m_TemplateLoadedIdx++; // If this was the last template, send the data to the worker if (m_TemplateLoadedIdx == m_TemplateNames.size()) m_Worker.LoadEntityTemplates(m_Templates); return true; } void ForceLoadEntityTemplates() { while (ContinueLoadEntityTemplates()) { } } void LoadPathfinderClasses(CScriptVal state) { CmpPtr cmpPathfinder(GetSimContext(), SYSTEM_ENTITY); if (cmpPathfinder.null()) return; ScriptInterface& scriptInterface = GetSimContext().GetScriptInterface(); CScriptVal classesVal; scriptInterface.Eval("({ pathfinderObstruction: 1, foundationObstruction: 2 })", classesVal); std::map classes = cmpPathfinder->GetPassabilityClasses(); for (std::map::iterator it = classes.begin(); it != classes.end(); ++it) scriptInterface.SetProperty(classesVal.get(), it->first.c_str(), it->second, true); scriptInterface.SetProperty(state.get(), "passabilityClasses", classesVal, true); } CAIWorker m_Worker; }; REGISTER_COMPONENT_TYPE(AIManager) Index: ps/trunk/source/renderer/Renderer.cpp =================================================================== --- ps/trunk/source/renderer/Renderer.cpp (revision 9349) +++ ps/trunk/source/renderer/Renderer.cpp (revision 9350) @@ -1,1983 
+1,1985 @@ /* Copyright (C) 2011 Wildfire Games. * This file is part of 0 A.D. * * 0 A.D. is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 2 of the License, or * (at your option) any later version. * * 0 A.D. is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with 0 A.D. If not, see . */ /* * higher level interface on top of OpenGL to render basic objects: * terrain, models, sprites, particles etc. */ #include "precompiled.h" #include #include #include #include #include "Renderer.h" #include "lib/bits.h" // is_pow2 #include "lib/res/graphics/ogl_tex.h" +#include "lib/allocators/shared_ptr.h" #include "maths/Matrix3D.h" #include "maths/MathUtil.h" #include "ps/CLogger.h" #include "ps/Game.h" #include "ps/Profile.h" #include "ps/Filesystem.h" #include "ps/World.h" #include "ps/Loader.h" #include "ps/ProfileViewer.h" #include "graphics/Camera.h" #include "graphics/GameView.h" #include "graphics/LightEnv.h" #include "graphics/Model.h" #include "graphics/ModelDef.h" #include "graphics/ParticleManager.h" #include "graphics/ShaderManager.h" #include "graphics/ShaderTechnique.h" #include "graphics/Terrain.h" #include "graphics/Texture.h" #include "graphics/TextureManager.h" #include "renderer/FixedFunctionModelRenderer.h" #include "renderer/HWLightingModelRenderer.h" #include "renderer/InstancingModelRenderer.h" #include "renderer/ModelRenderer.h" #include "renderer/OverlayRenderer.h" #include "renderer/ParticleRenderer.h" #include "renderer/PlayerRenderer.h" #include "renderer/RenderModifiers.h" #include "renderer/ShadowMap.h" #include "renderer/SkyManager.h" #include "renderer/TerrainOverlay.h" 
#include "renderer/TerrainRenderer.h" #include "renderer/TransparencyRenderer.h" #include "renderer/VertexBufferManager.h" #include "renderer/WaterManager.h" /////////////////////////////////////////////////////////////////////////////////// // CRendererStatsTable - Profile display of rendering stats /** * Class CRendererStatsTable: Implementation of AbstractProfileTable to * display the renderer stats in-game. * * Accesses CRenderer::m_Stats by keeping the reference passed to the * constructor. */ class CRendererStatsTable : public AbstractProfileTable { NONCOPYABLE(CRendererStatsTable); public: CRendererStatsTable(const CRenderer::Stats& st); // Implementation of AbstractProfileTable interface CStr GetName(); CStr GetTitle(); size_t GetNumberRows(); const std::vector& GetColumns(); CStr GetCellText(size_t row, size_t col); AbstractProfileTable* GetChild(size_t row); private: /// Reference to the renderer singleton's stats const CRenderer::Stats& Stats; /// Column descriptions std::vector columnDescriptions; enum { Row_DrawCalls = 0, Row_TerrainTris, Row_ModelTris, Row_BlendSplats, Row_Particles, Row_VBReserved, Row_VBAllocated, // Must be last to count number of rows NumberRows }; }; // Construction CRendererStatsTable::CRendererStatsTable(const CRenderer::Stats& st) : Stats(st) { columnDescriptions.push_back(ProfileColumn("Name", 230)); columnDescriptions.push_back(ProfileColumn("Value", 100)); } // Implementation of AbstractProfileTable interface CStr CRendererStatsTable::GetName() { return "renderer"; } CStr CRendererStatsTable::GetTitle() { return "Renderer statistics"; } size_t CRendererStatsTable::GetNumberRows() { return NumberRows; } const std::vector& CRendererStatsTable::GetColumns() { return columnDescriptions; } CStr CRendererStatsTable::GetCellText(size_t row, size_t col) { char buf[256]; switch(row) { case Row_DrawCalls: if (col == 0) return "# draw calls"; sprintf_s(buf, sizeof(buf), "%lu", (unsigned long)Stats.m_DrawCalls); return buf; case 
Row_TerrainTris: if (col == 0) return "# terrain tris"; sprintf_s(buf, sizeof(buf), "%lu", (unsigned long)Stats.m_TerrainTris); return buf; case Row_ModelTris: if (col == 0) return "# model tris"; sprintf_s(buf, sizeof(buf), "%lu", (unsigned long)Stats.m_ModelTris); return buf; case Row_BlendSplats: if (col == 0) return "# blend splats"; sprintf_s(buf, sizeof(buf), "%lu", (unsigned long)Stats.m_BlendSplats); return buf; case Row_Particles: if (col == 0) return "# particles"; sprintf_s(buf, sizeof(buf), "%lu", (unsigned long)Stats.m_Particles); return buf; case Row_VBReserved: if (col == 0) return "VB bytes reserved"; sprintf_s(buf, sizeof(buf), "%lu", (unsigned long)g_VBMan.GetBytesReserved()); return buf; case Row_VBAllocated: if (col == 0) return "VB bytes allocated"; sprintf_s(buf, sizeof(buf), "%lu", (unsigned long)g_VBMan.GetBytesAllocated()); return buf; default: return "???"; } } AbstractProfileTable* CRendererStatsTable::GetChild(size_t UNUSED(row)) { return 0; } /////////////////////////////////////////////////////////////////////////////////// // CRenderer implementation /** * Struct CRendererInternals: Truly hide data that is supposed to be hidden * in this structure so it won't even appear in header files. 
*/ struct CRendererInternals { NONCOPYABLE(CRendererInternals); public: /// true if CRenderer::Open has been called bool IsOpen; /// true if shaders need to be reloaded bool ShadersDirty; /// Table to display renderer stats in-game via profile system CRendererStatsTable profileTable; /// Shader manager CShaderManager shaderManager; /// Water manager WaterManager waterManager; /// Sky manager SkyManager skyManager; /// Texture manager CTextureManager textureManager; /// Terrain renderer TerrainRenderer* terrainRenderer; /// Overlay renderer OverlayRenderer overlayRenderer; /// Particle manager CParticleManager particleManager; /// Particle renderer ParticleRenderer particleRenderer; /// Shadow map ShadowMap* shadow; /// Various model renderers struct Models { // The following model renderers are aliases for the appropriate real_* // model renderers (depending on hardware availability and current settings) // and must be used for actual model submission and rendering ModelRenderer* Normal; ModelRenderer* NormalInstancing; ModelRenderer* Player; ModelRenderer* PlayerInstancing; ModelRenderer* Transp; // "Palette" of available ModelRenderers. Do not use these directly for // rendering and submission; use the aliases above instead. 
ModelRenderer* pal_NormalFF; ModelRenderer* pal_PlayerFF; ModelRenderer* pal_TranspFF; ModelRenderer* pal_TranspSortAll; ModelRenderer* pal_NormalShader; ModelRenderer* pal_NormalInstancingShader; ModelRenderer* pal_PlayerShader; ModelRenderer* pal_PlayerInstancingShader; ModelRenderer* pal_TranspShader; ModelVertexRendererPtr VertexFF; ModelVertexRendererPtr VertexPolygonSort; ModelVertexRendererPtr VertexRendererShader; ModelVertexRendererPtr VertexInstancingShader; // generic RenderModifiers that are supposed to be used directly RenderModifierPtr ModWireframe; RenderModifierPtr ModSolidColor; RenderModifierPtr ModSolidPlayerColor; RenderModifierPtr ModTransparentDepthShadow; // RenderModifiers that are selected from the palette below RenderModifierPtr ModNormal; RenderModifierPtr ModNormalInstancing; RenderModifierPtr ModPlayer; RenderModifierPtr ModPlayerInstancing; RenderModifierPtr ModSolid; RenderModifierPtr ModSolidInstancing; RenderModifierPtr ModSolidPlayer; RenderModifierPtr ModSolidPlayerInstancing; RenderModifierPtr ModTransparent; // Palette of available RenderModifiers RenderModifierPtr ModPlainUnlit; RenderModifierPtr ModPlayerUnlit; RenderModifierPtr ModTransparentUnlit; RenderModifierPtr ModShaderSolidColor; RenderModifierPtr ModShaderSolidColorInstancing; RenderModifierPtr ModShaderSolidPlayerColor; RenderModifierPtr ModShaderSolidPlayerColorInstancing; RenderModifierPtr ModShaderSolidTex; LitRenderModifierPtr ModShaderNormal; LitRenderModifierPtr ModShaderNormalInstancing; LitRenderModifierPtr ModShaderPlayer; LitRenderModifierPtr ModShaderPlayerInstancing; LitRenderModifierPtr ModShaderTransparent; RenderModifierPtr ModShaderTransparentShadow; } Model; CRendererInternals() : IsOpen(false), ShadersDirty(true), profileTable(g_Renderer.m_Stats), textureManager(g_VFS, false, false) { terrainRenderer = new TerrainRenderer(); shadow = new ShadowMap(); Model.pal_NormalFF = 0; Model.pal_PlayerFF = 0; Model.pal_TranspFF = 0; Model.pal_TranspSortAll = 0; 
Model.pal_NormalShader = 0; Model.pal_NormalInstancingShader = 0; Model.pal_PlayerShader = 0; Model.pal_PlayerInstancingShader = 0; Model.pal_TranspShader = 0; Model.Normal = 0; Model.NormalInstancing = 0; Model.Player = 0; Model.PlayerInstancing = 0; Model.Transp = 0; } ~CRendererInternals() { delete shadow; delete terrainRenderer; } /** * Load the OpenGL projection and modelview matrices and the viewport according * to the given camera. */ void SetOpenGLCamera(const CCamera& camera) { CMatrix3D view; camera.m_Orientation.GetInverse(view); const CMatrix3D& proj = camera.GetProjection(); glMatrixMode(GL_PROJECTION); glLoadMatrixf(&proj._11); glMatrixMode(GL_MODELVIEW); glLoadMatrixf(&view._11); const SViewPort &vp = camera.GetViewPort(); glViewport((GLint)vp.m_X,(GLint)vp.m_Y,(GLsizei)vp.m_Width,(GLsizei)vp.m_Height); } /** * Renders all non-transparent models with the given modifiers. */ void CallModelRenderers( const RenderModifierPtr& modNormal, const RenderModifierPtr& modNormalInstancing, const RenderModifierPtr& modPlayer, const RenderModifierPtr& modPlayerInstancing, int flags) { Model.Normal->Render(modNormal, flags); if (Model.Normal != Model.NormalInstancing) Model.NormalInstancing->Render(modNormalInstancing, flags); Model.Player->Render(modPlayer, flags); if (Model.Player != Model.PlayerInstancing) Model.PlayerInstancing->Render(modPlayerInstancing, flags); } }; /////////////////////////////////////////////////////////////////////////////////// // CRenderer constructor CRenderer::CRenderer() { m = new CRendererInternals; m_WaterManager = &m->waterManager; m_SkyManager = &m->skyManager; g_ProfileViewer.AddRootTable(&m->profileTable); m_Width=0; m_Height=0; m_TerrainRenderMode=SOLID; m_ModelRenderMode=SOLID; m_ClearColor[0]=m_ClearColor[1]=m_ClearColor[2]=m_ClearColor[3]=0; m_SortAllTransparent = false; m_DisplayFrustum = false; m_DisableCopyShadow = false; m_DisplayTerrainPriorities = false; m_FastPlayerColor = true; m_SkipSubmit = false; 
m_Options.m_NoVBO = false; m_Options.m_RenderPath = RP_DEFAULT; m_Options.m_FancyWater = false; m_Options.m_Shadows = false; m_Options.m_ShadowAlphaFix = true; m_Options.m_ARBProgramShadow = true; m_ShadowZBias = 0.02f; m_ShadowMapSize = 0; m_LightEnv = NULL; m_CurrentScene = NULL; m_hCompositeAlphaMap = 0; AddLocalProperty(L"fancyWater", &m_Options.m_FancyWater, false); AddLocalProperty(L"horizonHeight", &m->skyManager.m_HorizonHeight, false); AddLocalProperty(L"waterMurkiness", &m->waterManager.m_Murkiness, false); AddLocalProperty(L"waterReflTintStrength", &m->waterManager.m_ReflectionTintStrength, false); AddLocalProperty(L"waterRepeatPeriod", &m->waterManager.m_RepeatPeriod, false); AddLocalProperty(L"waterShininess", &m->waterManager.m_Shininess, false); AddLocalProperty(L"waterSpecularStrength", &m->waterManager.m_SpecularStrength, false); AddLocalProperty(L"waterWaviness", &m->waterManager.m_Waviness, false); RegisterFileReloadFunc(ReloadChangedFileCB, this); } /////////////////////////////////////////////////////////////////////////////////// // CRenderer destructor CRenderer::~CRenderer() { UnregisterFileReloadFunc(ReloadChangedFileCB, this); // model rendering delete m->Model.pal_NormalFF; delete m->Model.pal_PlayerFF; delete m->Model.pal_TranspFF; delete m->Model.pal_TranspSortAll; delete m->Model.pal_NormalShader; delete m->Model.pal_NormalInstancingShader; delete m->Model.pal_PlayerShader; delete m->Model.pal_PlayerInstancingShader; delete m->Model.pal_TranspShader; // we no longer UnloadAlphaMaps / UnloadWaterTextures here - // that is the responsibility of the module that asked for // them to be loaded (i.e. CGameView). 
delete m; } /////////////////////////////////////////////////////////////////////////////////// // EnumCaps: build card cap bits void CRenderer::EnumCaps() { // assume support for nothing m_Caps.m_VBO = false; m_Caps.m_ARBProgram = false; m_Caps.m_ARBProgramShadow = false; m_Caps.m_VertexShader = false; m_Caps.m_FragmentShader = false; m_Caps.m_Shadows = false; // now start querying extensions if (!m_Options.m_NoVBO) { if (ogl_HaveExtension("GL_ARB_vertex_buffer_object")) { m_Caps.m_VBO=true; } } if (0 == ogl_HaveExtensions(0, "GL_ARB_vertex_program", "GL_ARB_fragment_program", NULL)) { m_Caps.m_ARBProgram = true; if (ogl_HaveExtension("GL_ARB_fragment_program_shadow")) m_Caps.m_ARBProgramShadow = true; } if (0 == ogl_HaveExtensions(0, "GL_ARB_shader_objects", "GL_ARB_shading_language_100", NULL)) { if (ogl_HaveExtension("GL_ARB_vertex_shader")) m_Caps.m_VertexShader = true; if (ogl_HaveExtension("GL_ARB_fragment_shader")) m_Caps.m_FragmentShader = true; } if (0 == ogl_HaveExtensions(0, "GL_ARB_shadow", "GL_ARB_depth_texture", "GL_EXT_framebuffer_object", NULL)) { if (ogl_max_tex_units >= 4) m_Caps.m_Shadows = true; } } void CRenderer::ReloadShaders() { debug_assert(m->IsOpen); typedef std::map Defines; Defines defNull; Defines defBasic; if (m_Options.m_Shadows) { defBasic["USE_SHADOW"] = "1"; if (m_Caps.m_ARBProgramShadow && m_Options.m_ARBProgramShadow) defBasic["USE_FP_SHADOW"] = "1"; } if (m_LightEnv) defBasic["LIGHTING_MODEL_" + m_LightEnv->GetLightingModel()] = "1"; Defines defColored = defBasic; defColored["USE_OBJECTCOLOR"] = "1"; Defines defTransparent = defBasic; defTransparent["USE_TRANSPARENT"] = "1"; // TODO: it'd be nicer to load this technique from an XML file or something CShaderPass passTransparent0(m->shaderManager.LoadProgram("solid_tex", defNull)); passTransparent0.AlphaFunc(GL_GREATER, 0.975f); passTransparent0.ColorMask(0, 0, 0, 0); CShaderPass passTransparent1(m->shaderManager.LoadProgram("model_common", defTransparent)); 
passTransparent1.AlphaFunc(GL_GREATER, 0.0f); passTransparent1.BlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); passTransparent1.DepthMask(0); CShaderTechnique techTransparent(passTransparent0); techTransparent.AddPass(passTransparent1); CShaderPass passTransparentShadow(m->shaderManager.LoadProgram("solid_tex", defBasic)); passTransparentShadow.AlphaFunc(GL_GREATER, 0.4f); CShaderTechnique techTransparentShadow(passTransparentShadow); m->Model.ModShaderSolidColor = RenderModifierPtr(new ShaderRenderModifier(CShaderTechnique(m->shaderManager.LoadProgram( "solid", defNull)))); m->Model.ModShaderSolidColorInstancing = RenderModifierPtr(new ShaderRenderModifier(CShaderTechnique(m->shaderManager.LoadProgram( "solid_instancing", defNull)))); m->Model.ModShaderSolidPlayerColor = RenderModifierPtr(new ShaderRenderModifier(CShaderTechnique(m->shaderManager.LoadProgram( "solid_player", defNull)))); m->Model.ModShaderSolidPlayerColorInstancing = RenderModifierPtr(new ShaderRenderModifier(CShaderTechnique(m->shaderManager.LoadProgram( "solid_player_instancing", defNull)))); m->Model.ModShaderSolidTex = RenderModifierPtr(new ShaderRenderModifier(CShaderTechnique(m->shaderManager.LoadProgram( "solid_tex", defNull)))); m->Model.ModShaderNormal = LitRenderModifierPtr(new ShaderRenderModifier(CShaderTechnique(m->shaderManager.LoadProgram( "model_common", defBasic)))); m->Model.ModShaderNormalInstancing = LitRenderModifierPtr(new ShaderRenderModifier(CShaderTechnique(m->shaderManager.LoadProgram( "model_common_instancing", defBasic)))); m->Model.ModShaderPlayer = LitRenderModifierPtr(new ShaderRenderModifier(CShaderTechnique(m->shaderManager.LoadProgram( "model_common", defColored)))); m->Model.ModShaderPlayerInstancing = LitRenderModifierPtr(new ShaderRenderModifier(CShaderTechnique(m->shaderManager.LoadProgram( "model_common_instancing", defColored)))); m->Model.ModShaderTransparent = LitRenderModifierPtr(new ShaderRenderModifier( techTransparent)); 
m->Model.ModShaderTransparentShadow = LitRenderModifierPtr(new ShaderRenderModifier( techTransparentShadow)); m->ShadersDirty = false; } bool CRenderer::Open(int width, int height) { m->IsOpen = true; // Must query card capabilities before creating renderers that depend // on card capabilities. EnumCaps(); // model rendering m->Model.VertexFF = ModelVertexRendererPtr(new FixedFunctionModelRenderer); m->Model.VertexPolygonSort = ModelVertexRendererPtr(new PolygonSortModelRenderer); m->Model.VertexRendererShader = ModelVertexRendererPtr(new ShaderModelRenderer); m->Model.VertexInstancingShader = ModelVertexRendererPtr(new InstancingModelRenderer); m->Model.pal_NormalFF = new BatchModelRenderer(m->Model.VertexFF); m->Model.pal_PlayerFF = new BatchModelRenderer(m->Model.VertexFF); m->Model.pal_TranspFF = new SortModelRenderer(m->Model.VertexFF); m->Model.pal_TranspSortAll = new SortModelRenderer(m->Model.VertexPolygonSort); m->Model.pal_NormalShader = new BatchModelRenderer(m->Model.VertexRendererShader); m->Model.pal_NormalInstancingShader = new BatchModelRenderer(m->Model.VertexInstancingShader); m->Model.pal_PlayerShader = new BatchModelRenderer(m->Model.VertexRendererShader); m->Model.pal_PlayerInstancingShader = new BatchModelRenderer(m->Model.VertexInstancingShader); m->Model.pal_TranspShader = new SortModelRenderer(m->Model.VertexRendererShader); m->Model.ModWireframe = RenderModifierPtr(new WireframeRenderModifier); m->Model.ModPlainUnlit = RenderModifierPtr(new PlainRenderModifier); SetFastPlayerColor(true); m->Model.ModSolidColor = RenderModifierPtr(new SolidColorRenderModifier); m->Model.ModSolidPlayerColor = RenderModifierPtr(new SolidPlayerColorRender); m->Model.ModTransparentUnlit = RenderModifierPtr(new TransparentRenderModifier); m->Model.ModTransparentDepthShadow = RenderModifierPtr(new TransparentDepthShadowModifier); // Dimensions m_Width = width; m_Height = height; // set packing parameters glPixelStorei(GL_PACK_ALIGNMENT,1); 
glPixelStorei(GL_UNPACK_ALIGNMENT,1); // setup default state glDepthFunc(GL_LEQUAL); glEnable(GL_DEPTH_TEST); glCullFace(GL_BACK); glFrontFace(GL_CCW); glEnable(GL_CULL_FACE); GLint bits; glGetIntegerv(GL_DEPTH_BITS,&bits); LOGMESSAGE(L"CRenderer::Open: depth bits %d",bits); glGetIntegerv(GL_STENCIL_BITS,&bits); LOGMESSAGE(L"CRenderer::Open: stencil bits %d",bits); glGetIntegerv(GL_ALPHA_BITS,&bits); LOGMESSAGE(L"CRenderer::Open: alpha bits %d",bits); // Validate the currently selected render path SetRenderPath(m_Options.m_RenderPath); return true; } // resize renderer view void CRenderer::Resize(int width,int height) { // need to recreate the shadow map object to resize the shadow texture m->shadow->RecreateTexture(); m_Width = width; m_Height = height; } ////////////////////////////////////////////////////////////////////////////////////////// // SetOptionBool: set boolean renderer option void CRenderer::SetOptionBool(enum Option opt,bool value) { switch (opt) { case OPT_NOVBO: m_Options.m_NoVBO=value; break; case OPT_SHADOWS: m_Options.m_Shadows=value; MakeShadersDirty(); break; case OPT_FANCYWATER: m_Options.m_FancyWater=value; break; default: debug_warn(L"CRenderer::SetOptionBool: unknown option"); break; } } ////////////////////////////////////////////////////////////////////////////////////////// // GetOptionBool: get boolean renderer option bool CRenderer::GetOptionBool(enum Option opt) const { switch (opt) { case OPT_NOVBO: return m_Options.m_NoVBO; case OPT_SHADOWS: return m_Options.m_Shadows; case OPT_FANCYWATER: return m_Options.m_FancyWater; default: debug_warn(L"CRenderer::GetOptionBool: unknown option"); break; } return false; } void CRenderer::SetOptionFloat(enum Option opt, float val) { switch(opt) { case OPT_LODBIAS: m_Options.m_LodBias = val; break; default: debug_warn(L"CRenderer::SetOptionFloat: unknown option"); break; } } ////////////////////////////////////////////////////////////////////////////////////////// // SetRenderPath: Select the 
preferred render path. // This may only be called before Open(), because the layout of vertex arrays and other // data may depend on the chosen render path. void CRenderer::SetRenderPath(RenderPath rp) { if (!m->IsOpen) { // Delay until Open() is called. m_Options.m_RenderPath = rp; return; } // Renderer has been opened, so validate the selected renderpath if (rp == RP_DEFAULT) { if (m_Caps.m_ARBProgram) rp = RP_SHADER; else rp = RP_FIXED; } if (rp == RP_SHADER) { if (!m_Caps.m_ARBProgram) { LOGWARNING(L"Falling back to fixed function\n"); rp = RP_FIXED; } } m_Options.m_RenderPath = rp; // We might need to regenerate some render data after changing path if (g_Game) g_Game->GetWorld()->GetTerrain()->MakeDirty(RENDERDATA_UPDATE_COLOR); } CStr CRenderer::GetRenderPathName(RenderPath rp) { switch(rp) { case RP_DEFAULT: return "default"; case RP_FIXED: return "fixed"; case RP_SHADER: return "shader"; default: return "(invalid)"; } } CRenderer::RenderPath CRenderer::GetRenderPathByName(const CStr& name) { if (name == "fixed") return RP_FIXED; if (name == "shader") return RP_SHADER; if (name == "default") return RP_DEFAULT; LOGWARNING(L"Unknown render path name '%hs', assuming 'default'", name.c_str()); return RP_DEFAULT; } ////////////////////////////////////////////////////////////////////////////////////////// // SetFastPlayerColor void CRenderer::SetFastPlayerColor(bool fast) { m_FastPlayerColor = fast; if (m_FastPlayerColor) { if (!FastPlayerColorRender::IsAvailable()) { LOGWARNING(L"Falling back to slower player color rendering."); m_FastPlayerColor = false; } } if (m_FastPlayerColor) m->Model.ModPlayerUnlit = RenderModifierPtr(new FastPlayerColorRender); else m->Model.ModPlayerUnlit = RenderModifierPtr(new SlowPlayerColorRender); } ////////////////////////////////////////////////////////////////////////////////////////// // BeginFrame: signal frame start void CRenderer::BeginFrame() { PROFILE("begin frame"); // zero out all the per-frame stats m_Stats.Reset(); // 
choose model renderers for this frame if (m_Options.m_RenderPath == RP_SHADER) { if (m->ShadersDirty) ReloadShaders(); m->Model.ModShaderNormal->SetShadowMap(m->shadow); m->Model.ModShaderNormal->SetLightEnv(m_LightEnv); m->Model.ModShaderNormalInstancing->SetShadowMap(m->shadow); m->Model.ModShaderNormalInstancing->SetLightEnv(m_LightEnv); m->Model.ModShaderPlayer->SetShadowMap(m->shadow); m->Model.ModShaderPlayer->SetLightEnv(m_LightEnv); m->Model.ModShaderPlayerInstancing->SetShadowMap(m->shadow); m->Model.ModShaderPlayerInstancing->SetLightEnv(m_LightEnv); m->Model.ModShaderTransparent->SetShadowMap(m->shadow); m->Model.ModShaderTransparent->SetLightEnv(m_LightEnv); m->Model.ModNormal = m->Model.ModShaderNormal; m->Model.ModNormalInstancing = m->Model.ModShaderNormalInstancing; m->Model.ModPlayer = m->Model.ModShaderPlayer; m->Model.ModPlayerInstancing = m->Model.ModShaderPlayerInstancing; m->Model.ModSolid = m->Model.ModShaderSolidColor; m->Model.ModSolidInstancing = m->Model.ModShaderSolidColorInstancing; m->Model.ModSolidPlayer = m->Model.ModShaderSolidPlayerColor; m->Model.ModSolidPlayerInstancing = m->Model.ModShaderSolidPlayerColorInstancing; m->Model.ModTransparent = m->Model.ModShaderTransparent; m->Model.Normal = m->Model.pal_NormalShader; m->Model.NormalInstancing = m->Model.pal_NormalInstancingShader; m->Model.Player = m->Model.pal_PlayerShader; m->Model.PlayerInstancing = m->Model.pal_PlayerInstancingShader; m->Model.Transp = m->Model.pal_TranspShader; } else { m->Model.ModNormal = m->Model.ModPlainUnlit; m->Model.ModNormalInstancing = m->Model.ModPlainUnlit; m->Model.ModPlayer = m->Model.ModPlayerUnlit; m->Model.ModPlayerInstancing = m->Model.ModPlayerUnlit; m->Model.ModTransparent = m->Model.ModTransparentUnlit; m->Model.NormalInstancing = m->Model.pal_NormalFF; m->Model.Normal = m->Model.pal_NormalFF; m->Model.PlayerInstancing = m->Model.pal_PlayerFF; m->Model.Player = m->Model.pal_PlayerFF; m->Model.ModSolid = m->Model.ModSolidColor; 
m->Model.ModSolidInstancing = m->Model.ModSolidColor; m->Model.ModSolidPlayer = m->Model.ModSolidPlayerColor; m->Model.ModSolidPlayerInstancing = m->Model.ModSolidPlayerColor; if (m_SortAllTransparent) m->Model.Transp = m->Model.pal_TranspSortAll; else m->Model.Transp = m->Model.pal_TranspFF; } } ////////////////////////////////////////////////////////////////////////////////////////// // SetClearColor: set color used to clear screen in BeginFrame() void CRenderer::SetClearColor(SColor4ub color) { m_ClearColor[0] = float(color.R) / 255.0f; m_ClearColor[1] = float(color.G) / 255.0f; m_ClearColor[2] = float(color.B) / 255.0f; m_ClearColor[3] = float(color.A) / 255.0f; } void CRenderer::RenderShadowMap() { PROFILE("render shadow map"); m->shadow->BeginRender(); float shadowTransp = m_LightEnv->GetTerrainShadowTransparency(); glColor3f(shadowTransp, shadowTransp, shadowTransp); // Figure out transparent rendering strategy RenderModifierPtr transparentShadows; if (GetRenderPath() == RP_SHADER) { transparentShadows = m->Model.ModShaderTransparentShadow; } else { transparentShadows = m->Model.ModTransparentDepthShadow; } // Render all closed models (i.e. 
models where rendering back faces will produce // the correct result) glCullFace(GL_FRONT); { PROFILE("render patches"); m->terrainRenderer->RenderPatches(); } glCullFace(GL_BACK); // Render models that aren't closed glDisable(GL_CULL_FACE); { PROFILE("render models"); m->CallModelRenderers(m->Model.ModSolid, m->Model.ModSolidInstancing, m->Model.ModSolid, m->Model.ModSolidInstancing, MODELFLAG_CASTSHADOWS); } { PROFILE("render transparent models"); m->Model.Transp->Render(transparentShadows, MODELFLAG_CASTSHADOWS); } glEnable(GL_CULL_FACE); glColor3f(1.0, 1.0, 1.0); m->shadow->EndRender(); } void CRenderer::RenderPatches() { PROFILE("render patches"); // switch on wireframe if we need it if (m_TerrainRenderMode == WIREFRAME) { glPolygonMode(GL_FRONT_AND_BACK, GL_LINE); } // render all the patches, including blend pass if (GetRenderPath() == RP_SHADER) m->terrainRenderer->RenderTerrainShader((m_Caps.m_Shadows && m_Options.m_Shadows) ? m->shadow : 0); else m->terrainRenderer->RenderTerrain(); if (m_TerrainRenderMode == WIREFRAME) { // switch wireframe off again glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); } else if (m_TerrainRenderMode == EDGED_FACES) { // edged faces: need to make a second pass over the data: // first switch on wireframe glPolygonMode(GL_FRONT_AND_BACK,GL_LINE); // setup some renderstate .. glDisable(GL_TEXTURE_2D); glColor3f(0.5f, 0.5f, 1.0f); glLineWidth(2.0f); // render tiles edges m->terrainRenderer->RenderPatches(); // set color for outline glColor3f(0, 0, 1); glLineWidth(4.0f); // render outline of each patch m->terrainRenderer->RenderOutlines(); // .. 
and restore the renderstates glLineWidth(1.0f); glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); } } void CRenderer::RenderModels() { PROFILE("render models"); if (m_ModelRenderMode == WIREFRAME) { glPolygonMode(GL_FRONT_AND_BACK, GL_LINE); } m->CallModelRenderers(m->Model.ModNormal, m->Model.ModNormalInstancing, m->Model.ModPlayer, m->Model.ModPlayerInstancing, 0); if (m_ModelRenderMode == WIREFRAME) { glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); } else if (m_ModelRenderMode == EDGED_FACES) { glPolygonMode(GL_FRONT_AND_BACK, GL_LINE); glDisable(GL_TEXTURE_2D); glColor3f(1.0f, 1.0f, 0.0f); m->CallModelRenderers(m->Model.ModSolid, m->Model.ModSolidInstancing, m->Model.ModSolid, m->Model.ModSolidInstancing, 0); glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); } } void CRenderer::RenderTransparentModels() { PROFILE("render transparent models"); // switch on wireframe if we need it if (m_ModelRenderMode == WIREFRAME) { glPolygonMode(GL_FRONT_AND_BACK, GL_LINE); } m->Model.Transp->Render(m->Model.ModTransparent, 0); if (m_ModelRenderMode == WIREFRAME) { // switch wireframe off again glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); } else if (m_ModelRenderMode == EDGED_FACES) { glPolygonMode(GL_FRONT_AND_BACK, GL_LINE); glDisable(GL_TEXTURE_2D); glColor3f(1.0f, 0.0f, 0.0f); m->Model.Transp->Render(m->Model.ModSolid, 0); glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); } } /////////////////////////////////////////////////////////////////////////////////////////////////// // GetModelViewProjectionMatrix: save the current OpenGL model-view-projection matrix CMatrix3D CRenderer::GetModelViewProjectionMatrix() { CMatrix3D proj; CMatrix3D view; glGetFloatv( GL_PROJECTION_MATRIX, &proj._11 ); glGetFloatv( GL_MODELVIEW_MATRIX, &view._11 ); return( proj * view ); } /////////////////////////////////////////////////////////////////////////////////////////////////// // SetObliqueFrustumClipping: change the near plane to the given clip plane (in world space) // Based on code from Game Programming Gems 5, from 
http://www.terathon.com/code/oblique.html // - cp is a clip plane in camera space (cp.Dot(v) = 0 for any vector v on the plane) // - sign is 1 or -1, to specify the side to clip on void CRenderer::SetObliqueFrustumClipping(const CVector4D& cp, int sign) { float matrix[16]; CVector4D q; // First, we'll convert the given clip plane to camera space, then we'll // Get the view matrix and normal matrix (top 3x3 part of view matrix) CMatrix3D viewMatrix; m_ViewCamera.m_Orientation.GetInverse(viewMatrix); CMatrix3D normalMatrix = viewMatrix; normalMatrix._14 = 0; normalMatrix._24 = 0; normalMatrix._34 = 0; normalMatrix._44 = 1; normalMatrix._41 = 0; normalMatrix._42 = 0; normalMatrix._43 = 0; // Convert the normal to camera space CVector4D planeNormal(cp.m_X, cp.m_Y, cp.m_Z, 0); planeNormal = normalMatrix.Transform(planeNormal); planeNormal.Normalize(); // Find a point on the plane: we'll take the normal times -D float oldD = cp.m_W; CVector4D pointOnPlane(-oldD * cp.m_X, -oldD * cp.m_Y, -oldD * cp.m_Z, 1); pointOnPlane = viewMatrix.Transform(pointOnPlane); float newD = -pointOnPlane.Dot(planeNormal); // Now create a clip plane from the new normal and new D CVector4D camPlane = planeNormal; camPlane.m_W = newD; // Grab the current projection matrix from OpenGL glGetFloatv(GL_PROJECTION_MATRIX, matrix); // Calculate the clip-space corner point opposite the clipping plane // as (sgn(camPlane.x), sgn(camPlane.y), 1, 1) and // transform it into camera space by multiplying it // by the inverse of the projection matrix q.m_X = (sgn(camPlane.m_X) + matrix[8]) / matrix[0]; q.m_Y = (sgn(camPlane.m_Y) + matrix[9]) / matrix[5]; q.m_Z = -1.0f; q.m_W = (1.0f + matrix[10]) / matrix[14]; // Calculate the scaled plane vector CVector4D c = camPlane * (sign * 2.0f / camPlane.Dot(q)); // Replace the third row of the projection matrix matrix[2] = c.m_X; matrix[6] = c.m_Y; matrix[10] = c.m_Z + 1.0f; matrix[14] = c.m_W; // Load it back into OpenGL glMatrixMode(GL_PROJECTION); 
glLoadMatrixf(matrix); glMatrixMode(GL_MODELVIEW); } /////////////////////////////////////////////////////////////////////////////////////////////////// // RenderReflections: render the water reflections to the reflection texture void CRenderer::RenderReflections() { PROFILE("render reflections"); WaterManager& wm = m->waterManager; // Remember old camera CCamera normalCamera = m_ViewCamera; // Temporarily change the camera to one that is reflected. // Also, for texturing purposes, make it render to a view port the size of the // water texture, stretch the image according to our aspect ratio so it covers // the whole screen despite being rendered into a square, and cover slightly more // of the view so we can see wavy reflections of slightly off-screen objects. m_ViewCamera.m_Orientation.Translate(0, -wm.m_WaterHeight, 0); m_ViewCamera.m_Orientation.Scale(1, -1, 1); m_ViewCamera.m_Orientation.Translate(0, wm.m_WaterHeight, 0); SViewPort vp; vp.m_Height = wm.m_ReflectionTextureSize; vp.m_Width = wm.m_ReflectionTextureSize; vp.m_X = 0; vp.m_Y = 0; m_ViewCamera.SetViewPort(vp); m_ViewCamera.SetProjection(CGameView::defaultNear, CGameView::defaultFar, CGameView::defaultFOV*1.05f); // Slightly higher than view FOV CMatrix3D scaleMat; scaleMat.SetScaling(m_Height/float(std::max(1, m_Width)), 1.0f, 1.0f); m_ViewCamera.m_ProjMat = scaleMat * m_ViewCamera.m_ProjMat; m->SetOpenGLCamera(m_ViewCamera); CVector4D camPlane(0, 1, 0, -wm.m_WaterHeight); SetObliqueFrustumClipping(camPlane, -1); // Save the model-view-projection matrix so the shaders can use it for projective texturing wm.m_ReflectionMatrix = GetModelViewProjectionMatrix(); // Disable backface culling so trees render properly (it might also be possible to flip // the culling direction here, but this seems to lead to problems) glDisable(GL_CULL_FACE); // Make the depth buffer work backwards; there seems to be some oddness with // oblique frustum clipping and the "sign" parameter here glClearDepth(0); 
glClear(GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); glDepthFunc(GL_GEQUAL); // Render sky, terrain and models m->skyManager.RenderSky(); ogl_WarnIfError(); RenderPatches(); ogl_WarnIfError(); RenderModels(); ogl_WarnIfError(); RenderTransparentModels(); ogl_WarnIfError(); // Copy the image to a texture pglActiveTextureARB(GL_TEXTURE0_ARB); glEnable(GL_TEXTURE_2D); glBindTexture(GL_TEXTURE_2D, wm.m_ReflectionTexture); glCopyTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 0, 0, (GLsizei)wm.m_ReflectionTextureSize, (GLsizei)wm.m_ReflectionTextureSize); //Reset old camera and re-enable backface culling m_ViewCamera = normalCamera; m->SetOpenGLCamera(m_ViewCamera); glEnable(GL_CULL_FACE); //glClearDepth(1); //glClear(GL_DEPTH_BUFFER_BIT); //glDepthFunc(GL_LEQUAL); } /////////////////////////////////////////////////////////////////////////////////////////////////// // RenderRefractions: render the water refractions to the refraction texture void CRenderer::RenderRefractions() { PROFILE("render refractions"); WaterManager& wm = m->waterManager; // Remember old camera CCamera normalCamera = m_ViewCamera; // Temporarily change the camera to make it render to a view port the size of the // water texture, stretch the image according to our aspect ratio so it covers // the whole screen despite being rendered into a square, and cover slightly more // of the view so we can see wavy refractions of slightly off-screen objects. 
SViewPort vp; vp.m_Height = wm.m_RefractionTextureSize; vp.m_Width = wm.m_RefractionTextureSize; vp.m_X = 0; vp.m_Y = 0; m_ViewCamera.SetViewPort(vp); m_ViewCamera.SetProjection(CGameView::defaultNear, CGameView::defaultFar, CGameView::defaultFOV*1.05f); // Slightly higher than view FOV CMatrix3D scaleMat; scaleMat.SetScaling(m_Height/float(std::max(1, m_Width)), 1.0f, 1.0f); m_ViewCamera.m_ProjMat = scaleMat * m_ViewCamera.m_ProjMat; m->SetOpenGLCamera(m_ViewCamera); CVector4D camPlane(0, 1, 0, -wm.m_WaterHeight); SetObliqueFrustumClipping(camPlane, -1); // Save the model-view-projection matrix so the shaders can use it for projective texturing wm.m_RefractionMatrix = GetModelViewProjectionMatrix(); // Make the depth buffer work backwards; there seems to be some oddness with // oblique frustum clipping and the "sign" parameter here glClearDepth(0); glClearColor(0.5f, 0.5f, 0.5f, 1.0f); // a neutral gray to blend in with shores glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); glDepthFunc(GL_GEQUAL); // Render terrain and models RenderPatches(); ogl_WarnIfError(); RenderModels(); ogl_WarnIfError(); RenderTransparentModels(); ogl_WarnIfError(); // Copy the image to a texture pglActiveTextureARB(GL_TEXTURE0_ARB); glEnable(GL_TEXTURE_2D); glBindTexture(GL_TEXTURE_2D, wm.m_RefractionTexture); glCopyTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 0, 0, (GLsizei)wm.m_RefractionTextureSize, (GLsizei)wm.m_RefractionTextureSize); //Reset old camera and re-enable backface culling m_ViewCamera = normalCamera; m->SetOpenGLCamera(m_ViewCamera); glEnable(GL_CULL_FACE); glClearDepth(1); glDepthFunc(GL_LEQUAL); } void CRenderer::RenderSilhouettes() { PROFILE("render silhouettes"); // Render silhouettes of units hidden behind terrain or occluders. // To avoid breaking the standard rendering of alpha-blended objects, this // has to be done in a separate pass. 
// First we render all occluders into depth, then render all units with // inverted depth test so any behind an occluder will get drawn in a constant // colour. float silhouetteAlpha = 0.75f; // Silhouette blending requires an almost-universally-supported extension; // fall back to non-blended if unavailable if (!ogl_HaveExtension("GL_EXT_blend_color")) silhouetteAlpha = 1.f; glClear(GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); glColorMask(0, 0, 0, 0); // Render occluders: { PROFILE("render patches"); // To prevent units displaying silhouettes when parts of their model // protrude into the ground, only occlude with the back faces of the // terrain (so silhouettes will still display when behind hills) glCullFace(GL_FRONT); m->terrainRenderer->RenderPatches(); glCullFace(GL_BACK); } { PROFILE("render model occluders"); m->CallModelRenderers(m->Model.ModSolid, m->Model.ModSolidInstancing, m->Model.ModSolid, m->Model.ModSolidInstancing, MODELFLAG_SILHOUETTE_OCCLUDER); } { PROFILE("render transparent occluders"); if (GetRenderPath() == RP_SHADER) { glEnable(GL_ALPHA_TEST); glAlphaFunc(GL_GREATER, 0.4f); m->Model.Transp->Render(m->Model.ModShaderSolidTex, MODELFLAG_SILHOUETTE_OCCLUDER); glDisable(GL_ALPHA_TEST); } else { // Reuse the depth shadow modifier to get alpha-tested rendering m->Model.Transp->Render(m->Model.ModTransparentDepthShadow, MODELFLAG_SILHOUETTE_OCCLUDER); } } glDepthFunc(GL_GEQUAL); glColorMask(1, 1, 1, 1); // Render more efficiently if alpha == 1 if (silhouetteAlpha == 1.f) { // Ideally we'd render objects back-to-front so nearer silhouettes would // appear on top, but sorting has non-zero cost. So we'll keep the depth // write enabled, to do the opposite - far objects will consistently appear // on top. glDepthMask(0); } else { // Since we can't sort, we'll use the stencil buffer to ensure we only draw // a pixel once (using the colour of whatever model happens to be drawn first). 
glEnable(GL_BLEND); glBlendFunc(GL_CONSTANT_ALPHA, GL_ONE_MINUS_CONSTANT_ALPHA); pglBlendColorEXT(0, 0, 0, silhouetteAlpha); glEnable(GL_STENCIL_TEST); glStencilFunc(GL_NOTEQUAL, 1, (GLuint)-1); glStencilOp(GL_KEEP, GL_KEEP, GL_REPLACE); } // TODO: For performance, we probably ought to do a quick raycasting check // to see which units are likely blocked by occluders and not bother // rendering any of the others { PROFILE("render models"); m->CallModelRenderers(m->Model.ModSolidPlayer, m->Model.ModSolidPlayerInstancing, m->Model.ModSolidPlayer, m->Model.ModSolidPlayerInstancing, MODELFLAG_SILHOUETTE_DISPLAY); // (This won't render transparent objects with SILHOUETTE_DISPLAY - will // we have any units that need that?) } // Restore state glDepthFunc(GL_LEQUAL); if (silhouetteAlpha == 1.f) { glDepthMask(1); } else { glDisable(GL_BLEND); glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); pglBlendColorEXT(0, 0, 0, 0); glDisable(GL_STENCIL_TEST); } } void CRenderer::RenderParticles() { // Only supported in shader modes if (GetRenderPath() != RP_SHADER) return; PROFILE("render particles"); m->particleRenderer.RenderParticles(); if (m_ModelRenderMode == EDGED_FACES) { glPolygonMode(GL_FRONT_AND_BACK, GL_LINE); glDisable(GL_TEXTURE_2D); glColor3f(0.0f, 0.5f, 0.0f); m->particleRenderer.RenderParticles(true); glDisable(GL_TEXTURE_2D); glColor3f(0.0f, 1.0f, 0.0f); m->particleRenderer.RenderBounds(); glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); } } /////////////////////////////////////////////////////////////////////////////////////////////////// // RenderSubmissions: force rendering of any batched objects void CRenderer::RenderSubmissions() { PROFILE("render submissions"); ogl_WarnIfError(); // Set the camera m->SetOpenGLCamera(m_ViewCamera); // Prepare model renderers PROFILE_START("prepare models"); m->Model.Normal->PrepareModels(); m->Model.Player->PrepareModels(); if (m->Model.Normal != m->Model.NormalInstancing) m->Model.NormalInstancing->PrepareModels(); if 
(m->Model.Player != m->Model.PlayerInstancing) m->Model.PlayerInstancing->PrepareModels(); m->Model.Transp->PrepareModels(); PROFILE_END("prepare models"); PROFILE_START("prepare terrain"); m->terrainRenderer->PrepareForRendering(); PROFILE_END("prepare terrain"); PROFILE_START("prepare overlays"); m->overlayRenderer.PrepareForRendering(); PROFILE_END("prepare overlays"); PROFILE_START("prepare particles"); m->particleRenderer.PrepareForRendering(); PROFILE_END("prepare particles"); if (m_Caps.m_Shadows && m_Options.m_Shadows && GetRenderPath() == RP_SHADER) { RenderShadowMap(); } // clear buffers PROFILE_START("clear buffers"); glClearColor(m_ClearColor[0],m_ClearColor[1],m_ClearColor[2],m_ClearColor[3]); glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); PROFILE_END("clear buffers"); ogl_WarnIfError(); if (m_WaterManager->m_RenderWater && m_WaterManager->WillRenderFancyWater()) { // render reflected and refracted scenes, then re-clear the screen RenderReflections(); RenderRefractions(); glClearColor(m_ClearColor[0],m_ClearColor[1],m_ClearColor[2],m_ClearColor[3]); glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); } // render submitted patches and models RenderPatches(); ogl_WarnIfError(); if (g_Game) { // g_Game->GetWorld()->GetTerritoryManager()->RenderTerritories(); // TODO: implement in new sim system ogl_WarnIfError(); } // render debug-related terrain overlays TerrainOverlay::RenderOverlays(); ogl_WarnIfError(); // render other debug-related overlays before water (so they can be displayed when underwater) PROFILE_START("render overlays"); m->overlayRenderer.RenderOverlays(); PROFILE_END("render overlays"); ogl_WarnIfError(); RenderModels(); ogl_WarnIfError(); // render transparent stuff, so it can overlap models/terrain RenderTransparentModels(); ogl_WarnIfError(); // render water if (m_WaterManager->m_RenderWater && g_Game) { m->terrainRenderer->RenderWater(); ogl_WarnIfError(); // render transparent stuff 
again, so it can overlap the water RenderTransparentModels(); ogl_WarnIfError(); // TODO: Maybe think of a better way to deal with transparent objects; // they can appear both under and above water (seaweed vs. trees), but doing // 2 renders causes (a) inefficiency and (b) darker over-water objects (e.g. // trees) than usual because the transparent bits get overwritten twice. // This doesn't look particularly bad, but it is noticeable if you try // turning the water off. On the other hand every user will have water // on all the time, so it might not be worth worrying about. } // particles are transparent so render after water RenderParticles(); ogl_WarnIfError(); RenderSilhouettes(); // Clean up texture blend mode so particles and other things render OK // (really this should be cleaned up by whoever set it) glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_MODULATE); // render debug lines if (m_DisplayFrustum) { DisplayFrustum(); m->shadow->RenderDebugDisplay(); ogl_WarnIfError(); } // render overlays that should appear on top of all other objects PROFILE_START("render fg overlays"); m->overlayRenderer.RenderForegroundOverlays(m_ViewCamera); PROFILE_END("render fg overlays"); ogl_WarnIfError(); } /////////////////////////////////////////////////////////////////////////////////////////////////// // EndFrame: signal frame end void CRenderer::EndFrame() { PROFILE("end frame"); // empty lists m->terrainRenderer->EndFrame(); m->overlayRenderer.EndFrame(); m->particleRenderer.EndFrame(); // Finish model renderers m->Model.Normal->EndFrame(); m->Model.Player->EndFrame(); if (m->Model.Normal != m->Model.NormalInstancing) m->Model.NormalInstancing->EndFrame(); if (m->Model.Player != m->Model.PlayerInstancing) m->Model.PlayerInstancing->EndFrame(); m->Model.Transp->EndFrame(); ogl_tex_bind(0, 0); if (glGetError()) { ONCE(LOGERROR(L"CRenderer::EndFrame: GL errors occurred")); } } 
/////////////////////////////////////////////////////////////////////////////////////////////////// // DisplayFrustum: debug displays // - white: cull camera frustum // - red: bounds of shadow casting objects void CRenderer::DisplayFrustum() { glDepthMask(0); glDisable(GL_CULL_FACE); glEnable(GL_BLEND); glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); glColor4ub(255,255,255,64); m_CullCamera.Render(2); glDisable(GL_BLEND); glPolygonMode(GL_FRONT_AND_BACK, GL_LINE); glColor3ub(255,255,255); m_CullCamera.Render(2); glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); glEnable(GL_CULL_FACE); glDepthMask(1); } /////////////////////////////////////////////////////////////////////////////////////////////////// // Text overlay rendering void CRenderer::RenderTextOverlays() { PROFILE("render text overlays"); if (m_DisplayTerrainPriorities) m->terrainRenderer->RenderPriorities(); ogl_WarnIfError(); } /////////////////////////////////////////////////////////////////////////////////////////////////// // SetSceneCamera: setup projection and transform of camera and adjust viewport to current view // The camera always represents the actual camera used to render a scene, not any virtual camera // used for shadow rendering or reflections. 
void CRenderer::SetSceneCamera(const CCamera& viewCamera, const CCamera& cullCamera) { m_ViewCamera = viewCamera; m_CullCamera = cullCamera; if (m_Caps.m_Shadows && m_Options.m_Shadows && GetRenderPath() == RP_SHADER) m->shadow->SetupFrame(m_CullCamera, m_LightEnv->GetSunDir()); } void CRenderer::SetViewport(const SViewPort &vp) { glViewport((GLint)vp.m_X,(GLint)vp.m_Y,(GLsizei)vp.m_Width,(GLsizei)vp.m_Height); } void CRenderer::Submit(CPatch* patch) { m->terrainRenderer->Submit(patch); } void CRenderer::Submit(SOverlayLine* overlay) { m->overlayRenderer.Submit(overlay); } void CRenderer::Submit(SOverlaySprite* overlay) { m->overlayRenderer.Submit(overlay); } void CRenderer::Submit(CModelDecal* decal) { m->terrainRenderer->Submit(decal); } void CRenderer::Submit(CParticleEmitter* emitter) { m->particleRenderer.Submit(emitter); } void CRenderer::SubmitNonRecursive(CModel* model) { if (model->GetFlags() & MODELFLAG_CASTSHADOWS) { // PROFILE( "updating shadow bounds" ); m->shadow->AddShadowedBound(model->GetBounds()); } // Tricky: The call to GetBounds() above can invalidate the position model->ValidatePosition(); bool canUseInstancing = false; if (model->GetModelDef()->GetNumBones() == 0) canUseInstancing = true; if (model->GetMaterial().IsPlayer()) { if (canUseInstancing) m->Model.PlayerInstancing->Submit(model); else m->Model.Player->Submit(model); } else if (model->GetMaterial().UsesAlpha()) { m->Model.Transp->Submit(model); } else { if (canUseInstancing) m->Model.NormalInstancing->Submit(model); else m->Model.Normal->Submit(model); } } /////////////////////////////////////////////////////////// // Render the given scene void CRenderer::RenderScene(Scene& scene) { m_CurrentScene = &scene; CFrustum frustum = m_CullCamera.GetFrustum(); scene.EnumerateObjects(frustum, this); m->particleManager.RenderSubmit(*this, frustum); ogl_WarnIfError(); RenderSubmissions(); m_CurrentScene = NULL; } Scene& CRenderer::GetScene() { debug_assert(m_CurrentScene); return 
*m_CurrentScene; } ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // BindTexture: bind a GL texture object to current active unit void CRenderer::BindTexture(int unit,GLuint tex) { pglActiveTextureARB(GL_TEXTURE0+unit); glBindTexture(GL_TEXTURE_2D,tex); if (tex) { glEnable(GL_TEXTURE_2D); } else { glDisable(GL_TEXTURE_2D); } } static inline void CopyTriple(unsigned char* dst,const unsigned char* src) { dst[0]=src[0]; dst[1]=src[1]; dst[2]=src[2]; } /////////////////////////////////////////////////////////////////////////////////////////////////// // LoadAlphaMaps: load the 14 default alpha maps, pack them into one composite texture and // calculate the coordinate of each alphamap within this packed texture int CRenderer::LoadAlphaMaps() { const wchar_t* const key = L"(alpha map composite)"; Handle ht = ogl_tex_find(key); // alpha map texture had already been created and is still in memory: // reuse it, do not load again. 
if(ht > 0) { m_hCompositeAlphaMap = ht; return 0; } // // load all textures and store Handle in array // Handle textures[NumAlphaMaps] = {0}; VfsPath path(L"art/textures/terrain/alphamaps/standard"); const wchar_t* fnames[NumAlphaMaps] = { L"blendcircle.png", L"blendlshape.png", L"blendedge.png", L"blendedgecorner.png", L"blendedgetwocorners.png", L"blendfourcorners.png", L"blendtwooppositecorners.png", L"blendlshapecorner.png", L"blendtwocorners.png", L"blendcorner.png", L"blendtwoedges.png", L"blendthreecorners.png", L"blendushape.png", L"blendbad.png" }; size_t base = 0; // texture width/height (see below) // for convenience, we require all alpha maps to be of the same BPP // (avoids another ogl_tex_get_size call, and doesn't hurt) size_t bpp = 0; for(size_t i=0;i data = io_Allocate(total_w*total_h*3); + shared_ptr data; + AllocateAligned(data, total_w*total_h*3, maxSectorSize); // for each tile on row for(size_t i=0;i(param); // If an alpha map changed, and we already loaded them, then reload them if (boost::algorithm::starts_with(path.string(), L"art/textures/terrain/alphamaps/")) { if (renderer->m_hCompositeAlphaMap) { renderer->UnloadAlphaMaps(); renderer->LoadAlphaMaps(); } } return INFO::OK; } void CRenderer::MakeShadersDirty() { m->ShadersDirty = true; } /////////////////////////////////////////////////////////////////////////////////////////////////// // Scripting Interface jsval CRenderer::JSI_GetFastPlayerColor(JSContext*) { return ToJSVal(m_FastPlayerColor); } void CRenderer::JSI_SetFastPlayerColor(JSContext* ctx, jsval newval) { bool fast; if (!ToPrimitive(ctx, newval, fast)) return; SetFastPlayerColor(fast); } jsval CRenderer::JSI_GetRenderPath(JSContext*) { return ToJSVal(GetRenderPathName(m_Options.m_RenderPath)); } void CRenderer::JSI_SetRenderPath(JSContext* ctx, jsval newval) { CStr name; if (!ToPrimitive(ctx, newval, name)) return; SetRenderPath(GetRenderPathByName(name)); } jsval CRenderer::JSI_GetDepthTextureBits(JSContext*) { return 
ToJSVal(m->shadow->GetDepthTextureBits()); } void CRenderer::JSI_SetDepthTextureBits(JSContext* ctx, jsval newval) { int depthTextureBits; if (!ToPrimitive(ctx, newval, depthTextureBits)) return; m->shadow->SetDepthTextureBits(depthTextureBits); } jsval CRenderer::JSI_GetShadows(JSContext*) { return ToJSVal(m_Options.m_Shadows); } void CRenderer::JSI_SetShadows(JSContext* ctx, jsval newval) { if (!ToPrimitive(ctx, newval, m_Options.m_Shadows)) return; ReloadShaders(); } jsval CRenderer::JSI_GetShadowAlphaFix(JSContext*) { return ToJSVal(m_Options.m_ShadowAlphaFix); } void CRenderer::JSI_SetShadowAlphaFix(JSContext* ctx, jsval newval) { if (!ToPrimitive(ctx, newval, m_Options.m_ShadowAlphaFix)) return; m->shadow->RecreateTexture(); } jsval CRenderer::JSI_GetSky(JSContext*) { return ToJSVal(m->skyManager.GetSkySet()); } void CRenderer::JSI_SetSky(JSContext* ctx, jsval newval) { CStrW skySet; if (!ToPrimitive(ctx, newval, skySet)) return; m->skyManager.SetSkySet(skySet); } void CRenderer::ScriptingInit() { AddProperty(L"fastPlayerColor", &CRenderer::JSI_GetFastPlayerColor, &CRenderer::JSI_SetFastPlayerColor); AddProperty(L"renderpath", &CRenderer::JSI_GetRenderPath, &CRenderer::JSI_SetRenderPath); AddProperty(L"sortAllTransparent", &CRenderer::m_SortAllTransparent); AddProperty(L"displayFrustum", &CRenderer::m_DisplayFrustum); AddProperty(L"shadowZBias", &CRenderer::m_ShadowZBias); AddProperty(L"shadowMapSize", &CRenderer::m_ShadowMapSize); AddProperty(L"disableCopyShadow", &CRenderer::m_DisableCopyShadow); AddProperty(L"shadows", &CRenderer::JSI_GetShadows, &CRenderer::JSI_SetShadows); AddProperty(L"depthTextureBits", &CRenderer::JSI_GetDepthTextureBits, &CRenderer::JSI_SetDepthTextureBits); AddProperty(L"shadowAlphaFix", &CRenderer::JSI_GetShadowAlphaFix, &CRenderer::JSI_SetShadowAlphaFix); AddProperty(L"skipSubmit", &CRenderer::m_SkipSubmit); AddProperty(L"skySet", &CRenderer::JSI_GetSky, &CRenderer::JSI_SetSky); CJSObject::ScriptingInit("Renderer"); } 
CTextureManager& CRenderer::GetTextureManager() { return m->textureManager; } CShaderManager& CRenderer::GetShaderManager() { return m->shaderManager; } CParticleManager& CRenderer::GetParticleManager() { return m->particleManager; } Index: ps/trunk/source/lib/allocators/unique_range.cpp =================================================================== --- ps/trunk/source/lib/allocators/unique_range.cpp (nonexistent) +++ ps/trunk/source/lib/allocators/unique_range.cpp (revision 9350) @@ -0,0 +1,40 @@ +#include "precompiled.h" +#include "lib/allocators/unique_range.h" + +#include "lib/sysdep/cpu.h" // cpu_AtomicAdd +#include "lib/sysdep/rtl.h" // rtl_FreeAligned + + +static void UniqueRangeDeleterNone(void* UNUSED(pointer), size_t UNUSED(size)) +{ + // (introducing this do-nothing function avoids having to check whether deleter != 0) +} + +static void UniqueRangeDeleterAligned(void* pointer, size_t UNUSED(size)) +{ + return rtl_FreeAligned(pointer); +} + + +static UniqueRangeDeleter deleters[idxDeleterBits+1] = { UniqueRangeDeleterNone, UniqueRangeDeleterAligned }; + +static IdxDeleter numDeleters = 2; + + +IdxDeleter AddUniqueRangeDeleter(UniqueRangeDeleter deleter) +{ + debug_assert(deleter); + IdxDeleter idxDeleter = cpu_AtomicAdd(&numDeleters, 1); + debug_assert(idxDeleter < (IdxDeleter)ARRAY_SIZE(deleters)); + deleters[idxDeleter] = deleter; + return idxDeleter; +} + + +void CallUniqueRangeDeleter(void* pointer, size_t size, IdxDeleter idxDeleter) throw() +{ + ASSERT(idxDeleter < numDeleters); + // (some deleters do not tolerate null pointers) + if(pointer) + deleters[idxDeleter](pointer, size); +} Index: ps/trunk/source/lib/allocators/unique_range.h =================================================================== --- ps/trunk/source/lib/allocators/unique_range.h (nonexistent) +++ ps/trunk/source/lib/allocators/unique_range.h (revision 9350) @@ -0,0 +1,185 @@ +#ifndef INCLUDED_UNIQUE_RANGE +#define INCLUDED_UNIQUE_RANGE + + +#define ASSERT debug_assert + + 
+#include "lib/lib_api.h" + +// we usually don't hold multiple references to allocations, so unique_ptr +// can be used instead of the more complex (ICC generated incorrect code on +// 2 occasions) and expensive shared_ptr. +// a custom deleter is required because allocators such as ReserveAddressSpace need to +// pass the size to their deleter. we want to mix pointers from various allocators, but +// unique_ptr's deleter is fixed at compile-time, so it would need to be general enough +// to handle all allocators. +// storing the size and a function pointer would be one such solution, with the added +// bonus of no longer requiring a complete type at the invocation of ~unique_ptr. +// however, this inflates the pointer size to 3 words. if only a few allocator types +// are needed, we can replace the function pointer with an index stashed into the +// lower bits of the pointer (safe because allocations are always aligned to the +// word size). +typedef intptr_t IdxDeleter; + +// no-op deleter (use when returning part of an existing allocation) +// must be zero because reset() sets address (which includes idxDeleter) to zero. +static const IdxDeleter idxDeleterNone = 0; + +static const IdxDeleter idxDeleterAligned = 1; + +// (temporary value to prevent concurrent calls to AddUniqueRangeDeleter) +static const IdxDeleter idxDeleterBusy = -IdxDeleter(1); + +// governs the maximum number of IdxDeleter and each pointer's alignment requirements +static const IdxDeleter idxDeleterBits = 0x7; + +typedef void (*UniqueRangeDeleter)(void* pointer, size_t size); + +/** + * @return the next available IdxDeleter and associate it with the deleter. + * halts the program if the idxDeleterBits limit has been reached. + * + * thread-safe, but no attempt is made to detect whether the deleter has already been + * registered (would require a mutex). each allocator must ensure they only call this once. 
+ **/ +LIB_API IdxDeleter AddUniqueRangeDeleter(UniqueRangeDeleter deleter); + +LIB_API void CallUniqueRangeDeleter(void* pointer, size_t size, IdxDeleter idxDeleter) throw(); + + +// unfortunately, unique_ptr allows constructing without a custom deleter. to ensure callers can +// rely upon pointers being associated with a size, we introduce a `UniqueRange' replacement. +// its interface is identical to unique_ptr except for the constructors, the addition of +// size() and the removal of operator bool (which avoids implicit casts to int). +class UniqueRange +{ +public: + typedef void* pointer; + typedef void element_type; + + UniqueRange() + { + Set(0, 0, idxDeleterNone); + } + + UniqueRange(pointer p, size_t size, IdxDeleter deleter) + { + Set(p, size, deleter); + } + + UniqueRange(RVREF(UniqueRange) rvref) + { + UniqueRange& rhs = LVALUE(rvref); + address_ = rhs.address_; + size_ = rhs.size_; + rhs.address_ = 0; + } + + UniqueRange& operator=(RVREF(UniqueRange) rvref) + { + UniqueRange& rhs = LVALUE(rvref); + if(this != &rhs) + { + Delete(); + address_ = rhs.address_; + size_ = rhs.size_; + rhs.address_ = 0; + } + return *this; + } + + ~UniqueRange() + { + Delete(); + } + + pointer get() const + { + return pointer(address_ & ~idxDeleterBits); + } + + IdxDeleter get_deleter() const + { + return IdxDeleter(address_ & idxDeleterBits); + } + + size_t size() const + { + return size_; + } + + // side effect: subsequent get_deleter will return idxDeleterNone + pointer release() // relinquish ownership + { + pointer ret = get(); + address_ = 0; + return ret; + } + + void reset() + { + Delete(); + address_ = 0; + } + + void reset(pointer p, size_t size, IdxDeleter deleter) + { + Delete(); + Set(p, size, deleter); + } + + void swap(UniqueRange& rhs) + { + std::swap(address_, rhs.address_); + std::swap(size_, rhs.size_); + } + +private: + void Set(pointer p, size_t size, IdxDeleter deleter) + { + ASSERT((uintptr_t(p) & idxDeleterBits) == 0); + ASSERT(deleter <= 
idxDeleterBits); + + address_ = uintptr_t(p) | deleter; + size_ = size; + + ASSERT(get() == p); + ASSERT(get_deleter() == deleter); + ASSERT(this->size() == size); + } + + void Delete() + { + CallUniqueRangeDeleter(get(), size(), get_deleter()); + } + + // disallow construction and assignment from lvalue + UniqueRange(const UniqueRange&); + UniqueRange& operator=(const UniqueRange&); + + // (IdxDeleter is stored in the lower bits of address since size might not even be a multiple of 4.) + uintptr_t address_; + size_t size_; +}; + +namespace std { + +static inline void swap(UniqueRange& p1, UniqueRange& p2) +{ + p1.swap(p2); +} + +static inline void swap(RVREF(UniqueRange) p1, UniqueRange& p2) +{ + p2.swap(LVALUE(p1)); +} + +static inline void swap(UniqueRange& p1, RVREF(UniqueRange) p2) +{ + p1.swap(LVALUE(p2)); +} + +} + +#endif // #ifndef INCLUDED_UNIQUE_RANGE