Index: ps/trunk/source/lib/sysdep/arch/x86_x64/cache.h =================================================================== --- ps/trunk/source/lib/sysdep/arch/x86_x64/cache.h +++ ps/trunk/source/lib/sysdep/arch/x86_x64/cache.h @@ -1,144 +0,0 @@ -/* Copyright (C) 2018 Wildfire Games. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef INCLUDED_X86_X64_CACHE -#define INCLUDED_X86_X64_CACHE - -namespace x86_x64 { - -struct Cache // POD (may be used before static constructors) -{ - enum Type - { - // (values match the CPUID.4 definition) - kNull, - kData, - kInstruction, - kUnified - // note: further values are "reserved" - }; - - static const size_t maxLevels = 4; - - static const size_t fullyAssociative = 0xFF; // (CPUID.4 definition) - - /** - * 1..maxLevels - **/ - size_t m_Level; - - /** - * never kNull - **/ - Type m_Type; - - /** - * if 0, the cache is disabled and all other values are zero - **/ - size_t m_NumEntries; - - /** - * NB: cache entries are lines, TLB entries are pages - **/ - size_t m_EntrySize; - - /** - * = fullyAssociative or the actual ways of m_Associativity - **/ - size_t m_Associativity; - - /** - * how many logical processors share this cache? - **/ - size_t m_SharedBy; - - void Initialize(size_t level, Type type) - { - m_Level = level; - m_Type = type; - m_NumEntries = 0; - m_EntrySize = 0; - m_Associativity = 0; - m_SharedBy = 0; - - ENSURE(Validate()); - } - - bool Validate() const - { - if(!(1 <= m_Level && m_Level <= maxLevels)) - return false; - - if(m_Type == kNull) - return false; - - if(m_NumEntries == 0) // disabled - { - if(m_EntrySize != 0) - return false; - if(m_Associativity != 0) - return false; - if(m_SharedBy != 0) - return false; - } - else - { - if(m_EntrySize == 0) - return false; - if(m_Associativity == 0 || m_Associativity > fullyAssociative) - return false; - if(m_SharedBy == 0) - return false; - } - - return true; - } - - u64 TotalSize() const - { - return u64(m_NumEntries)*m_EntrySize; - } -}; - -enum IdxCache -{ - // (AddCache relies upon this order) - L1D = 1, - L2D, - L3D, - L4D, - L1I, - L2I, - L3I, - L4I, - TLB -}; - -/** - * @return 0 if idxCache >= TLB+numTLBs, otherwise a valid pointer to - * a Cache whose m_NumEntries is 0 if disabled / not present. - **/ -LIB_API const Cache* Caches(size_t idxCache); - -} // namespace x86_x64 - -#endif // #ifndef INCLUDED_X86_X64_CACHE Index: ps/trunk/source/lib/sysdep/arch/x86_x64/cache.cpp =================================================================== --- ps/trunk/source/lib/sysdep/arch/x86_x64/cache.cpp +++ ps/trunk/source/lib/sysdep/arch/x86_x64/cache.cpp @@ -1,663 +0,0 @@ -/* Copyright (C) 2020 Wildfire Games. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include "precompiled.h" -#include "lib/sysdep/arch/x86_x64/cache.h" - -#include "lib/bits.h" -#include "lib/alignment.h" -#include "lib/module_init.h" -#include "lib/sysdep/os_cpu.h" -#include "lib/sysdep/arch/x86_x64/x86_x64.h" - -#include - -namespace x86_x64 { - -static const size_t maxTLBs = 2*2*4; // (level0, level1) x (D,I) x (4K, 2M, 4M, 1G) -static size_t numTLBs = 0; - -static const size_t numCaches = x86_x64::Cache::maxLevels * 2 + maxTLBs; -static Cache caches[numCaches]; - - -static void AddCache(const x86_x64::Cache& cache) -{ - ENSURE(cache.Validate()); - - if(cache.m_Type == x86_x64::Cache::kData || cache.m_Type == x86_x64::Cache::kUnified) - caches[L1D + cache.m_Level-1] = cache; - if(cache.m_Type == x86_x64::Cache::kInstruction || cache.m_Type == x86_x64::Cache::kUnified) - caches[L1I + cache.m_Level-1] = cache; -} - - -static void AddTLB(const x86_x64::Cache& tlb) -{ - ENSURE(tlb.Validate()); - ENSURE(tlb.m_Level == 1 || tlb.m_Level == 2); // see maxTLBs - - ENSURE(numTLBs < maxTLBs); - caches[TLB+numTLBs++] = tlb; -} - - -//----------------------------------------------------------------------------- -// AMD - -// (Intel has subsequently added support for function 0x80000006, but -// only returns ECX, i.e. L2 information.) -namespace AMD -{ - -static x86_x64::Cache L1Cache(u32 reg, x86_x64::Cache::Type type) -{ - x86_x64::Cache cache; - cache.Initialize(1, type); - - const size_t lineSize = bits(reg, 0, 7); - const size_t associativity = bits(reg, 16, 23); // 0 = reserved - const size_t totalSize = bits(reg, 24, 31)*KiB; - if(lineSize != 0 && associativity != 0 && totalSize != 0) - { - cache.m_NumEntries = totalSize / lineSize; - cache.m_EntrySize = lineSize; - cache.m_Associativity = associativity; - cache.m_SharedBy = 1; - } - return cache; -} - -// applies to L2, L3 and TLB2 -static const size_t associativityTable[16] = -{ - 0, 1, 2, 0, 4, 0, 8, 0, - // TODO: The second '16' does not obey to the specifications and is only a workaround. For a correct implementation please look here: https://community.amd.com/thread/244207 - 16, 16, 32, 48, 64, 96, 128, x86_x64::Cache::fullyAssociative -}; - -static x86_x64::Cache L2Cache(u32 reg, x86_x64::Cache::Type type) -{ - x86_x64::Cache cache; - cache.Initialize(2, type); - - const size_t lineSize = bits(reg, 0, 7); - const size_t idxAssociativity = bits(reg, 12, 15); // 0 = disabled - const size_t totalSize = bits(reg, 16, 31)*KiB; - if(lineSize != 0 && idxAssociativity != 0 && totalSize != 0) - { - cache.m_NumEntries = totalSize / lineSize; - cache.m_EntrySize = lineSize; - cache.m_Associativity = associativityTable[idxAssociativity]; - cache.m_SharedBy = 1; - } - return cache; -} - -// (same as L2 except for the size) -static x86_x64::Cache L3Cache(u32 reg, x86_x64::Cache::Type type) -{ - x86_x64::Cache cache; - cache.Initialize(3, type); - - const size_t lineSize = bits(reg, 0, 7); - const size_t idxAssociativity = bits(reg, 12, 15); // 0 = disabled - const size_t totalSize = bits(reg, 18, 31)*512*KiB; // (rounded down) - // NB: some Athlon 64 X2 models have no L3 cache - if(lineSize != 0 && idxAssociativity != 0 && totalSize != 0) - { - cache.m_NumEntries = totalSize / lineSize; - cache.m_EntrySize = lineSize; - cache.m_Associativity = associativityTable[idxAssociativity]; - cache.m_SharedBy = 1; - } - return cache; -} - -static x86_x64::Cache TLB1(u32 reg, size_t bitOffset, size_t pageSize, x86_x64::Cache::Type type) -{ - x86_x64::Cache cache; - cache.Initialize(1, type); - - const size_t numEntries = bits(reg, bitOffset+0, bitOffset+ 7); - const size_t associativity = bits(reg, bitOffset+8, bitOffset+15); // 0 = reserved - if(numEntries != 0 && associativity != 0) - { - cache.m_NumEntries = numEntries; - cache.m_EntrySize = pageSize; - cache.m_Associativity = associativity; - cache.m_SharedBy = 1; - } - return cache; -} - -static x86_x64::Cache TLB2(u32 reg, size_t bitOffset, size_t pageSize, x86_x64::Cache::Type type) -{ - x86_x64::Cache cache; - cache.Initialize(2, type); - - const size_t numEntries = bits(reg, bitOffset+ 0, bitOffset+11); - const size_t idxAssociativity = bits(reg, bitOffset+12, bitOffset+15); // 0 = disabled - if(numEntries != 0 && idxAssociativity != 0) - { - cache.m_NumEntries = numEntries; - cache.m_EntrySize = pageSize; - cache.m_Associativity = associativityTable[idxAssociativity]; - cache.m_SharedBy = 1; - } - return cache; -} - -static void AddTLB2Pair(u32 reg, size_t pageSize) -{ - x86_x64::Cache::Type type = x86_x64::Cache::kUnified; - if(bits(reg, 16, 31) != 0) // not unified - { - AddTLB(TLB2(reg, 16, pageSize, x86_x64::Cache::kData)); - type = x86_x64::Cache::kInstruction; - } - AddTLB(TLB2(reg, 0, pageSize, type)); -} - -// AMD reports maxCpuidIdFunction > 4 but consider functions 2..4 to be -// "reserved". cache characteristics are returned via ext. functions. -static void DetectCacheAndTLB() -{ - x86_x64::CpuidRegs regs = { 0 }; - - regs.eax = 0x80000005; - if(x86_x64::cpuid(®s)) - { - AddCache(L1Cache(regs.ecx, x86_x64::Cache::kData)); - AddCache(L1Cache(regs.edx, x86_x64::Cache::kInstruction)); - - AddTLB(TLB1(regs.eax, 0, 2*MiB, x86_x64::Cache::kInstruction)); - AddTLB(TLB1(regs.eax, 16, 2*MiB, x86_x64::Cache::kData)); - AddTLB(TLB1(regs.ebx, 0, 4*KiB, x86_x64::Cache::kInstruction)); - AddTLB(TLB1(regs.ebx, 16, 4*KiB, x86_x64::Cache::kData)); - } - - regs.eax = 0x80000006; - if(x86_x64::cpuid(®s)) - { - AddCache(L2Cache(regs.ecx, x86_x64::Cache::kUnified)); - AddCache(L3Cache(regs.edx, x86_x64::Cache::kUnified)); - - AddTLB2Pair(regs.eax, 2*MiB); - AddTLB2Pair(regs.ebx, 4*KiB); - } -} - -} // namespace AMD - - -//----------------------------------------------------------------------------- -// CPUID.4 - -namespace CPUID4 { - -static bool DetectCache() -{ - // note: level order is unspecified (see Intel AP-485) - for(u32 count = 0; ; count++) - { - x86_x64::CpuidRegs regs = { 0 }; - regs.eax = 4; - regs.ecx = count; - if(!x86_x64::cpuid(®s)) - return false; - - const x86_x64::Cache::Type type = (x86_x64::Cache::Type)bits(regs.eax, 0, 4); - if(type == x86_x64::Cache::kNull) // no more remaining - break; - - const size_t level = (size_t)bits(regs.eax, 5, 7); - const size_t partitions = (size_t)bits(regs.ebx, 12, 21)+1; - const size_t sets = (size_t)bits(regs.ecx, 0, 31)+1; - - x86_x64::Cache cache; - cache.Initialize(level, type); - cache.m_EntrySize = static_cast(bits(regs.ebx, 0, 11) + 1); // (yes, this also uses +1 encoding) - cache.m_Associativity = static_cast(bits(regs.ebx, 22, 31) + 1); - cache.m_SharedBy = static_cast(bits(regs.eax, 14, 25) + 1); - cache.m_NumEntries = cache.m_Associativity * partitions * sets; - - AddCache(cache); - } - - return true; -} - -} // namespace CPUID4 - - -//----------------------------------------------------------------------------- -// CPUID.2 (descriptors) - -namespace CPUID2 { - -typedef u8 Descriptor; -typedef std::vector Descriptors; - -static void AppendDescriptors(u32 reg, Descriptors& descriptors) -{ - if(IsBitSet(reg, 31)) // register contents are reserved - return; - for(int pos = 24; pos >= 0; pos -= 8) - { - const u8 descriptor = (u8)bits(reg, pos, pos+7); - if(descriptor != 0) - descriptors.push_back(descriptor); - } -} - - -static Descriptors GetDescriptors() -{ - // ensure consistency by pinning to a CPU. - // (don't use a hard-coded mask because process affinity may be restricted) - const uintptr_t allProcessors = os_cpu_ProcessorMask(); - const uintptr_t firstProcessor = allProcessors & -intptr_t(allProcessors); - const uintptr_t prevAffinityMask = os_cpu_SetThreadAffinityMask(firstProcessor); - - x86_x64::CpuidRegs regs = { 0 }; - regs.eax = 2; - if(!x86_x64::cpuid(®s)) - return Descriptors(); - - Descriptors descriptors; - size_t iterations = bits(regs.eax, 0, 7); - for(;;) // abort mid-loop (invoke CPUID exactly times) - { - AppendDescriptors(bits(regs.eax, 8, 31), descriptors); - AppendDescriptors(regs.ebx, descriptors); - AppendDescriptors(regs.ecx, descriptors); - AppendDescriptors(regs.edx, descriptors); - if(--iterations == 0) - break; - regs.eax = 2; - const bool ok = x86_x64::cpuid(®s); - ENSURE(ok); - } - - os_cpu_SetThreadAffinityMask(prevAffinityMask); - - return descriptors; -} - - -// note: the following cannot be moved into a function because -// ARRAY_SIZE's template argument must not reference a local type. - -enum Flags -{ - // level (bits 0..1) - L1 = 1, - L2, - L3, - - // type (bits 2..3) - I = 0x04, // instruction - D = 0x08, // data - U = I|D // unified - - // largeSize (bits 4..31 with bits 0..3 zeroed): TLB entrySize or cache numEntries -}; - -// (there are > 100 descriptors, so we squeeze all fields into 8 bytes.) -struct Characteristics // POD -{ - x86_x64::Cache::Type Type() const - { - switch(flags & U) - { - case D: - return x86_x64::Cache::kData; - case I: - return x86_x64::Cache::kInstruction; - case U: - return x86_x64::Cache::kUnified; - default: - DEBUG_WARN_ERR(ERR::LOGIC); - return x86_x64::Cache::kNull; - } - } - - size_t Level() const - { - const size_t level = flags & 3; - ENSURE(level != 0); - return level; - } - - bool IsTLB() const - { - return smallSize >= 0; - } - - size_t NumEntries() const - { - return IsTLB()? (size_t)smallSize : (flags & ~0xF); - } - - size_t EntrySize() const - { - return IsTLB()? (flags & ~0xF) : (size_t)(-smallSize); - } - - u8 descriptor; - u8 associativity; - i16 smallSize; // negative cache entrySize or TLB numEntries - u32 flags; // level, type, largeSize -}; - -static const u8 F = x86_x64::Cache::fullyAssociative; - -#define CACHE(descriptor, flags, totalSize, assoc, entrySize) { descriptor, assoc, -entrySize, flags | ((totalSize)/(entrySize)) } -#define TLB(descriptor, flags, entrySize, assoc, numEntries) { descriptor, assoc, numEntries, flags | (entrySize) } - -// (we need to include cache descriptors because early Pentium4 don't implement CPUID.4) -// references: [accessed 2011-02-26] -// AP485 http://www.intel.com/Assets/PDF/appnote/241618.pdf -// sdman http://www.intel.com/Assets/PDF/manual/253666.pdf -// sandp http://www.sandpile.org/ia32/cpuid.htm -// opsol http://src.opensolaris.org/source/xref/onnv/onnv-gate/usr/src/uts/i86pc/os/cpuid.c -static const Characteristics characteristicsTable[] = -{ - TLB (0x01, L1|I, 4*KiB, 4, 32), - TLB (0x02, L1|I, 4*MiB, F, 2), - TLB (0x03, L1|D, 4*KiB, 4, 64), - TLB (0x04, L1|D, 4*MiB, 4, 8), - TLB (0x05, L1|D, 4*MiB, 4, 32), - - CACHE(0x06, L1|I, 8*KiB, 4, 32), - CACHE(0x08, L1|I, 16*KiB, 4, 32), - CACHE(0x09, L1|I, 32*KiB, 4, 64), - CACHE(0x0A, L1|I, 8*KiB, 2, 32), - - TLB (0x0B, L1|I, 4*MiB, 4, 4), - - CACHE(0x0C, L1|D, 16*KiB, 4, 32), - CACHE(0x0D, L1|D, 16*KiB, 4, 64), // opsol: 32B (would be redundant with 0x0C), AP485: 64B, sdman: 64B - CACHE(0x0E, L1|D, 24*KiB, 6, 64), - - CACHE(0x21, L2|U, 256*KiB, 8, 64), - - CACHE(0x22, L3|U, 512*KiB, 4, 64), - CACHE(0x23, L3|U, 1*MiB, 8, 64), - CACHE(0x25, L3|U, 2*MiB, 8, 64), - CACHE(0x29, L3|U, 4*MiB, 8, 64), - - CACHE(0x2c, L1|D, 32*KiB, 8, 64), - - CACHE(0x30, L1|I, 32*KiB, 8, 64), - - CACHE(0x39, L2|U, 128*KiB, 4, 64), - CACHE(0x3A, L2|U, 192*KiB, 6, 64), - CACHE(0x3B, L2|U, 128*KiB, 2, 64), - CACHE(0x3C, L2|U, 256*KiB, 4, 64), - CACHE(0x3D, L2|U, 384*KiB, 6, 64), - CACHE(0x3E, L2|U, 512*KiB, 4, 64), - CACHE(0x41, L2|U, 128*KiB, 4, 32), - CACHE(0x42, L2|U, 256*KiB, 4, 32), - CACHE(0x43, L2|U, 512*KiB, 4, 32), - CACHE(0x44, L2|U, 1*MiB, 4, 32), - CACHE(0x45, L2|U, 2*MiB, 4, 32), - - CACHE(0x46, L3|U, 4*MiB, 4, 64), - CACHE(0x47, L3|U, 8*MiB, 8, 64), - CACHE(0x48, L2|U, 3*MiB, 12, 64), - CACHE(0x49, L2|U, 4*MiB, 16, 64), - CACHE(0x49, L3|U, 4*MiB, 16, 64), - CACHE(0x4A, L3|U, 6*MiB, 12, 64), - CACHE(0x4B, L3|U, 8*MiB, 16, 64), - CACHE(0x4C, L3|U, 12*MiB, 12, 64), - CACHE(0x4D, L3|U, 16*MiB, 16, 64), - CACHE(0x4E, L2|U, 6*MiB, 24, 64), - - TLB (0x4F, L1|I, 4*KiB, F, 32), // sandp: unknown assoc, opsol: full, AP485: unspecified - TLB (0x50, L1|I, 4*KiB, F, 64), - TLB (0x50, L1|I, 4*MiB, F, 64), - TLB (0x50, L1|I, 2*MiB, F, 64), - TLB (0x51, L1|I, 4*KiB, F, 128), - TLB (0x51, L1|I, 4*MiB, F, 128), - TLB (0x51, L1|I, 2*MiB, F, 128), - TLB (0x52, L1|I, 4*KiB, F, 256), - TLB (0x52, L1|I, 4*MiB, F, 256), - TLB (0x52, L1|I, 2*MiB, F, 256), - TLB (0x55, L1|I, 4*MiB, F, 7), - TLB (0x55, L1|I, 2*MiB, F, 7), - - TLB (0x56, L1|D, 4*MiB, 4, 16), - TLB (0x57, L1|D, 4*KiB, 4, 16), - TLB (0x59, L1|D, 4*KiB, F, 16), - TLB (0x5A, L1|D, 4*MiB, 4, 32), - TLB (0x5A, L1|D, 2*MiB, 4, 32), - TLB (0x5B, L1|D, 4*KiB, F, 64), - TLB (0x5B, L1|D, 4*MiB, F, 64), - TLB (0x5C, L1|D, 4*KiB, F, 128), - TLB (0x5C, L1|D, 4*MiB, F, 128), - TLB (0x5D, L1|D, 4*KiB, F, 256), - TLB (0x5D, L1|D, 4*MiB, F, 256), - - CACHE(0x60, L1|D, 16*KiB, 8, 64), - TLB (0x63, L1|D, 1*GiB, 4, 4), // speculation - CACHE(0x66, L1|D, 8*KiB, 4, 64), - CACHE(0x67, L1|D, 16*KiB, 4, 64), - CACHE(0x68, L1|D, 32*KiB, 4, 64), - - CACHE(0x70, L1|I, 12*KiB, 8, 1), - CACHE(0x71, L1|I, 16*KiB, 8, 1), - CACHE(0x72, L1|I, 32*KiB, 8, 1), - CACHE(0x73, L1|I, 64*KiB, 8, 1), - - TLB (0x76, L1|I, 4*MiB, F, 8), // AP485: internally inconsistent, sdman: TLB - TLB (0x76, L1|I, 2*MiB, F, 8), - - CACHE(0x78, L2|U, 1*MiB, 4, 64), - CACHE(0x79, L2|U, 128*KiB, 8, 64), - CACHE(0x7A, L2|U, 256*KiB, 8, 64), - CACHE(0x7B, L2|U, 512*KiB, 8, 64), - CACHE(0x7C, L2|U, 1*MiB, 8, 64), - CACHE(0x7D, L2|U, 2*MiB, 8, 64), - CACHE(0x7F, L2|U, 512*KiB, 2, 64), - - CACHE(0x80, L2|U, 512*KiB, 8, 64), - CACHE(0x82, L2|U, 256*KiB, 8, 32), - CACHE(0x83, L2|U, 512*KiB, 8, 32), - CACHE(0x84, L2|U, 1*MiB, 8, 32), - CACHE(0x85, L2|U, 2*MiB, 8, 32), - CACHE(0x86, L2|U, 512*KiB, 4, 64), - CACHE(0x87, L2|U, 1*MiB, 8, 64), - - TLB (0xB0, L1|I, 4*KiB, 4, 128), - TLB (0xB1, L1|I, 2*MiB, 4, 8), - TLB (0xB1, L1|I, 4*MiB, 4, 4), - TLB (0xB2, L1|I, 4*KiB, 4, 64), - - TLB (0xB3, L1|D, 4*KiB, 4, 128), - TLB (0xB3, L1|D, 4*MiB, 4, 128), - TLB (0xB4, L1|D, 4*KiB, 4, 256), - TLB (0xB4, L1|D, 4*MiB, 4, 256), - TLB (0xB5, L1|I, 4*KiB, 4, 128), // speculation - TLB (0xB6, L1|I, 4*KiB, 8, 128), // http://software.intel.com/en-us/forums/topic/401012 - - TLB (0xBA, L1|D, 4*KiB, 4, 64), - TLB (0xC0, L1|D, 4*KiB, 4, 8), - TLB (0xC0, L1|D, 4*MiB, 4, 8), - TLB (0xC1, L2|U, 4*KiB, 8, 1024), // http://software.intel.com/en-us/forums/topic/401012 - TLB (0xC1, L2|U, 4*MiB, 8, 1024), - TLB (0xC1, L2|U, 2*MiB, 8, 1024), - TLB (0xCA, L2|U, 4*KiB, 4, 512), - - CACHE(0xD0, L3|U, 512*KiB, 4, 64), - CACHE(0xD1, L3|U, 1*MiB, 4, 64), - CACHE(0xD2, L3|U, 2*MiB, 4, 64), - CACHE(0xD6, L3|U, 1*MiB, 8, 64), - CACHE(0xD7, L3|U, 2*MiB, 8, 64), - CACHE(0xD8, L3|U, 4*MiB, 8, 64), - CACHE(0xDC, L3|U, 3*MiB/2, 12, 64), - CACHE(0xDD, L3|U, 3*MiB, 12, 64), - CACHE(0xDE, L3|U, 6*MiB, 12, 64), - CACHE(0xE2, L3|U, 2*MiB, 16, 64), - CACHE(0xE3, L3|U, 4*MiB, 16, 64), - CACHE(0xE4, L3|U, 8*MiB, 16, 64), - CACHE(0xEA, L3|U, 12*MiB, 24, 64), - CACHE(0xEB, L3|U, 18*MiB, 24, 64), - CACHE(0xEC, L3|U, 24*MiB, 24, 64), -}; -#undef CACHE -#undef TLB - -static const Characteristics* CharacteristicsFromDescriptor(Descriptor descriptor) -{ - // note: we can't use bsearch because characteristicsTable contains multiple - // entries with the same descriptor. - for(size_t i = 0; i < ARRAY_SIZE(characteristicsTable); i++) - { - const Characteristics& characteristics = characteristicsTable[i]; - if(characteristics.descriptor == descriptor) - return &characteristics; - } - - debug_printf("Unknown cache/TLB descriptor 0x%x\n", (unsigned int)descriptor); - return 0; -} - - -enum DescriptorFlags -{ - SKIP_CACHE_DESCRIPTORS = 1, - NO_LAST_LEVEL_CACHE = 2, - PREFETCH64 = 64, - PREFETCH128 = 128 -}; - -static bool HandleSpecialDescriptor(Descriptor descriptor, size_t& descriptorFlags) -{ - switch(descriptor) - { - case 0: // carries no information - return true; - - case 0x40: - descriptorFlags |= NO_LAST_LEVEL_CACHE; - return true; - - case 0xF0: - descriptorFlags |= PREFETCH64; - return true; - - case 0xF1: - descriptorFlags |= PREFETCH128; - return true; - - case 0xFF: // descriptors don't include caches (use CPUID.4 instead) - descriptorFlags |= SKIP_CACHE_DESCRIPTORS; - return true; - - default: - return false; - } -} - - -static void DetectCacheAndTLB(size_t& descriptorFlags) -{ - const Descriptors descriptors = GetDescriptors(); - for(Descriptors::const_iterator it = descriptors.begin(); it != descriptors.end(); ++it) - { - const Descriptor descriptor = *it; - if(HandleSpecialDescriptor(descriptor, descriptorFlags)) - continue; - - const Characteristics* characteristics = CharacteristicsFromDescriptor(*it); - if(!characteristics) - continue; - - if((descriptorFlags & SKIP_CACHE_DESCRIPTORS) && !characteristics->IsTLB()) - continue; - - x86_x64::Cache cache; - cache.Initialize(characteristics->Level(), characteristics->Type()); - cache.m_NumEntries = characteristics->NumEntries(); - cache.m_EntrySize = characteristics->EntrySize(); - cache.m_Associativity = characteristics->associativity; - cache.m_SharedBy = 1; // (safe default) - if(characteristics->IsTLB()) - AddTLB(cache); - else - AddCache(cache); - } -} - -} // namespace CPUID2 - - -static Status DetectCacheAndTLB() -{ - // ensure all cache entries are initialized (DetectCache* might not set them all) - for(size_t idxLevel = 0; idxLevel < x86_x64::Cache::maxLevels; idxLevel++) - { - caches[L1D+idxLevel].Initialize(idxLevel+1, x86_x64::Cache::kData); - caches[L1I+idxLevel].Initialize(idxLevel+1, x86_x64::Cache::kInstruction); - } - - if(x86_x64::Vendor() == x86_x64::VENDOR_AMD) - AMD::DetectCacheAndTLB(); - else - { - size_t descriptorFlags = 0; - if(CPUID4::DetectCache()) // success, ignore less reliable CPUID.2 cache information - descriptorFlags |= CPUID2::SKIP_CACHE_DESCRIPTORS; - CPUID2::DetectCacheAndTLB(descriptorFlags); - } - - // sanity checks - for(size_t idxLevel = 0; idxLevel < x86_x64::Cache::maxLevels; idxLevel++) - { - ENSURE(caches[L1D+idxLevel].m_Type == x86_x64::Cache::kData || caches[L1D+idxLevel].m_Type == x86_x64::Cache::kUnified); - ENSURE(caches[L1D+idxLevel].m_Level == idxLevel+1); - ENSURE(caches[L1D+idxLevel].Validate() == true); - - ENSURE(caches[L1I+idxLevel].m_Type == x86_x64::Cache::kInstruction || caches[L1I+idxLevel].m_Type == x86_x64::Cache::kUnified); - ENSURE(caches[L1I+idxLevel].m_Level == idxLevel+1); - ENSURE(caches[L1I+idxLevel].Validate() == true); - } - for(size_t i = 0; i < numTLBs; i++) - ENSURE(caches[TLB+i].Validate() == true); - - return INFO::OK; -} - -const x86_x64::Cache* Caches(size_t idxCache) -{ - static ModuleInitState initState; - ModuleInit(&initState, DetectCacheAndTLB); - - if(idxCache >= TLB+numTLBs) - return 0; - - return &caches[idxCache]; -} - -} // namespace x86_x64 Index: ps/trunk/source/lib/sysdep/arch/x86_x64/topology.h =================================================================== --- ps/trunk/source/lib/sysdep/arch/x86_x64/topology.h +++ ps/trunk/source/lib/sysdep/arch/x86_x64/topology.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2011 Wildfire Games. +/* Copyright (C) 2020 Wildfire Games. * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the @@ -80,32 +80,6 @@ **/ LIB_API ApicId ApicIdFromIndices(size_t idxPackage, size_t idxCore, size_t idxLogical); - -//----------------------------------------------------------------------------- -// L2 cache - -// knowledge of the cache topology, i.e. which processors share which caches, -// can be used to reduce contention and increase effective capacity by -// assigning the partner processors to work on the same dataset. -// -// example: Intel Core2 micro-architectures feature L2 caches shared by -// two cores. - -/** - * @return number of distinct L2 caches. - **/ -LIB_API size_t NumCaches(); - -/** - * @return L2 cache number (zero-based) to which the given processor belongs. - **/ -LIB_API size_t CacheFromProcessor(size_t processor); - -/** - * @return bit-mask of all processors sharing the given cache. - **/ -LIB_API uintptr_t ProcessorMaskFromCache(size_t cache); - } // namespace topology #endif // #ifndef INCLUDED_X86_X64_TOPOLOGY Index: ps/trunk/source/lib/sysdep/arch/x86_x64/topology.cpp =================================================================== --- ps/trunk/source/lib/sysdep/arch/x86_x64/topology.cpp +++ ps/trunk/source/lib/sysdep/arch/x86_x64/topology.cpp @@ -21,7 +21,7 @@ */ /* - * detection of CPU and cache topology + * Detection of CPU topology */ #include "precompiled.h" @@ -33,7 +33,6 @@ #include "lib/sysdep/os_cpu.h" #include "lib/sysdep/numa.h" #include "lib/sysdep/arch/x86_x64/x86_x64.h" -#include "lib/sysdep/arch/x86_x64/cache.h" #include "lib/sysdep/arch/x86_x64/apic.h" #include @@ -42,7 +41,7 @@ namespace topology { //--------------------------------------------------------------------------------------------------------------------- -// detect *maximum* number of cores/packages/caches. +// detect *maximum* number of cores/packages. // note: some of them may be disabled by the OS or BIOS. // note: Intel Appnote 485 assures us that they are uniform across packages. @@ -111,13 +110,6 @@ return 1; } - -static size_t MaxLogicalPerCache() -{ - return x86_x64::Caches(x86_x64::L2D)->m_SharedBy; -} - - //--------------------------------------------------------------------------------------------------------------------- // CPU topology interface @@ -299,177 +291,4 @@ return ApicIdFromContiguousId(contiguousId); } - -//--------------------------------------------------------------------------------------------------------------------- -// cache topology - -// note: Windows 2003 GetLogicalProcessorInformation provides similar -// functionality but returns incorrect results. (it claims all cores in -// an Intel Core2 Quad processor share a single L2 cache.) - -class CacheRelations -{ -public: - /** - * add processor to the processor mask owned by cache identified by \ - **/ - void Add(u8 cacheId, size_t processor) - { - SharedCache* cache = Find(cacheId); - if(!cache) - { - m_caches.push_back(cacheId); - cache = &m_caches.back(); - } - cache->Add(processor); - } - - size_t NumCaches() const - { - return m_caches.size(); - } - - /** - * store topology in an array (one entry per cache) of masks - * representing the processors that share a cache. - **/ - void StoreProcessorMasks(uintptr_t* cachesProcessorMask) - { - for(size_t i = 0; i < NumCaches(); i++) - cachesProcessorMask[i] = m_caches[i].ProcessorMask(); - } - -private: - /** - * stores ID and tracks which processors share this cache - **/ - class SharedCache - { - public: - SharedCache(u8 cacheId) - : m_cacheId(cacheId), m_processorMask(0) - { - } - - bool Matches(u8 cacheId) const - { - return m_cacheId == cacheId; - } - - void Add(size_t processor) - { - m_processorMask |= uintptr_t(1) << processor; - } - - uintptr_t ProcessorMask() const - { - return m_processorMask; - } - - private: - u8 m_cacheId; - uintptr_t m_processorMask; - }; - - SharedCache* Find(u8 cacheId) - { - for(size_t i = 0; i < m_caches.size(); i++) - { - if(m_caches[i].Matches(cacheId)) - return &m_caches[i]; - } - - return 0; - } - - std::vector m_caches; -}; - -static void DetermineCachesProcessorMask(uintptr_t* cachesProcessorMask, size_t& numCaches) -{ - CacheRelations cacheRelations; - if(AreApicIdsReliable()) - { - const size_t numBits = ceil_log2(MaxLogicalPerCache()); - const u8 cacheIdMask = u8((0xFF << numBits) & 0xFF); - for(size_t processor = 0; processor < os_cpu_NumProcessors(); processor++) - { - const ApicId apicId = ApicIdFromProcessor(processor); - const u8 cacheId = u8(apicId & cacheIdMask); - cacheRelations.Add(cacheId, processor); - } - } - else - { - for(size_t processor = 0; processor < os_cpu_NumProcessors(); processor++) - { - // assume each processor has exactly one cache with matching IDs - const u8 cacheId = (u8)processor; - cacheRelations.Add(cacheId, processor); - } - } - - numCaches = cacheRelations.NumCaches(); - cacheRelations.StoreProcessorMasks(cachesProcessorMask); -} - - -static void DetermineProcessorsCache(const uintptr_t* cachesProcessorMask, size_t numCaches, size_t* processorsCache, size_t numProcessors) -{ - for(size_t cache = 0; cache < numCaches; cache++) - { - // write to all entries that share this cache - const uintptr_t processorMask = cachesProcessorMask[cache]; - for(size_t processor = 0; processor < numProcessors; processor++) - { - if(IsBitSet(processorMask, processor)) - { - ENSURE(processorsCache[processor] == 0); - processorsCache[processor] = cache; - } - } - } -} - - -//--------------------------------------------------------------------------------------------------------------------- -// cache topology interface - -struct CacheTopology // POD -{ - size_t numCaches; - size_t processorsCache[os_cpu_MaxProcessors]; - uintptr_t cachesProcessorMask[os_cpu_MaxProcessors]; -}; -static CacheTopology cacheTopology; -static ModuleInitState cacheInitState; - -static Status InitCacheTopology() -{ - ModuleInit(&cpuInitState, InitCpuTopology); - DetermineCachesProcessorMask(cacheTopology.cachesProcessorMask, cacheTopology.numCaches); - DetermineProcessorsCache(cacheTopology.cachesProcessorMask, cacheTopology.numCaches, cacheTopology.processorsCache, os_cpu_NumProcessors()); - return INFO::OK; -} - -size_t NumCaches() -{ - ModuleInit(&cacheInitState, InitCacheTopology); - return cacheTopology.numCaches; -} - -size_t CacheFromProcessor(size_t processor) -{ - ModuleInit(&cacheInitState, InitCacheTopology); - ENSURE(processor < os_cpu_NumProcessors()); - return cacheTopology.processorsCache[processor]; -} - -uintptr_t ProcessorMaskFromCache(size_t cache) -{ - ModuleInit(&cacheInitState, InitCacheTopology); - ENSURE(cache < cacheTopology.numCaches); - return cacheTopology.cachesProcessorMask[cache]; -} - } // namespace topology Index: ps/trunk/source/ps/GameSetup/HWDetect.cpp =================================================================== --- ps/trunk/source/ps/GameSetup/HWDetect.cpp +++ ps/trunk/source/ps/GameSetup/HWDetect.cpp @@ -31,7 +31,6 @@ #include "lib/sysdep/numa.h" #include "lib/sysdep/os_cpu.h" #if ARCH_X86_X64 -# include "lib/sysdep/arch/x86_x64/cache.h" # include "lib/sysdep/arch/x86_x64/topology.h" #endif #if CONFIG2_AUDIO @@ -75,63 +74,6 @@ static void ReportSDL(const ScriptInterface& scriptInterface, JS::HandleValue settings); static void ReportGLLimits(const ScriptInterface& scriptInterface, JS::HandleValue settings); -#if ARCH_X86_X64 -void ConvertCaches(const ScriptInterface& scriptInterface, x86_x64::IdxCache idxCache, JS::MutableHandleValue ret) -{ - ScriptRequest rq(scriptInterface); - - ScriptInterface::CreateArray(rq, ret); - - for (size_t idxLevel = 0; idxLevel < x86_x64::Cache::maxLevels; ++idxLevel) - { - const x86_x64::Cache* pcache = x86_x64::Caches(idxCache+idxLevel); - if (pcache->m_Type == x86_x64::Cache::kNull || pcache->m_NumEntries == 0) - continue; - - JS::RootedValue cache(rq.cx); - - ScriptInterface::CreateObject( - rq, - &cache, - "type", static_cast(pcache->m_Type), - "level", static_cast(pcache->m_Level), - "associativity", static_cast(pcache->m_Associativity), - "linesize", static_cast(pcache->m_EntrySize), - "sharedby", static_cast(pcache->m_SharedBy), - "totalsize", static_cast(pcache->TotalSize())); - - scriptInterface.SetPropertyInt(ret, idxLevel, cache); - } -} - -void ConvertTLBs(const ScriptInterface& scriptInterface, JS::MutableHandleValue ret) -{ - ScriptRequest rq(scriptInterface); - - ScriptInterface::CreateArray(rq, ret); - - for(size_t i = 0; ; i++) - { - const x86_x64::Cache* ptlb = x86_x64::Caches(x86_x64::TLB+i); - if (!ptlb) - break; - - JS::RootedValue tlb(rq.cx); - - ScriptInterface::CreateObject( - rq, - &tlb, - "type", static_cast(ptlb->m_Type), - "level", static_cast(ptlb->m_Level), - "associativity", static_cast(ptlb->m_Associativity), - "pagesize", static_cast(ptlb->m_EntrySize), - "entries", static_cast(ptlb->m_NumEntries)); - - scriptInterface.SetPropertyInt(ret, i, tlb); - } -} -#endif - void SetDisableAudio(ScriptInterface::CmptPrivate* UNUSED(pCmptPrivate), bool disabled) { g_DisableAudio = disabled; @@ -240,7 +182,6 @@ scriptInterface.SetProperty(settings, "cpu_numpackages", (u32)topology::NumPackages()); scriptInterface.SetProperty(settings, "cpu_coresperpackage", (u32)topology::CoresPerPackage()); scriptInterface.SetProperty(settings, "cpu_logicalpercore", (u32)topology::LogicalPerCore()); - scriptInterface.SetProperty(settings, "cpu_numcaches", (u32)topology::NumCaches()); #endif scriptInterface.SetProperty(settings, "numa_numnodes", (u32)numa_NumNodes()); @@ -261,20 +202,12 @@ scriptInterface.SetProperty(settings, "x86_caps[1]", caps1); scriptInterface.SetProperty(settings, "x86_caps[2]", caps2); scriptInterface.SetProperty(settings, "x86_caps[3]", caps3); - - JS::RootedValue tmpVal(rq.cx); - ConvertCaches(scriptInterface, x86_x64::L1I, &tmpVal); - scriptInterface.SetProperty(settings, "x86_icaches", tmpVal); - ConvertCaches(scriptInterface, x86_x64::L1D, &tmpVal); - scriptInterface.SetProperty(settings, "x86_dcaches", tmpVal); - ConvertTLBs(scriptInterface, &tmpVal); - scriptInterface.SetProperty(settings, "x86_tlbs", tmpVal); #endif scriptInterface.SetProperty(settings, "timer_resolution", timer_Resolution()); // The version should be increased for every meaningful change. - const int reportVersion = 13; + const int reportVersion = 14; // Send the same data to the reporting system g_UserReporter.SubmitReport(