Index: ps/trunk/source/lib/sysdep/arch/x86_x64/cache.cpp =================================================================== --- ps/trunk/source/lib/sysdep/arch/x86_x64/cache.cpp (revision 24549) +++ ps/trunk/source/lib/sysdep/arch/x86_x64/cache.cpp (nonexistent) @@ -1,663 +0,0 @@ -/* Copyright (C) 2020 Wildfire Games. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include "precompiled.h" -#include "lib/sysdep/arch/x86_x64/cache.h" - -#include "lib/bits.h" -#include "lib/alignment.h" -#include "lib/module_init.h" -#include "lib/sysdep/os_cpu.h" -#include "lib/sysdep/arch/x86_x64/x86_x64.h" - -#include - -namespace x86_x64 { - -static const size_t maxTLBs = 2*2*4; // (level0, level1) x (D,I) x (4K, 2M, 4M, 1G) -static size_t numTLBs = 0; - -static const size_t numCaches = x86_x64::Cache::maxLevels * 2 + maxTLBs; -static Cache caches[numCaches]; - - -static void AddCache(const x86_x64::Cache& cache) -{ - ENSURE(cache.Validate()); - - if(cache.m_Type == x86_x64::Cache::kData || cache.m_Type == x86_x64::Cache::kUnified) - caches[L1D + cache.m_Level-1] = cache; - if(cache.m_Type == x86_x64::Cache::kInstruction || cache.m_Type == x86_x64::Cache::kUnified) - caches[L1I + cache.m_Level-1] = cache; -} - - -static void AddTLB(const x86_x64::Cache& tlb) -{ - ENSURE(tlb.Validate()); - ENSURE(tlb.m_Level == 1 || tlb.m_Level == 2); // see maxTLBs - - ENSURE(numTLBs < maxTLBs); - caches[TLB+numTLBs++] = tlb; -} - - -//----------------------------------------------------------------------------- -// AMD - -// (Intel has subsequently added support for function 0x80000006, but -// only returns ECX, i.e. L2 information.) -namespace AMD -{ - -static x86_x64::Cache L1Cache(u32 reg, x86_x64::Cache::Type type) -{ - x86_x64::Cache cache; - cache.Initialize(1, type); - - const size_t lineSize = bits(reg, 0, 7); - const size_t associativity = bits(reg, 16, 23); // 0 = reserved - const size_t totalSize = bits(reg, 24, 31)*KiB; - if(lineSize != 0 && associativity != 0 && totalSize != 0) - { - cache.m_NumEntries = totalSize / lineSize; - cache.m_EntrySize = lineSize; - cache.m_Associativity = associativity; - cache.m_SharedBy = 1; - } - return cache; -} - -// applies to L2, L3 and TLB2 -static const size_t associativityTable[16] = -{ - 0, 1, 2, 0, 4, 0, 8, 0, - // TODO: The second '16' does not obey to the specifications and is only a workaround. For a correct implementation please look here: https://community.amd.com/thread/244207 - 16, 16, 32, 48, 64, 96, 128, x86_x64::Cache::fullyAssociative -}; - -static x86_x64::Cache L2Cache(u32 reg, x86_x64::Cache::Type type) -{ - x86_x64::Cache cache; - cache.Initialize(2, type); - - const size_t lineSize = bits(reg, 0, 7); - const size_t idxAssociativity = bits(reg, 12, 15); // 0 = disabled - const size_t totalSize = bits(reg, 16, 31)*KiB; - if(lineSize != 0 && idxAssociativity != 0 && totalSize != 0) - { - cache.m_NumEntries = totalSize / lineSize; - cache.m_EntrySize = lineSize; - cache.m_Associativity = associativityTable[idxAssociativity]; - cache.m_SharedBy = 1; - } - return cache; -} - -// (same as L2 except for the size) -static x86_x64::Cache L3Cache(u32 reg, x86_x64::Cache::Type type) -{ - x86_x64::Cache cache; - cache.Initialize(3, type); - - const size_t lineSize = bits(reg, 0, 7); - const size_t idxAssociativity = bits(reg, 12, 15); // 0 = disabled - const size_t totalSize = bits(reg, 18, 31)*512*KiB; // (rounded down) - // NB: some Athlon 64 X2 models have no L3 cache - if(lineSize != 0 && idxAssociativity != 0 && totalSize != 0) - { - cache.m_NumEntries = totalSize / lineSize; - cache.m_EntrySize = lineSize; - cache.m_Associativity = associativityTable[idxAssociativity]; - cache.m_SharedBy = 1; - } - return cache; -} - -static x86_x64::Cache TLB1(u32 reg, size_t bitOffset, size_t pageSize, x86_x64::Cache::Type type) -{ - x86_x64::Cache cache; - cache.Initialize(1, type); - - const size_t numEntries = bits(reg, bitOffset+0, bitOffset+ 7); - const size_t associativity = bits(reg, bitOffset+8, bitOffset+15); // 0 = reserved - if(numEntries != 0 && associativity != 0) - { - cache.m_NumEntries = numEntries; - cache.m_EntrySize = pageSize; - cache.m_Associativity = associativity; - cache.m_SharedBy = 1; - } - return cache; -} - -static x86_x64::Cache TLB2(u32 reg, size_t bitOffset, size_t pageSize, x86_x64::Cache::Type type) -{ - x86_x64::Cache cache; - cache.Initialize(2, type); - - const size_t numEntries = bits(reg, bitOffset+ 0, bitOffset+11); - const size_t idxAssociativity = bits(reg, bitOffset+12, bitOffset+15); // 0 = disabled - if(numEntries != 0 && idxAssociativity != 0) - { - cache.m_NumEntries = numEntries; - cache.m_EntrySize = pageSize; - cache.m_Associativity = associativityTable[idxAssociativity]; - cache.m_SharedBy = 1; - } - return cache; -} - -static void AddTLB2Pair(u32 reg, size_t pageSize) -{ - x86_x64::Cache::Type type = x86_x64::Cache::kUnified; - if(bits(reg, 16, 31) != 0) // not unified - { - AddTLB(TLB2(reg, 16, pageSize, x86_x64::Cache::kData)); - type = x86_x64::Cache::kInstruction; - } - AddTLB(TLB2(reg, 0, pageSize, type)); -} - -// AMD reports maxCpuidIdFunction > 4 but consider functions 2..4 to be -// "reserved". cache characteristics are returned via ext. functions. -static void DetectCacheAndTLB() -{ - x86_x64::CpuidRegs regs = { 0 }; - - regs.eax = 0x80000005; - if(x86_x64::cpuid(®s)) - { - AddCache(L1Cache(regs.ecx, x86_x64::Cache::kData)); - AddCache(L1Cache(regs.edx, x86_x64::Cache::kInstruction)); - - AddTLB(TLB1(regs.eax, 0, 2*MiB, x86_x64::Cache::kInstruction)); - AddTLB(TLB1(regs.eax, 16, 2*MiB, x86_x64::Cache::kData)); - AddTLB(TLB1(regs.ebx, 0, 4*KiB, x86_x64::Cache::kInstruction)); - AddTLB(TLB1(regs.ebx, 16, 4*KiB, x86_x64::Cache::kData)); - } - - regs.eax = 0x80000006; - if(x86_x64::cpuid(®s)) - { - AddCache(L2Cache(regs.ecx, x86_x64::Cache::kUnified)); - AddCache(L3Cache(regs.edx, x86_x64::Cache::kUnified)); - - AddTLB2Pair(regs.eax, 2*MiB); - AddTLB2Pair(regs.ebx, 4*KiB); - } -} - -} // namespace AMD - - -//----------------------------------------------------------------------------- -// CPUID.4 - -namespace CPUID4 { - -static bool DetectCache() -{ - // note: level order is unspecified (see Intel AP-485) - for(u32 count = 0; ; count++) - { - x86_x64::CpuidRegs regs = { 0 }; - regs.eax = 4; - regs.ecx = count; - if(!x86_x64::cpuid(®s)) - return false; - - const x86_x64::Cache::Type type = (x86_x64::Cache::Type)bits(regs.eax, 0, 4); - if(type == x86_x64::Cache::kNull) // no more remaining - break; - - const size_t level = (size_t)bits(regs.eax, 5, 7); - const size_t partitions = (size_t)bits(regs.ebx, 12, 21)+1; - const size_t sets = (size_t)bits(regs.ecx, 0, 31)+1; - - x86_x64::Cache cache; - cache.Initialize(level, type); - cache.m_EntrySize = static_cast(bits(regs.ebx, 0, 11) + 1); // (yes, this also uses +1 encoding) - cache.m_Associativity = static_cast(bits(regs.ebx, 22, 31) + 1); - cache.m_SharedBy = static_cast(bits(regs.eax, 14, 25) + 1); - cache.m_NumEntries = cache.m_Associativity * partitions * sets; - - AddCache(cache); - } - - return true; -} - -} // namespace CPUID4 - - -//----------------------------------------------------------------------------- -// CPUID.2 (descriptors) - -namespace CPUID2 { - -typedef u8 Descriptor; -typedef std::vector Descriptors; - -static void AppendDescriptors(u32 reg, Descriptors& descriptors) -{ - if(IsBitSet(reg, 31)) // register contents are reserved - return; - for(int pos = 24; pos >= 0; pos -= 8) - { - const u8 descriptor = (u8)bits(reg, pos, pos+7); - if(descriptor != 0) - descriptors.push_back(descriptor); - } -} - - -static Descriptors GetDescriptors() -{ - // ensure consistency by pinning to a CPU. - // (don't use a hard-coded mask because process affinity may be restricted) - const uintptr_t allProcessors = os_cpu_ProcessorMask(); - const uintptr_t firstProcessor = allProcessors & -intptr_t(allProcessors); - const uintptr_t prevAffinityMask = os_cpu_SetThreadAffinityMask(firstProcessor); - - x86_x64::CpuidRegs regs = { 0 }; - regs.eax = 2; - if(!x86_x64::cpuid(®s)) - return Descriptors(); - - Descriptors descriptors; - size_t iterations = bits(regs.eax, 0, 7); - for(;;) // abort mid-loop (invoke CPUID exactly times) - { - AppendDescriptors(bits(regs.eax, 8, 31), descriptors); - AppendDescriptors(regs.ebx, descriptors); - AppendDescriptors(regs.ecx, descriptors); - AppendDescriptors(regs.edx, descriptors); - if(--iterations == 0) - break; - regs.eax = 2; - const bool ok = x86_x64::cpuid(®s); - ENSURE(ok); - } - - os_cpu_SetThreadAffinityMask(prevAffinityMask); - - return descriptors; -} - - -// note: the following cannot be moved into a function because -// ARRAY_SIZE's template argument must not reference a local type. - -enum Flags -{ - // level (bits 0..1) - L1 = 1, - L2, - L3, - - // type (bits 2..3) - I = 0x04, // instruction - D = 0x08, // data - U = I|D // unified - - // largeSize (bits 4..31 with bits 0..3 zeroed): TLB entrySize or cache numEntries -}; - -// (there are > 100 descriptors, so we squeeze all fields into 8 bytes.) -struct Characteristics // POD -{ - x86_x64::Cache::Type Type() const - { - switch(flags & U) - { - case D: - return x86_x64::Cache::kData; - case I: - return x86_x64::Cache::kInstruction; - case U: - return x86_x64::Cache::kUnified; - default: - DEBUG_WARN_ERR(ERR::LOGIC); - return x86_x64::Cache::kNull; - } - } - - size_t Level() const - { - const size_t level = flags & 3; - ENSURE(level != 0); - return level; - } - - bool IsTLB() const - { - return smallSize >= 0; - } - - size_t NumEntries() const - { - return IsTLB()? (size_t)smallSize : (flags & ~0xF); - } - - size_t EntrySize() const - { - return IsTLB()? (flags & ~0xF) : (size_t)(-smallSize); - } - - u8 descriptor; - u8 associativity; - i16 smallSize; // negative cache entrySize or TLB numEntries - u32 flags; // level, type, largeSize -}; - -static const u8 F = x86_x64::Cache::fullyAssociative; - -#define CACHE(descriptor, flags, totalSize, assoc, entrySize) { descriptor, assoc, -entrySize, flags | ((totalSize)/(entrySize)) } -#define TLB(descriptor, flags, entrySize, assoc, numEntries) { descriptor, assoc, numEntries, flags | (entrySize) } - -// (we need to include cache descriptors because early Pentium4 don't implement CPUID.4) -// references: [accessed 2011-02-26] -// AP485 http://www.intel.com/Assets/PDF/appnote/241618.pdf -// sdman http://www.intel.com/Assets/PDF/manual/253666.pdf -// sandp http://www.sandpile.org/ia32/cpuid.htm -// opsol http://src.opensolaris.org/source/xref/onnv/onnv-gate/usr/src/uts/i86pc/os/cpuid.c -static const Characteristics characteristicsTable[] = -{ - TLB (0x01, L1|I, 4*KiB, 4, 32), - TLB (0x02, L1|I, 4*MiB, F, 2), - TLB (0x03, L1|D, 4*KiB, 4, 64), - TLB (0x04, L1|D, 4*MiB, 4, 8), - TLB (0x05, L1|D, 4*MiB, 4, 32), - - CACHE(0x06, L1|I, 8*KiB, 4, 32), - CACHE(0x08, L1|I, 16*KiB, 4, 32), - CACHE(0x09, L1|I, 32*KiB, 4, 64), - CACHE(0x0A, L1|I, 8*KiB, 2, 32), - - TLB (0x0B, L1|I, 4*MiB, 4, 4), - - CACHE(0x0C, L1|D, 16*KiB, 4, 32), - CACHE(0x0D, L1|D, 16*KiB, 4, 64), // opsol: 32B (would be redundant with 0x0C), AP485: 64B, sdman: 64B - CACHE(0x0E, L1|D, 24*KiB, 6, 64), - - CACHE(0x21, L2|U, 256*KiB, 8, 64), - - CACHE(0x22, L3|U, 512*KiB, 4, 64), - CACHE(0x23, L3|U, 1*MiB, 8, 64), - CACHE(0x25, L3|U, 2*MiB, 8, 64), - CACHE(0x29, L3|U, 4*MiB, 8, 64), - - CACHE(0x2c, L1|D, 32*KiB, 8, 64), - - CACHE(0x30, L1|I, 32*KiB, 8, 64), - - CACHE(0x39, L2|U, 128*KiB, 4, 64), - CACHE(0x3A, L2|U, 192*KiB, 6, 64), - CACHE(0x3B, L2|U, 128*KiB, 2, 64), - CACHE(0x3C, L2|U, 256*KiB, 4, 64), - CACHE(0x3D, L2|U, 384*KiB, 6, 64), - CACHE(0x3E, L2|U, 512*KiB, 4, 64), - CACHE(0x41, L2|U, 128*KiB, 4, 32), - CACHE(0x42, L2|U, 256*KiB, 4, 32), - CACHE(0x43, L2|U, 512*KiB, 4, 32), - CACHE(0x44, L2|U, 1*MiB, 4, 32), - CACHE(0x45, L2|U, 2*MiB, 4, 32), - - CACHE(0x46, L3|U, 4*MiB, 4, 64), - CACHE(0x47, L3|U, 8*MiB, 8, 64), - CACHE(0x48, L2|U, 3*MiB, 12, 64), - CACHE(0x49, L2|U, 4*MiB, 16, 64), - CACHE(0x49, L3|U, 4*MiB, 16, 64), - CACHE(0x4A, L3|U, 6*MiB, 12, 64), - CACHE(0x4B, L3|U, 8*MiB, 16, 64), - CACHE(0x4C, L3|U, 12*MiB, 12, 64), - CACHE(0x4D, L3|U, 16*MiB, 16, 64), - CACHE(0x4E, L2|U, 6*MiB, 24, 64), - - TLB (0x4F, L1|I, 4*KiB, F, 32), // sandp: unknown assoc, opsol: full, AP485: unspecified - TLB (0x50, L1|I, 4*KiB, F, 64), - TLB (0x50, L1|I, 4*MiB, F, 64), - TLB (0x50, L1|I, 2*MiB, F, 64), - TLB (0x51, L1|I, 4*KiB, F, 128), - TLB (0x51, L1|I, 4*MiB, F, 128), - TLB (0x51, L1|I, 2*MiB, F, 128), - TLB (0x52, L1|I, 4*KiB, F, 256), - TLB (0x52, L1|I, 4*MiB, F, 256), - TLB (0x52, L1|I, 2*MiB, F, 256), - TLB (0x55, L1|I, 4*MiB, F, 7), - TLB (0x55, L1|I, 2*MiB, F, 7), - - TLB (0x56, L1|D, 4*MiB, 4, 16), - TLB (0x57, L1|D, 4*KiB, 4, 16), - TLB (0x59, L1|D, 4*KiB, F, 16), - TLB (0x5A, L1|D, 4*MiB, 4, 32), - TLB (0x5A, L1|D, 2*MiB, 4, 32), - TLB (0x5B, L1|D, 4*KiB, F, 64), - TLB (0x5B, L1|D, 4*MiB, F, 64), - TLB (0x5C, L1|D, 4*KiB, F, 128), - TLB (0x5C, L1|D, 4*MiB, F, 128), - TLB (0x5D, L1|D, 4*KiB, F, 256), - TLB (0x5D, L1|D, 4*MiB, F, 256), - - CACHE(0x60, L1|D, 16*KiB, 8, 64), - TLB (0x63, L1|D, 1*GiB, 4, 4), // speculation - CACHE(0x66, L1|D, 8*KiB, 4, 64), - CACHE(0x67, L1|D, 16*KiB, 4, 64), - CACHE(0x68, L1|D, 32*KiB, 4, 64), - - CACHE(0x70, L1|I, 12*KiB, 8, 1), - CACHE(0x71, L1|I, 16*KiB, 8, 1), - CACHE(0x72, L1|I, 32*KiB, 8, 1), - CACHE(0x73, L1|I, 64*KiB, 8, 1), - - TLB (0x76, L1|I, 4*MiB, F, 8), // AP485: internally inconsistent, sdman: TLB - TLB (0x76, L1|I, 2*MiB, F, 8), - - CACHE(0x78, L2|U, 1*MiB, 4, 64), - CACHE(0x79, L2|U, 128*KiB, 8, 64), - CACHE(0x7A, L2|U, 256*KiB, 8, 64), - CACHE(0x7B, L2|U, 512*KiB, 8, 64), - CACHE(0x7C, L2|U, 1*MiB, 8, 64), - CACHE(0x7D, L2|U, 2*MiB, 8, 64), - CACHE(0x7F, L2|U, 512*KiB, 2, 64), - - CACHE(0x80, L2|U, 512*KiB, 8, 64), - CACHE(0x82, L2|U, 256*KiB, 8, 32), - CACHE(0x83, L2|U, 512*KiB, 8, 32), - CACHE(0x84, L2|U, 1*MiB, 8, 32), - CACHE(0x85, L2|U, 2*MiB, 8, 32), - CACHE(0x86, L2|U, 512*KiB, 4, 64), - CACHE(0x87, L2|U, 1*MiB, 8, 64), - - TLB (0xB0, L1|I, 4*KiB, 4, 128), - TLB (0xB1, L1|I, 2*MiB, 4, 8), - TLB (0xB1, L1|I, 4*MiB, 4, 4), - TLB (0xB2, L1|I, 4*KiB, 4, 64), - - TLB (0xB3, L1|D, 4*KiB, 4, 128), - TLB (0xB3, L1|D, 4*MiB, 4, 128), - TLB (0xB4, L1|D, 4*KiB, 4, 256), - TLB (0xB4, L1|D, 4*MiB, 4, 256), - TLB (0xB5, L1|I, 4*KiB, 4, 128), // speculation - TLB (0xB6, L1|I, 4*KiB, 8, 128), // http://software.intel.com/en-us/forums/topic/401012 - - TLB (0xBA, L1|D, 4*KiB, 4, 64), - TLB (0xC0, L1|D, 4*KiB, 4, 8), - TLB (0xC0, L1|D, 4*MiB, 4, 8), - TLB (0xC1, L2|U, 4*KiB, 8, 1024), // http://software.intel.com/en-us/forums/topic/401012 - TLB (0xC1, L2|U, 4*MiB, 8, 1024), - TLB (0xC1, L2|U, 2*MiB, 8, 1024), - TLB (0xCA, L2|U, 4*KiB, 4, 512), - - CACHE(0xD0, L3|U, 512*KiB, 4, 64), - CACHE(0xD1, L3|U, 1*MiB, 4, 64), - CACHE(0xD2, L3|U, 2*MiB, 4, 64), - CACHE(0xD6, L3|U, 1*MiB, 8, 64), - CACHE(0xD7, L3|U, 2*MiB, 8, 64), - CACHE(0xD8, L3|U, 4*MiB, 8, 64), - CACHE(0xDC, L3|U, 3*MiB/2, 12, 64), - CACHE(0xDD, L3|U, 3*MiB, 12, 64), - CACHE(0xDE, L3|U, 6*MiB, 12, 64), - CACHE(0xE2, L3|U, 2*MiB, 16, 64), - CACHE(0xE3, L3|U, 4*MiB, 16, 64), - CACHE(0xE4, L3|U, 8*MiB, 16, 64), - CACHE(0xEA, L3|U, 12*MiB, 24, 64), - CACHE(0xEB, L3|U, 18*MiB, 24, 64), - CACHE(0xEC, L3|U, 24*MiB, 24, 64), -}; -#undef CACHE -#undef TLB - -static const Characteristics* CharacteristicsFromDescriptor(Descriptor descriptor) -{ - // note: we can't use bsearch because characteristicsTable contains multiple - // entries with the same descriptor. - for(size_t i = 0; i < ARRAY_SIZE(characteristicsTable); i++) - { - const Characteristics& characteristics = characteristicsTable[i]; - if(characteristics.descriptor == descriptor) - return &characteristics; - } - - debug_printf("Unknown cache/TLB descriptor 0x%x\n", (unsigned int)descriptor); - return 0; -} - - -enum DescriptorFlags -{ - SKIP_CACHE_DESCRIPTORS = 1, - NO_LAST_LEVEL_CACHE = 2, - PREFETCH64 = 64, - PREFETCH128 = 128 -}; - -static bool HandleSpecialDescriptor(Descriptor descriptor, size_t& descriptorFlags) -{ - switch(descriptor) - { - case 0: // carries no information - return true; - - case 0x40: - descriptorFlags |= NO_LAST_LEVEL_CACHE; - return true; - - case 0xF0: - descriptorFlags |= PREFETCH64; - return true; - - case 0xF1: - descriptorFlags |= PREFETCH128; - return true; - - case 0xFF: // descriptors don't include caches (use CPUID.4 instead) - descriptorFlags |= SKIP_CACHE_DESCRIPTORS; - return true; - - default: - return false; - } -} - - -static void DetectCacheAndTLB(size_t& descriptorFlags) -{ - const Descriptors descriptors = GetDescriptors(); - for(Descriptors::const_iterator it = descriptors.begin(); it != descriptors.end(); ++it) - { - const Descriptor descriptor = *it; - if(HandleSpecialDescriptor(descriptor, descriptorFlags)) - continue; - - const Characteristics* characteristics = CharacteristicsFromDescriptor(*it); - if(!characteristics) - continue; - - if((descriptorFlags & SKIP_CACHE_DESCRIPTORS) && !characteristics->IsTLB()) - continue; - - x86_x64::Cache cache; - cache.Initialize(characteristics->Level(), characteristics->Type()); - cache.m_NumEntries = characteristics->NumEntries(); - cache.m_EntrySize = characteristics->EntrySize(); - cache.m_Associativity = characteristics->associativity; - cache.m_SharedBy = 1; // (safe default) - if(characteristics->IsTLB()) - AddTLB(cache); - else - AddCache(cache); - } -} - -} // namespace CPUID2 - - -static Status DetectCacheAndTLB() -{ - // ensure all cache entries are initialized (DetectCache* might not set them all) - for(size_t idxLevel = 0; idxLevel < x86_x64::Cache::maxLevels; idxLevel++) - { - caches[L1D+idxLevel].Initialize(idxLevel+1, x86_x64::Cache::kData); - caches[L1I+idxLevel].Initialize(idxLevel+1, x86_x64::Cache::kInstruction); - } - - if(x86_x64::Vendor() == x86_x64::VENDOR_AMD) - AMD::DetectCacheAndTLB(); - else - { - size_t descriptorFlags = 0; - if(CPUID4::DetectCache()) // success, ignore less reliable CPUID.2 cache information - descriptorFlags |= CPUID2::SKIP_CACHE_DESCRIPTORS; - CPUID2::DetectCacheAndTLB(descriptorFlags); - } - - // sanity checks - for(size_t idxLevel = 0; idxLevel < x86_x64::Cache::maxLevels; idxLevel++) - { - ENSURE(caches[L1D+idxLevel].m_Type == x86_x64::Cache::kData || caches[L1D+idxLevel].m_Type == x86_x64::Cache::kUnified); - ENSURE(caches[L1D+idxLevel].m_Level == idxLevel+1); - ENSURE(caches[L1D+idxLevel].Validate() == true); - - ENSURE(caches[L1I+idxLevel].m_Type == x86_x64::Cache::kInstruction || caches[L1I+idxLevel].m_Type == x86_x64::Cache::kUnified); - ENSURE(caches[L1I+idxLevel].m_Level == idxLevel+1); - ENSURE(caches[L1I+idxLevel].Validate() == true); - } - for(size_t i = 0; i < numTLBs; i++) - ENSURE(caches[TLB+i].Validate() == true); - - return INFO::OK; -} - -const x86_x64::Cache* Caches(size_t idxCache) -{ - static ModuleInitState initState; - ModuleInit(&initState, DetectCacheAndTLB); - - if(idxCache >= TLB+numTLBs) - return 0; - - return &caches[idxCache]; -} - -} // namespace x86_x64 Property changes on: ps/trunk/source/lib/sysdep/arch/x86_x64/cache.cpp ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Index: ps/trunk/source/lib/sysdep/arch/x86_x64/cache.h =================================================================== --- ps/trunk/source/lib/sysdep/arch/x86_x64/cache.h (revision 24549) +++ ps/trunk/source/lib/sysdep/arch/x86_x64/cache.h (nonexistent) @@ -1,144 +0,0 @@ -/* Copyright (C) 2018 Wildfire Games. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef INCLUDED_X86_X64_CACHE -#define INCLUDED_X86_X64_CACHE - -namespace x86_x64 { - -struct Cache // POD (may be used before static constructors) -{ - enum Type - { - // (values match the CPUID.4 definition) - kNull, - kData, - kInstruction, - kUnified - // note: further values are "reserved" - }; - - static const size_t maxLevels = 4; - - static const size_t fullyAssociative = 0xFF; // (CPUID.4 definition) - - /** - * 1..maxLevels - **/ - size_t m_Level; - - /** - * never kNull - **/ - Type m_Type; - - /** - * if 0, the cache is disabled and all other values are zero - **/ - size_t m_NumEntries; - - /** - * NB: cache entries are lines, TLB entries are pages - **/ - size_t m_EntrySize; - - /** - * = fullyAssociative or the actual ways of m_Associativity - **/ - size_t m_Associativity; - - /** - * how many logical processors share this cache? - **/ - size_t m_SharedBy; - - void Initialize(size_t level, Type type) - { - m_Level = level; - m_Type = type; - m_NumEntries = 0; - m_EntrySize = 0; - m_Associativity = 0; - m_SharedBy = 0; - - ENSURE(Validate()); - } - - bool Validate() const - { - if(!(1 <= m_Level && m_Level <= maxLevels)) - return false; - - if(m_Type == kNull) - return false; - - if(m_NumEntries == 0) // disabled - { - if(m_EntrySize != 0) - return false; - if(m_Associativity != 0) - return false; - if(m_SharedBy != 0) - return false; - } - else - { - if(m_EntrySize == 0) - return false; - if(m_Associativity == 0 || m_Associativity > fullyAssociative) - return false; - if(m_SharedBy == 0) - return false; - } - - return true; - } - - u64 TotalSize() const - { - return u64(m_NumEntries)*m_EntrySize; - } -}; - -enum IdxCache -{ - // (AddCache relies upon this order) - L1D = 1, - L2D, - L3D, - L4D, - L1I, - L2I, - L3I, - L4I, - TLB -}; - -/** - * @return 0 if idxCache >= TLB+numTLBs, otherwise a valid pointer to - * a Cache whose m_NumEntries is 0 if disabled / not present. - **/ -LIB_API const Cache* Caches(size_t idxCache); - -} // namespace x86_x64 - -#endif // #ifndef INCLUDED_X86_X64_CACHE Property changes on: ps/trunk/source/lib/sysdep/arch/x86_x64/cache.h ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Index: ps/trunk/source/lib/sysdep/arch/x86_x64/topology.cpp =================================================================== --- ps/trunk/source/lib/sysdep/arch/x86_x64/topology.cpp (revision 24549) +++ ps/trunk/source/lib/sysdep/arch/x86_x64/topology.cpp (revision 24550) @@ -1,475 +1,294 @@ /* Copyright (C) 2020 Wildfire Games. * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* - * detection of CPU and cache topology + * Detection of CPU topology */ #include "precompiled.h" #include "lib/sysdep/arch/x86_x64/topology.h" #include "lib/bits.h" #include "lib/module_init.h" #include "lib/sysdep/cpu.h" // ERR::CPU_FEATURE_MISSING #include "lib/sysdep/os_cpu.h" #include "lib/sysdep/numa.h" #include "lib/sysdep/arch/x86_x64/x86_x64.h" -#include "lib/sysdep/arch/x86_x64/cache.h" #include "lib/sysdep/arch/x86_x64/apic.h" #include #include namespace topology { //--------------------------------------------------------------------------------------------------------------------- -// detect *maximum* number of cores/packages/caches. +// detect *maximum* number of cores/packages. // note: some of them may be disabled by the OS or BIOS. // note: Intel Appnote 485 assures us that they are uniform across packages. static size_t MaxCoresPerPackage() { // assume single-core unless one of the following applies: size_t maxCoresPerPackage = 1; x86_x64::CpuidRegs regs = { 0 }; switch(x86_x64::Vendor()) { case x86_x64::VENDOR_INTEL: regs.eax = 4; regs.ecx = 0; if(x86_x64::cpuid(®s)) maxCoresPerPackage = bits(regs.eax, 26, 31)+1; break; case x86_x64::VENDOR_AMD: regs.eax = 0x80000008; if(x86_x64::cpuid(®s)) maxCoresPerPackage = bits(regs.ecx, 0, 7)+1; break; default: break; } return maxCoresPerPackage; } static size_t MaxLogicalPerCore() { struct IsHyperthreadingCapable { bool operator()() const { // definitely not if(!x86_x64::Cap(x86_x64::CAP_HT)) return false; // multi-core AMD systems falsely set the HT bit for reasons of // compatibility. we'll just ignore it, because clearing it might // confuse other callers. if(x86_x64::Vendor() == x86_x64::VENDOR_AMD && x86_x64::Cap(x86_x64::CAP_AMD_CMP_LEGACY)) return false; return true; } }; if(IsHyperthreadingCapable()()) { x86_x64::CpuidRegs regs = { 0 }; regs.eax = 1; if(!x86_x64::cpuid(®s)) DEBUG_WARN_ERR(ERR::CPU_FEATURE_MISSING); const size_t logicalPerPackage = bits(regs.ebx, 16, 23); const size_t maxCoresPerPackage = MaxCoresPerPackage(); // cores ought to be uniform WRT # logical processors ENSURE(logicalPerPackage % maxCoresPerPackage == 0); const size_t maxLogicalPerCore = logicalPerPackage / maxCoresPerPackage; return maxLogicalPerCore; } else return 1; } - -static size_t MaxLogicalPerCache() -{ - return x86_x64::Caches(x86_x64::L2D)->m_SharedBy; -} - - //--------------------------------------------------------------------------------------------------------------------- // CPU topology interface // APIC IDs consist of variable-length bit fields indicating the logical, // core, package and cache IDs. Vol3a says they aren't guaranteed to be // contiguous, but that also applies to the individual fields. // for example, quad-core E5630 CPUs report 4-bit core IDs 0, 1, 6, 7. struct ApicField // POD { size_t operator()(size_t bits) const { return (bits >> shift) & mask; } size_t mask; // zero for zero-width fields size_t shift; }; struct CpuTopology // POD { size_t numProcessors; // total reported by OS ApicField logical; ApicField core; ApicField package; // how many are actually enabled size_t logicalPerCore; size_t coresPerPackage; size_t numPackages; }; static CpuTopology cpuTopology; static ModuleInitState cpuInitState; static Status InitCpuTopology() { cpuTopology.numProcessors = os_cpu_NumProcessors(); const size_t maxLogicalPerCore = MaxLogicalPerCore(); const size_t maxCoresPerPackage = MaxCoresPerPackage(); const size_t maxPackages = 256; // "enough" const size_t logicalWidth = ceil_log2(maxLogicalPerCore); const size_t coreWidth = ceil_log2(maxCoresPerPackage); const size_t packageWidth = ceil_log2(maxPackages); cpuTopology.logical.mask = bit_mask(logicalWidth); cpuTopology.core.mask = bit_mask(coreWidth); cpuTopology.package.mask = bit_mask(packageWidth); cpuTopology.logical.shift = 0; cpuTopology.core.shift = logicalWidth; cpuTopology.package.shift = logicalWidth + coreWidth; if(AreApicIdsReliable()) { struct NumUniqueValuesInField { size_t operator()(const ApicField& apicField) const { std::bitset values; for(size_t processor = 0; processor < os_cpu_NumProcessors(); processor++) { const ApicId apicId = ApicIdFromProcessor(processor); const size_t value = apicField(apicId); values.set(value); } return values.count(); } }; cpuTopology.logicalPerCore = NumUniqueValuesInField()(cpuTopology.logical); cpuTopology.coresPerPackage = NumUniqueValuesInField()(cpuTopology.core); cpuTopology.numPackages = NumUniqueValuesInField()(cpuTopology.package); } else // processor lacks an xAPIC, or IDs are invalid { struct MinPackages { size_t operator()(size_t maxCoresPerPackage, size_t maxLogicalPerCore) const { const size_t numNodes = numa_NumNodes(); const size_t logicalPerNode = PopulationCount(numa_ProcessorMaskFromNode(0)); // NB: some cores or logical processors may be disabled. const size_t maxLogicalPerPackage = maxCoresPerPackage*maxLogicalPerCore; const size_t minPackagesPerNode = DivideRoundUp(logicalPerNode, maxLogicalPerPackage); return minPackagesPerNode*numNodes; } }; // we can't differentiate between cores and logical processors. // since the former are less likely to be disabled, we seek the // maximum feasible number of cores and minimal number of packages: const size_t minPackages = MinPackages()(maxCoresPerPackage, maxLogicalPerCore); for(size_t numPackages = minPackages; numPackages <= cpuTopology.numProcessors; numPackages++) { if(cpuTopology.numProcessors % numPackages != 0) continue; const size_t logicalPerPackage = cpuTopology.numProcessors / numPackages; const size_t minCoresPerPackage = DivideRoundUp(logicalPerPackage, maxLogicalPerCore); for(size_t coresPerPackage = maxCoresPerPackage; coresPerPackage >= minCoresPerPackage; coresPerPackage--) { if(logicalPerPackage % coresPerPackage != 0) continue; const size_t logicalPerCore = logicalPerPackage / coresPerPackage; if(logicalPerCore <= maxLogicalPerCore) { ENSURE(cpuTopology.numProcessors == numPackages*coresPerPackage*logicalPerCore); cpuTopology.logicalPerCore = logicalPerCore; cpuTopology.coresPerPackage = coresPerPackage; cpuTopology.numPackages = numPackages; return INFO::OK; } } } DEBUG_WARN_ERR(ERR::LOGIC); // didn't find a feasible topology } return INFO::OK; } size_t NumPackages() { ModuleInit(&cpuInitState, InitCpuTopology); return cpuTopology.numPackages; } size_t CoresPerPackage() { ModuleInit(&cpuInitState, InitCpuTopology); return cpuTopology.coresPerPackage; } size_t LogicalPerCore() { ModuleInit(&cpuInitState, InitCpuTopology); return cpuTopology.logicalPerCore; } size_t LogicalFromApicId(ApicId apicId) { const size_t contiguousId = ContiguousIdFromApicId(apicId); return contiguousId % cpuTopology.logicalPerCore; } size_t CoreFromApicId(ApicId apicId) { const size_t contiguousId = ContiguousIdFromApicId(apicId); return (contiguousId / cpuTopology.logicalPerCore) % cpuTopology.coresPerPackage; } size_t PackageFromApicId(ApicId apicId) { const size_t contiguousId = ContiguousIdFromApicId(apicId); return contiguousId / (cpuTopology.logicalPerCore * cpuTopology.coresPerPackage); } ApicId ApicIdFromIndices(size_t idxLogical, size_t idxCore, size_t idxPackage) { ModuleInit(&cpuInitState, InitCpuTopology); size_t contiguousId = 0; ENSURE(idxPackage < cpuTopology.numPackages); contiguousId += idxPackage; contiguousId *= cpuTopology.coresPerPackage; ENSURE(idxCore < cpuTopology.coresPerPackage); contiguousId += idxCore; contiguousId *= cpuTopology.logicalPerCore; ENSURE(idxLogical < cpuTopology.logicalPerCore); contiguousId += idxLogical; ENSURE(contiguousId < cpuTopology.numProcessors); return ApicIdFromContiguousId(contiguousId); } - -//--------------------------------------------------------------------------------------------------------------------- -// cache topology - -// note: Windows 2003 GetLogicalProcessorInformation provides similar -// functionality but returns incorrect results. (it claims all cores in -// an Intel Core2 Quad processor share a single L2 cache.) - -class CacheRelations -{ -public: - /** - * add processor to the processor mask owned by cache identified by \ - **/ - void Add(u8 cacheId, size_t processor) - { - SharedCache* cache = Find(cacheId); - if(!cache) - { - m_caches.push_back(cacheId); - cache = &m_caches.back(); - } - cache->Add(processor); - } - - size_t NumCaches() const - { - return m_caches.size(); - } - - /** - * store topology in an array (one entry per cache) of masks - * representing the processors that share a cache. - **/ - void StoreProcessorMasks(uintptr_t* cachesProcessorMask) - { - for(size_t i = 0; i < NumCaches(); i++) - cachesProcessorMask[i] = m_caches[i].ProcessorMask(); - } - -private: - /** - * stores ID and tracks which processors share this cache - **/ - class SharedCache - { - public: - SharedCache(u8 cacheId) - : m_cacheId(cacheId), m_processorMask(0) - { - } - - bool Matches(u8 cacheId) const - { - return m_cacheId == cacheId; - } - - void Add(size_t processor) - { - m_processorMask |= uintptr_t(1) << processor; - } - - uintptr_t ProcessorMask() const - { - return m_processorMask; - } - - private: - u8 m_cacheId; - uintptr_t m_processorMask; - }; - - SharedCache* Find(u8 cacheId) - { - for(size_t i = 0; i < m_caches.size(); i++) - { - if(m_caches[i].Matches(cacheId)) - return &m_caches[i]; - } - - return 0; - } - - std::vector m_caches; -}; - -static void DetermineCachesProcessorMask(uintptr_t* cachesProcessorMask, size_t& numCaches) -{ - CacheRelations cacheRelations; - if(AreApicIdsReliable()) - { - const size_t numBits = ceil_log2(MaxLogicalPerCache()); - const u8 cacheIdMask = u8((0xFF << numBits) & 0xFF); - for(size_t processor = 0; processor < os_cpu_NumProcessors(); processor++) - { - const ApicId apicId = ApicIdFromProcessor(processor); - const u8 cacheId = u8(apicId & cacheIdMask); - cacheRelations.Add(cacheId, processor); - } - } - else - { - for(size_t processor = 0; processor < os_cpu_NumProcessors(); processor++) - { - // assume each processor has exactly one cache with matching IDs - const u8 cacheId = (u8)processor; - cacheRelations.Add(cacheId, processor); - } - } - - numCaches = cacheRelations.NumCaches(); - cacheRelations.StoreProcessorMasks(cachesProcessorMask); -} - - -static void DetermineProcessorsCache(const uintptr_t* cachesProcessorMask, size_t numCaches, size_t* processorsCache, size_t numProcessors) -{ - for(size_t cache = 0; cache < numCaches; cache++) - { - // write to all entries that share this cache - const uintptr_t processorMask = cachesProcessorMask[cache]; - for(size_t processor = 0; processor < numProcessors; processor++) - { - if(IsBitSet(processorMask, processor)) - { - ENSURE(processorsCache[processor] == 0); - processorsCache[processor] = cache; - } - } - } -} - - -//--------------------------------------------------------------------------------------------------------------------- -// cache topology interface - -struct CacheTopology // POD -{ - size_t numCaches; - size_t processorsCache[os_cpu_MaxProcessors]; - uintptr_t cachesProcessorMask[os_cpu_MaxProcessors]; -}; -static CacheTopology cacheTopology; -static ModuleInitState cacheInitState; - -static Status InitCacheTopology() -{ - ModuleInit(&cpuInitState, InitCpuTopology); - DetermineCachesProcessorMask(cacheTopology.cachesProcessorMask, cacheTopology.numCaches); - DetermineProcessorsCache(cacheTopology.cachesProcessorMask, cacheTopology.numCaches, cacheTopology.processorsCache, os_cpu_NumProcessors()); - return INFO::OK; -} - -size_t NumCaches() -{ - ModuleInit(&cacheInitState, InitCacheTopology); - return cacheTopology.numCaches; -} - -size_t CacheFromProcessor(size_t processor) -{ - ModuleInit(&cacheInitState, InitCacheTopology); - ENSURE(processor < os_cpu_NumProcessors()); - return cacheTopology.processorsCache[processor]; -} - -uintptr_t ProcessorMaskFromCache(size_t cache) -{ - ModuleInit(&cacheInitState, InitCacheTopology); - ENSURE(cache < cacheTopology.numCaches); - return cacheTopology.cachesProcessorMask[cache]; -} - } // namespace topology Index: ps/trunk/source/lib/sysdep/arch/x86_x64/topology.h =================================================================== --- ps/trunk/source/lib/sysdep/arch/x86_x64/topology.h (revision 24549) +++ ps/trunk/source/lib/sysdep/arch/x86_x64/topology.h (revision 24550) @@ -1,111 +1,85 @@ -/* Copyright (C) 2011 Wildfire Games. +/* Copyright (C) 2020 Wildfire Games. * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* * detection of CPU and cache topology. * thread-safe, no explicit initialization is required. */ #ifndef INCLUDED_X86_X64_TOPOLOGY #define INCLUDED_X86_X64_TOPOLOGY #include "lib/sysdep/arch/x86_x64/apic.h" // ApicId namespace topology { //----------------------------------------------------------------------------- // cpu // the CPU topology, i.e. how many packages, cores and logical processors are // actually present and enabled, is useful for parameterizing parallel // algorithms, especially on NUMA systems. // // note: OS abstractions usually only mention "processors", which could be // any mix of the above. /** * @return number of *enabled* CPU packages / sockets. **/ LIB_API size_t NumPackages(); /** * @return number of *enabled* CPU cores per package. * (2 on dual-core systems) **/ LIB_API size_t CoresPerPackage(); /** * @return number of *enabled* logical processors (aka Hyperthreads) * per core. (2 on P4 EE) **/ LIB_API size_t LogicalPerCore(); /** * @return index of processor package/socket in [0, NumPackages()) **/ LIB_API size_t PackageFromApicId(ApicId apicId); /** * @return index of processor core in [0, CoresPerPackage()) **/ LIB_API size_t CoreFromApicId(ApicId apicId); /** * @return index of logical processor in [0, LogicalPerCore()) **/ LIB_API size_t LogicalFromApicId(ApicId apicId); /** * @param idxPackage, idxCore, idxLogical return values of *FromApicId * @return APIC ID (see note at AreApicIdsReliable) **/ LIB_API ApicId ApicIdFromIndices(size_t idxPackage, size_t idxCore, size_t idxLogical); - -//----------------------------------------------------------------------------- -// L2 cache - -// knowledge of the cache topology, i.e. which processors share which caches, -// can be used to reduce contention and increase effective capacity by -// assigning the partner processors to work on the same dataset. -// -// example: Intel Core2 micro-architectures feature L2 caches shared by -// two cores. - -/** - * @return number of distinct L2 caches. - **/ -LIB_API size_t NumCaches(); - -/** - * @return L2 cache number (zero-based) to which the given processor belongs. - **/ -LIB_API size_t CacheFromProcessor(size_t processor); - -/** - * @return bit-mask of all processors sharing the given cache. - **/ -LIB_API uintptr_t ProcessorMaskFromCache(size_t cache); - } // namespace topology #endif // #ifndef INCLUDED_X86_X64_TOPOLOGY Index: ps/trunk/source/ps/GameSetup/HWDetect.cpp =================================================================== --- ps/trunk/source/ps/GameSetup/HWDetect.cpp (revision 24549) +++ ps/trunk/source/ps/GameSetup/HWDetect.cpp (revision 24550) @@ -1,719 +1,652 @@ /* Copyright (C) 2020 Wildfire Games. * This file is part of 0 A.D. * * 0 A.D. is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 2 of the License, or * (at your option) any later version. * * 0 A.D. is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with 0 A.D. If not, see . */ #include "precompiled.h" #include "scriptinterface/ScriptInterface.h" #include "lib/ogl.h" #include "lib/svn_revision.h" #include "lib/timer.h" #include "lib/utf8.h" #include "lib/external_libraries/libsdl.h" #include "lib/res/graphics/ogl_tex.h" #include "lib/posix/posix_utsname.h" #include "lib/sysdep/cpu.h" #include "lib/sysdep/gfx.h" #include "lib/sysdep/numa.h" #include "lib/sysdep/os_cpu.h" #if ARCH_X86_X64 -# include "lib/sysdep/arch/x86_x64/cache.h" # include "lib/sysdep/arch/x86_x64/topology.h" #endif #if CONFIG2_AUDIO #include "soundmanager/SoundManager.h" #endif #include "ps/CLogger.h" #include "ps/ConfigDB.h" #include "ps/Filesystem.h" #include "ps/GameSetup/Config.h" #include "ps/Profile.h" #include "ps/scripting/JSInterface_ConfigDB.h" #include "ps/scripting/JSInterface_Debug.h" #include "ps/UserReport.h" #include "ps/VideoMode.h" // TODO: Support OpenGL platforms which don’t use GLX as well. #if defined(SDL_VIDEO_DRIVER_X11) && !CONFIG2_GLES #include #include // Define the GLX_MESA_query_renderer macros if built with // an old Mesa (<10.0) that doesn't provide them #ifndef GLX_MESA_query_renderer #define GLX_MESA_query_renderer 1 #define GLX_RENDERER_VENDOR_ID_MESA 0x8183 #define GLX_RENDERER_DEVICE_ID_MESA 0x8184 #define GLX_RENDERER_VERSION_MESA 0x8185 #define GLX_RENDERER_ACCELERATED_MESA 0x8186 #define GLX_RENDERER_VIDEO_MEMORY_MESA 0x8187 #define GLX_RENDERER_UNIFIED_MEMORY_ARCHITECTURE_MESA 0x8188 #define GLX_RENDERER_PREFERRED_PROFILE_MESA 0x8189 #define GLX_RENDERER_OPENGL_CORE_PROFILE_VERSION_MESA 0x818A #define GLX_RENDERER_OPENGL_COMPATIBILITY_PROFILE_VERSION_MESA 0x818B #define GLX_RENDERER_OPENGL_ES_PROFILE_VERSION_MESA 0x818C #define GLX_RENDERER_OPENGL_ES2_PROFILE_VERSION_MESA 0x818D #define GLX_RENDERER_ID_MESA 0x818E #endif /* GLX_MESA_query_renderer */ #endif static void ReportSDL(const ScriptInterface& scriptInterface, JS::HandleValue settings); static void ReportGLLimits(const ScriptInterface& scriptInterface, JS::HandleValue settings); -#if ARCH_X86_X64 -void ConvertCaches(const ScriptInterface& scriptInterface, x86_x64::IdxCache idxCache, JS::MutableHandleValue ret) -{ - ScriptRequest rq(scriptInterface); - - ScriptInterface::CreateArray(rq, ret); - - for (size_t idxLevel = 0; idxLevel < x86_x64::Cache::maxLevels; ++idxLevel) - { - const x86_x64::Cache* pcache = x86_x64::Caches(idxCache+idxLevel); - if (pcache->m_Type == x86_x64::Cache::kNull || pcache->m_NumEntries == 0) - continue; - - JS::RootedValue cache(rq.cx); - - ScriptInterface::CreateObject( - rq, - &cache, - "type", static_cast(pcache->m_Type), - "level", static_cast(pcache->m_Level), - "associativity", static_cast(pcache->m_Associativity), - "linesize", static_cast(pcache->m_EntrySize), - "sharedby", static_cast(pcache->m_SharedBy), - "totalsize", static_cast(pcache->TotalSize())); - - scriptInterface.SetPropertyInt(ret, idxLevel, cache); - } -} - -void ConvertTLBs(const ScriptInterface& scriptInterface, JS::MutableHandleValue ret) -{ - ScriptRequest rq(scriptInterface); - - ScriptInterface::CreateArray(rq, ret); - - for(size_t i = 0; ; i++) - { - const x86_x64::Cache* ptlb = x86_x64::Caches(x86_x64::TLB+i); - if (!ptlb) - break; - - JS::RootedValue tlb(rq.cx); - - ScriptInterface::CreateObject( - rq, - &tlb, - "type", static_cast(ptlb->m_Type), - "level", static_cast(ptlb->m_Level), - "associativity", static_cast(ptlb->m_Associativity), - "pagesize", static_cast(ptlb->m_EntrySize), - "entries", static_cast(ptlb->m_NumEntries)); - - scriptInterface.SetPropertyInt(ret, i, tlb); - } -} -#endif - void SetDisableAudio(ScriptInterface::CmptPrivate* UNUSED(pCmptPrivate), bool disabled) { g_DisableAudio = disabled; } void RunHardwareDetection() { TIMER(L"RunHardwareDetection"); ScriptInterface scriptInterface("Engine", "HWDetect", g_ScriptContext); ScriptRequest rq(scriptInterface); JSI_Debug::RegisterScriptFunctions(scriptInterface); // Engine.DisplayErrorDialog JSI_ConfigDB::RegisterScriptFunctions(scriptInterface); scriptInterface.RegisterFunction("SetDisableAudio"); // Load the detection script: const wchar_t* scriptName = L"hwdetect/hwdetect.js"; CVFSFile file; if (file.Load(g_VFS, scriptName) != PSRETURN_OK) { LOGERROR("Failed to load hardware detection script"); return; } std::string code = file.DecodeUTF8(); // assume it's UTF-8 scriptInterface.LoadScript(scriptName, code); // Collect all the settings we'll pass to the script: // (We'll use this same data for the opt-in online reporting system, so it // includes some fields that aren't directly useful for the hwdetect script) JS::RootedValue settings(rq.cx); ScriptInterface::CreateObject(rq, &settings); scriptInterface.SetProperty(settings, "os_unix", OS_UNIX); scriptInterface.SetProperty(settings, "os_bsd", OS_BSD); scriptInterface.SetProperty(settings, "os_linux", OS_LINUX); scriptInterface.SetProperty(settings, "os_android", OS_ANDROID); scriptInterface.SetProperty(settings, "os_macosx", OS_MACOSX); scriptInterface.SetProperty(settings, "os_win", OS_WIN); scriptInterface.SetProperty(settings, "arch_ia32", ARCH_IA32); scriptInterface.SetProperty(settings, "arch_amd64", ARCH_AMD64); scriptInterface.SetProperty(settings, "arch_arm", ARCH_ARM); scriptInterface.SetProperty(settings, "arch_aarch64", ARCH_AARCH64); scriptInterface.SetProperty(settings, "arch_e2k", ARCH_E2K); #ifdef NDEBUG scriptInterface.SetProperty(settings, "build_debug", 0); #else scriptInterface.SetProperty(settings, "build_debug", 1); #endif scriptInterface.SetProperty(settings, "build_opengles", CONFIG2_GLES); scriptInterface.SetProperty(settings, "build_datetime", std::string(__DATE__ " " __TIME__)); scriptInterface.SetProperty(settings, "build_revision", std::wstring(svn_revision)); scriptInterface.SetProperty(settings, "build_msc", (int)MSC_VERSION); scriptInterface.SetProperty(settings, "build_icc", (int)ICC_VERSION); scriptInterface.SetProperty(settings, "build_gcc", (int)GCC_VERSION); scriptInterface.SetProperty(settings, "build_clang", (int)CLANG_VERSION); scriptInterface.SetProperty(settings, "gfx_card", gfx::CardName()); scriptInterface.SetProperty(settings, "gfx_drv_ver", gfx::DriverInfo()); #if CONFIG2_AUDIO scriptInterface.SetProperty(settings, "snd_card", g_SoundManager->GetSoundCardNames()); scriptInterface.SetProperty(settings, "snd_drv_ver", g_SoundManager->GetOpenALVersion()); #endif ReportSDL(scriptInterface, settings); ReportGLLimits(scriptInterface, settings); scriptInterface.SetProperty(settings, "video_desktop_xres", g_VideoMode.GetDesktopXRes()); scriptInterface.SetProperty(settings, "video_desktop_yres", g_VideoMode.GetDesktopYRes()); scriptInterface.SetProperty(settings, "video_desktop_bpp", g_VideoMode.GetDesktopBPP()); scriptInterface.SetProperty(settings, "video_desktop_freq", g_VideoMode.GetDesktopFreq()); struct utsname un; uname(&un); scriptInterface.SetProperty(settings, "uname_sysname", std::string(un.sysname)); scriptInterface.SetProperty(settings, "uname_release", std::string(un.release)); scriptInterface.SetProperty(settings, "uname_version", std::string(un.version)); scriptInterface.SetProperty(settings, "uname_machine", std::string(un.machine)); #if OS_LINUX { std::ifstream ifs("/etc/lsb-release"); if (ifs.good()) { std::string str((std::istreambuf_iterator(ifs)), std::istreambuf_iterator()); scriptInterface.SetProperty(settings, "linux_release", str); } } #endif scriptInterface.SetProperty(settings, "cpu_identifier", std::string(cpu_IdentifierString())); scriptInterface.SetProperty(settings, "cpu_frequency", os_cpu_ClockFrequency()); scriptInterface.SetProperty(settings, "cpu_pagesize", (u32)os_cpu_PageSize()); scriptInterface.SetProperty(settings, "cpu_largepagesize", (u32)os_cpu_LargePageSize()); scriptInterface.SetProperty(settings, "cpu_numprocs", (u32)os_cpu_NumProcessors()); #if ARCH_X86_X64 scriptInterface.SetProperty(settings, "cpu_numpackages", (u32)topology::NumPackages()); scriptInterface.SetProperty(settings, "cpu_coresperpackage", (u32)topology::CoresPerPackage()); scriptInterface.SetProperty(settings, "cpu_logicalpercore", (u32)topology::LogicalPerCore()); - scriptInterface.SetProperty(settings, "cpu_numcaches", (u32)topology::NumCaches()); #endif scriptInterface.SetProperty(settings, "numa_numnodes", (u32)numa_NumNodes()); scriptInterface.SetProperty(settings, "numa_factor", numa_Factor()); scriptInterface.SetProperty(settings, "numa_interleaved", numa_IsMemoryInterleaved()); scriptInterface.SetProperty(settings, "ram_total", (u32)os_cpu_MemorySize()); scriptInterface.SetProperty(settings, "ram_total_os", (u32)os_cpu_QueryMemorySize()); #if ARCH_X86_X64 scriptInterface.SetProperty(settings, "x86_vendor", (u32)x86_x64::Vendor()); scriptInterface.SetProperty(settings, "x86_model", (u32)x86_x64::Model()); scriptInterface.SetProperty(settings, "x86_family", (u32)x86_x64::Family()); u32 caps0, caps1, caps2, caps3; x86_x64::GetCapBits(&caps0, &caps1, &caps2, &caps3); scriptInterface.SetProperty(settings, "x86_caps[0]", caps0); scriptInterface.SetProperty(settings, "x86_caps[1]", caps1); scriptInterface.SetProperty(settings, "x86_caps[2]", caps2); scriptInterface.SetProperty(settings, "x86_caps[3]", caps3); - - JS::RootedValue tmpVal(rq.cx); - ConvertCaches(scriptInterface, x86_x64::L1I, &tmpVal); - scriptInterface.SetProperty(settings, "x86_icaches", tmpVal); - ConvertCaches(scriptInterface, x86_x64::L1D, &tmpVal); - scriptInterface.SetProperty(settings, "x86_dcaches", tmpVal); - ConvertTLBs(scriptInterface, &tmpVal); - scriptInterface.SetProperty(settings, "x86_tlbs", tmpVal); #endif scriptInterface.SetProperty(settings, "timer_resolution", timer_Resolution()); // The version should be increased for every meaningful change. - const int reportVersion = 13; + const int reportVersion = 14; // Send the same data to the reporting system g_UserReporter.SubmitReport( "hwdetect", reportVersion, scriptInterface.StringifyJSON(&settings, false), scriptInterface.StringifyJSON(&settings, true)); // Run the detection script: JS::RootedValue global(rq.cx, rq.globalValue()); scriptInterface.CallFunctionVoid(global, "RunHardwareDetection", settings); } static void ReportSDL(const ScriptInterface& scriptInterface, JS::HandleValue settings) { SDL_version build, runtime; SDL_VERSION(&build); char version[16]; snprintf(version, ARRAY_SIZE(version), "%d.%d.%d", build.major, build.minor, build.patch); scriptInterface.SetProperty(settings, "sdl_build_version", version); SDL_GetVersion(&runtime); snprintf(version, ARRAY_SIZE(version), "%d.%d.%d", runtime.major, runtime.minor, runtime.patch); scriptInterface.SetProperty(settings, "sdl_runtime_version", version); // This is null in atlas (and further the call triggers an assertion). const char* backend = g_VideoMode.GetWindow() ? GetSDLSubsystem(g_VideoMode.GetWindow()) : "none"; scriptInterface.SetProperty(settings, "sdl_video_backend", backend ? backend : "unknown"); } static void ReportGLLimits(const ScriptInterface& scriptInterface, JS::HandleValue settings) { const char* errstr = "(error)"; #define INTEGER(id) do { \ GLint i = -1; \ glGetIntegerv(GL_##id, &i); \ if (ogl_SquelchError(GL_INVALID_ENUM)) \ scriptInterface.SetProperty(settings, "GL_" #id, errstr); \ else \ scriptInterface.SetProperty(settings, "GL_" #id, i); \ } while (false) #define INTEGER2(id) do { \ GLint i[2] = { -1, -1 }; \ glGetIntegerv(GL_##id, i); \ if (ogl_SquelchError(GL_INVALID_ENUM)) { \ scriptInterface.SetProperty(settings, "GL_" #id "[0]", errstr); \ scriptInterface.SetProperty(settings, "GL_" #id "[1]", errstr); \ } else { \ scriptInterface.SetProperty(settings, "GL_" #id "[0]", i[0]); \ scriptInterface.SetProperty(settings, "GL_" #id "[1]", i[1]); \ } \ } while (false) #define FLOAT(id) do { \ GLfloat f = std::numeric_limits::quiet_NaN(); \ glGetFloatv(GL_##id, &f); \ if (ogl_SquelchError(GL_INVALID_ENUM)) \ scriptInterface.SetProperty(settings, "GL_" #id, errstr); \ else \ scriptInterface.SetProperty(settings, "GL_" #id, f); \ } while (false) #define FLOAT2(id) do { \ GLfloat f[2] = { std::numeric_limits::quiet_NaN(), std::numeric_limits::quiet_NaN() }; \ glGetFloatv(GL_##id, f); \ if (ogl_SquelchError(GL_INVALID_ENUM)) { \ scriptInterface.SetProperty(settings, "GL_" #id "[0]", errstr); \ scriptInterface.SetProperty(settings, "GL_" #id "[1]", errstr); \ } else { \ scriptInterface.SetProperty(settings, "GL_" #id "[0]", f[0]); \ scriptInterface.SetProperty(settings, "GL_" #id "[1]", f[1]); \ } \ } while (false) #define STRING(id) do { \ const char* c = (const char*)glGetString(GL_##id); \ if (!c) c = ""; \ if (ogl_SquelchError(GL_INVALID_ENUM)) c = errstr; \ scriptInterface.SetProperty(settings, "GL_" #id, std::string(c)); \ } while (false) #define QUERY(target, pname) do { \ GLint i = -1; \ pglGetQueryivARB(GL_##target, GL_##pname, &i); \ if (ogl_SquelchError(GL_INVALID_ENUM)) \ scriptInterface.SetProperty(settings, "GL_" #target ".GL_" #pname, errstr); \ else \ scriptInterface.SetProperty(settings, "GL_" #target ".GL_" #pname, i); \ } while (false) #define VERTEXPROGRAM(id) do { \ GLint i = -1; \ pglGetProgramivARB(GL_VERTEX_PROGRAM_ARB, GL_##id, &i); \ if (ogl_SquelchError(GL_INVALID_ENUM)) \ scriptInterface.SetProperty(settings, "GL_VERTEX_PROGRAM_ARB.GL_" #id, errstr); \ else \ scriptInterface.SetProperty(settings, "GL_VERTEX_PROGRAM_ARB.GL_" #id, i); \ } while (false) #define FRAGMENTPROGRAM(id) do { \ GLint i = -1; \ pglGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_##id, &i); \ if (ogl_SquelchError(GL_INVALID_ENUM)) \ scriptInterface.SetProperty(settings, "GL_FRAGMENT_PROGRAM_ARB.GL_" #id, errstr); \ else \ scriptInterface.SetProperty(settings, "GL_FRAGMENT_PROGRAM_ARB.GL_" #id, i); \ } while (false) #define BOOL(id) INTEGER(id) ogl_WarnIfError(); // Core OpenGL 1.3: // (We don't bother checking extension strings for anything older than 1.3; // it'll just produce harmless warnings) STRING(VERSION); STRING(VENDOR); STRING(RENDERER); STRING(EXTENSIONS); #if !CONFIG2_GLES INTEGER(MAX_LIGHTS); INTEGER(MAX_CLIP_PLANES); // Skip MAX_COLOR_MATRIX_STACK_DEPTH (only in imaging subset) INTEGER(MAX_MODELVIEW_STACK_DEPTH); INTEGER(MAX_PROJECTION_STACK_DEPTH); INTEGER(MAX_TEXTURE_STACK_DEPTH); #endif INTEGER(SUBPIXEL_BITS); #if !CONFIG2_GLES INTEGER(MAX_3D_TEXTURE_SIZE); #endif INTEGER(MAX_TEXTURE_SIZE); INTEGER(MAX_CUBE_MAP_TEXTURE_SIZE); #if !CONFIG2_GLES INTEGER(MAX_PIXEL_MAP_TABLE); INTEGER(MAX_NAME_STACK_DEPTH); INTEGER(MAX_LIST_NESTING); INTEGER(MAX_EVAL_ORDER); #endif INTEGER2(MAX_VIEWPORT_DIMS); #if !CONFIG2_GLES INTEGER(MAX_ATTRIB_STACK_DEPTH); INTEGER(MAX_CLIENT_ATTRIB_STACK_DEPTH); INTEGER(AUX_BUFFERS); BOOL(RGBA_MODE); BOOL(INDEX_MODE); BOOL(DOUBLEBUFFER); BOOL(STEREO); #endif FLOAT2(ALIASED_POINT_SIZE_RANGE); #if !CONFIG2_GLES FLOAT2(SMOOTH_POINT_SIZE_RANGE); FLOAT(SMOOTH_POINT_SIZE_GRANULARITY); #endif FLOAT2(ALIASED_LINE_WIDTH_RANGE); #if !CONFIG2_GLES FLOAT2(SMOOTH_LINE_WIDTH_RANGE); FLOAT(SMOOTH_LINE_WIDTH_GRANULARITY); // Skip MAX_CONVOLUTION_WIDTH, MAX_CONVOLUTION_HEIGHT (only in imaging subset) INTEGER(MAX_ELEMENTS_INDICES); INTEGER(MAX_ELEMENTS_VERTICES); INTEGER(MAX_TEXTURE_UNITS); #endif INTEGER(SAMPLE_BUFFERS); INTEGER(SAMPLES); // TODO: compressed texture formats INTEGER(RED_BITS); INTEGER(GREEN_BITS); INTEGER(BLUE_BITS); INTEGER(ALPHA_BITS); #if !CONFIG2_GLES INTEGER(INDEX_BITS); #endif INTEGER(DEPTH_BITS); INTEGER(STENCIL_BITS); #if !CONFIG2_GLES INTEGER(ACCUM_RED_BITS); INTEGER(ACCUM_GREEN_BITS); INTEGER(ACCUM_BLUE_BITS); INTEGER(ACCUM_ALPHA_BITS); #endif #if !CONFIG2_GLES // Core OpenGL 2.0 (treated as extensions): if (ogl_HaveExtension("GL_EXT_texture_lod_bias")) { FLOAT(MAX_TEXTURE_LOD_BIAS_EXT); } if (ogl_HaveExtension("GL_ARB_occlusion_query")) { QUERY(SAMPLES_PASSED, QUERY_COUNTER_BITS); } if (ogl_HaveExtension("GL_ARB_shading_language_100")) { STRING(SHADING_LANGUAGE_VERSION_ARB); } if (ogl_HaveExtension("GL_ARB_vertex_shader")) { INTEGER(MAX_VERTEX_ATTRIBS_ARB); INTEGER(MAX_VERTEX_UNIFORM_COMPONENTS_ARB); INTEGER(MAX_VARYING_FLOATS_ARB); INTEGER(MAX_COMBINED_TEXTURE_IMAGE_UNITS_ARB); INTEGER(MAX_VERTEX_TEXTURE_IMAGE_UNITS_ARB); } if (ogl_HaveExtension("GL_ARB_fragment_shader")) { INTEGER(MAX_FRAGMENT_UNIFORM_COMPONENTS_ARB); } if (ogl_HaveExtension("GL_ARB_vertex_shader") || ogl_HaveExtension("GL_ARB_fragment_shader") || ogl_HaveExtension("GL_ARB_vertex_program") || ogl_HaveExtension("GL_ARB_fragment_program")) { INTEGER(MAX_TEXTURE_IMAGE_UNITS_ARB); INTEGER(MAX_TEXTURE_COORDS_ARB); } if (ogl_HaveExtension("GL_ARB_draw_buffers")) { INTEGER(MAX_DRAW_BUFFERS_ARB); } // Core OpenGL 3.0: if (ogl_HaveExtension("GL_EXT_gpu_shader4")) { INTEGER(MIN_PROGRAM_TEXEL_OFFSET); // no _EXT version of these in glext.h INTEGER(MAX_PROGRAM_TEXEL_OFFSET); } if (ogl_HaveExtension("GL_EXT_framebuffer_object")) { INTEGER(MAX_COLOR_ATTACHMENTS_EXT); INTEGER(MAX_RENDERBUFFER_SIZE_EXT); } if (ogl_HaveExtension("GL_EXT_framebuffer_multisample")) { INTEGER(MAX_SAMPLES_EXT); } if (ogl_HaveExtension("GL_EXT_texture_array")) { INTEGER(MAX_ARRAY_TEXTURE_LAYERS_EXT); } if (ogl_HaveExtension("GL_EXT_transform_feedback")) { INTEGER(MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS_EXT); INTEGER(MAX_TRANSFORM_FEEDBACK_SEPARATE_ATTRIBS_EXT); INTEGER(MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS_EXT); } // Other interesting extensions: if (ogl_HaveExtension("GL_EXT_timer_query") || ogl_HaveExtension("GL_ARB_timer_query")) { QUERY(TIME_ELAPSED, QUERY_COUNTER_BITS); } if (ogl_HaveExtension("GL_ARB_timer_query")) { QUERY(TIMESTAMP, QUERY_COUNTER_BITS); } if (ogl_HaveExtension("GL_EXT_texture_filter_anisotropic")) { FLOAT(MAX_TEXTURE_MAX_ANISOTROPY_EXT); } if (ogl_HaveExtension("GL_ARB_texture_rectangle")) { INTEGER(MAX_RECTANGLE_TEXTURE_SIZE_ARB); } if (ogl_HaveExtension("GL_ARB_vertex_program") || ogl_HaveExtension("GL_ARB_fragment_program")) { INTEGER(MAX_PROGRAM_MATRICES_ARB); INTEGER(MAX_PROGRAM_MATRIX_STACK_DEPTH_ARB); } if (ogl_HaveExtension("GL_ARB_vertex_program")) { VERTEXPROGRAM(MAX_PROGRAM_ENV_PARAMETERS_ARB); VERTEXPROGRAM(MAX_PROGRAM_LOCAL_PARAMETERS_ARB); VERTEXPROGRAM(MAX_PROGRAM_INSTRUCTIONS_ARB); VERTEXPROGRAM(MAX_PROGRAM_TEMPORARIES_ARB); VERTEXPROGRAM(MAX_PROGRAM_PARAMETERS_ARB); VERTEXPROGRAM(MAX_PROGRAM_ATTRIBS_ARB); VERTEXPROGRAM(MAX_PROGRAM_ADDRESS_REGISTERS_ARB); VERTEXPROGRAM(MAX_PROGRAM_NATIVE_INSTRUCTIONS_ARB); VERTEXPROGRAM(MAX_PROGRAM_NATIVE_TEMPORARIES_ARB); VERTEXPROGRAM(MAX_PROGRAM_NATIVE_PARAMETERS_ARB); VERTEXPROGRAM(MAX_PROGRAM_NATIVE_ATTRIBS_ARB); VERTEXPROGRAM(MAX_PROGRAM_NATIVE_ADDRESS_REGISTERS_ARB); if (ogl_HaveExtension("GL_ARB_fragment_program")) { // The spec seems to say these should be supported, but // Mesa complains about them so let's not bother /* VERTEXPROGRAM(MAX_PROGRAM_ALU_INSTRUCTIONS_ARB); VERTEXPROGRAM(MAX_PROGRAM_TEX_INSTRUCTIONS_ARB); VERTEXPROGRAM(MAX_PROGRAM_TEX_INDIRECTIONS_ARB); VERTEXPROGRAM(MAX_PROGRAM_NATIVE_ALU_INSTRUCTIONS_ARB); VERTEXPROGRAM(MAX_PROGRAM_NATIVE_TEX_INSTRUCTIONS_ARB); VERTEXPROGRAM(MAX_PROGRAM_NATIVE_TEX_INDIRECTIONS_ARB); */ } } if (ogl_HaveExtension("GL_ARB_fragment_program")) { FRAGMENTPROGRAM(MAX_PROGRAM_ENV_PARAMETERS_ARB); FRAGMENTPROGRAM(MAX_PROGRAM_LOCAL_PARAMETERS_ARB); FRAGMENTPROGRAM(MAX_PROGRAM_INSTRUCTIONS_ARB); FRAGMENTPROGRAM(MAX_PROGRAM_ALU_INSTRUCTIONS_ARB); FRAGMENTPROGRAM(MAX_PROGRAM_TEX_INSTRUCTIONS_ARB); FRAGMENTPROGRAM(MAX_PROGRAM_TEX_INDIRECTIONS_ARB); FRAGMENTPROGRAM(MAX_PROGRAM_TEMPORARIES_ARB); FRAGMENTPROGRAM(MAX_PROGRAM_PARAMETERS_ARB); FRAGMENTPROGRAM(MAX_PROGRAM_ATTRIBS_ARB); FRAGMENTPROGRAM(MAX_PROGRAM_NATIVE_INSTRUCTIONS_ARB); FRAGMENTPROGRAM(MAX_PROGRAM_NATIVE_ALU_INSTRUCTIONS_ARB); FRAGMENTPROGRAM(MAX_PROGRAM_NATIVE_TEX_INSTRUCTIONS_ARB); FRAGMENTPROGRAM(MAX_PROGRAM_NATIVE_TEX_INDIRECTIONS_ARB); FRAGMENTPROGRAM(MAX_PROGRAM_NATIVE_TEMPORARIES_ARB); FRAGMENTPROGRAM(MAX_PROGRAM_NATIVE_PARAMETERS_ARB); FRAGMENTPROGRAM(MAX_PROGRAM_NATIVE_ATTRIBS_ARB); if (ogl_HaveExtension("GL_ARB_vertex_program")) { // The spec seems to say these should be supported, but // Intel drivers on Windows complain about them so let's not bother /* FRAGMENTPROGRAM(MAX_PROGRAM_ADDRESS_REGISTERS_ARB); FRAGMENTPROGRAM(MAX_PROGRAM_NATIVE_ADDRESS_REGISTERS_ARB); */ } } if (ogl_HaveExtension("GL_ARB_geometry_shader4")) { INTEGER(MAX_GEOMETRY_TEXTURE_IMAGE_UNITS_ARB); INTEGER(MAX_GEOMETRY_OUTPUT_VERTICES_ARB); INTEGER(MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS_ARB); INTEGER(MAX_GEOMETRY_UNIFORM_COMPONENTS_ARB); INTEGER(MAX_GEOMETRY_VARYING_COMPONENTS_ARB); INTEGER(MAX_VERTEX_VARYING_COMPONENTS_ARB); } #else // CONFIG2_GLES // Core OpenGL ES 2.0: STRING(SHADING_LANGUAGE_VERSION); INTEGER(MAX_VERTEX_ATTRIBS); INTEGER(MAX_VERTEX_UNIFORM_VECTORS); INTEGER(MAX_VARYING_VECTORS); INTEGER(MAX_COMBINED_TEXTURE_IMAGE_UNITS); INTEGER(MAX_VERTEX_TEXTURE_IMAGE_UNITS); INTEGER(MAX_FRAGMENT_UNIFORM_VECTORS); INTEGER(MAX_TEXTURE_IMAGE_UNITS); INTEGER(MAX_RENDERBUFFER_SIZE); #endif // CONFIG2_GLES // TODO: Support OpenGL platforms which don’t use GLX as well. #if defined(SDL_VIDEO_DRIVER_X11) && !CONFIG2_GLES #define GLXQCR_INTEGER(id) do { \ unsigned int i = UINT_MAX; \ if (pglXQueryCurrentRendererIntegerMESA(id, &i)) \ scriptInterface.SetProperty(settings, #id, i); \ } while (false) #define GLXQCR_INTEGER2(id) do { \ unsigned int i[2] = { UINT_MAX, UINT_MAX }; \ if (pglXQueryCurrentRendererIntegerMESA(id, i)) { \ scriptInterface.SetProperty(settings, #id "[0]", i[0]); \ scriptInterface.SetProperty(settings, #id "[1]", i[1]); \ } \ } while (false) #define GLXQCR_INTEGER3(id) do { \ unsigned int i[3] = { UINT_MAX, UINT_MAX, UINT_MAX }; \ if (pglXQueryCurrentRendererIntegerMESA(id, i)) { \ scriptInterface.SetProperty(settings, #id "[0]", i[0]); \ scriptInterface.SetProperty(settings, #id "[1]", i[1]); \ scriptInterface.SetProperty(settings, #id "[2]", i[2]); \ } \ } while (false) #define GLXQCR_STRING(id) do { \ const char* str = pglXQueryCurrentRendererStringMESA(id); \ if (str) \ scriptInterface.SetProperty(settings, #id ".string", str); \ } while (false) SDL_SysWMinfo wminfo; SDL_VERSION(&wminfo.version); const int ret = SDL_GetWindowWMInfo(g_VideoMode.GetWindow(), &wminfo); if (ret && wminfo.subsystem == SDL_SYSWM_X11) { Display* dpy = wminfo.info.x11.display; int scrnum = DefaultScreen(dpy); const char* glxexts = glXQueryExtensionsString(dpy, scrnum); scriptInterface.SetProperty(settings, "glx_extensions", glxexts); if (strstr(glxexts, "GLX_MESA_query_renderer") && pglXQueryCurrentRendererIntegerMESA && pglXQueryCurrentRendererStringMESA) { GLXQCR_INTEGER(GLX_RENDERER_VENDOR_ID_MESA); GLXQCR_INTEGER(GLX_RENDERER_DEVICE_ID_MESA); GLXQCR_INTEGER3(GLX_RENDERER_VERSION_MESA); GLXQCR_INTEGER(GLX_RENDERER_ACCELERATED_MESA); GLXQCR_INTEGER(GLX_RENDERER_VIDEO_MEMORY_MESA); GLXQCR_INTEGER(GLX_RENDERER_UNIFIED_MEMORY_ARCHITECTURE_MESA); GLXQCR_INTEGER(GLX_RENDERER_PREFERRED_PROFILE_MESA); GLXQCR_INTEGER2(GLX_RENDERER_OPENGL_CORE_PROFILE_VERSION_MESA); GLXQCR_INTEGER2(GLX_RENDERER_OPENGL_COMPATIBILITY_PROFILE_VERSION_MESA); GLXQCR_INTEGER2(GLX_RENDERER_OPENGL_ES_PROFILE_VERSION_MESA); GLXQCR_INTEGER2(GLX_RENDERER_OPENGL_ES2_PROFILE_VERSION_MESA); GLXQCR_STRING(GLX_RENDERER_VENDOR_ID_MESA); GLXQCR_STRING(GLX_RENDERER_DEVICE_ID_MESA); } } #endif // SDL_VIDEO_DRIVER_X11 }