Changeset View
Changeset View
Standalone View
Standalone View
source/lib/sysdep/arch/x86_x64/x86_x64.cpp
Show All 27 Lines | |||||
#include "lib/sysdep/arch/x86_x64/x86_x64.h" | #include "lib/sysdep/arch/x86_x64/x86_x64.h" | ||||
#include <cstring> | #include <cstring> | ||||
#include <cstdio> | #include <cstdio> | ||||
#include <vector> | #include <vector> | ||||
#include <set> | #include <set> | ||||
#include <algorithm> | #include <algorithm> | ||||
#include "lib/posix/posix_pthread.h" | |||||
#include "lib/bits.h" | #include "lib/bits.h" | ||||
#include "lib/timer.h" | #include "lib/timer.h" | ||||
#include "lib/module_init.h" | #include "lib/module_init.h" | ||||
#include "lib/sysdep/cpu.h" | #include "lib/sysdep/cpu.h" | ||||
#include "lib/sysdep/os_cpu.h" | #include "lib/sysdep/os_cpu.h" | ||||
#if MSC_VERSION | #if MSC_VERSION | ||||
# include <intrin.h> // __rdtsc | # include <intrin.h> // __rdtsc | ||||
▲ Show 20 Lines • Show All 353 Lines • ▼ Show 20 Lines | #if MSC_VERSION | ||||
__debugbreak(); | __debugbreak(); | ||||
#elif GCC_VERSION | #elif GCC_VERSION | ||||
// note: this probably isn't necessary, since unix_debug_break | // note: this probably isn't necessary, since unix_debug_break | ||||
// (SIGTRAP) is most probably available if GCC_VERSION. | // (SIGTRAP) is most probably available if GCC_VERSION. | ||||
// we include it for completeness, though. | // we include it for completeness, though. | ||||
__asm__ __volatile__ ("int $3"); | __asm__ __volatile__ ("int $3"); | ||||
#endif | #endif | ||||
} | } | ||||
//----------------------------------------------------------------------------- | |||||
// CPU frequency | |||||
// Scope guard: switches the calling thread to SCHED_FIFO at the given
// priority and puts the previous policy/priority back on destruction.
//
// The boost is best-effort. If the current scheduling parameters cannot
// be queried, or the new ones cannot be applied (pthread_setschedparam
// returns non-zero), the thread is left untouched and the destructor
// does nothing - we never "restore" values that were not captured.
class ScopedSetPriority
{
public:
	// newPriority: value stored in sched_param::sched_priority for the
	// SCHED_FIFO policy.
	explicit ScopedSetPriority(int newPriority)
		: m_oldPolicy(0), m_oldParam(), m_mustRestore(false)
	{
		// remember the current scheduling policy and priority. without
		// them there is nothing valid to restore later, so don't change
		// anything in that case.
		if(pthread_getschedparam(pthread_self(), &m_oldPolicy, &m_oldParam) != 0)
			return;

		// apply the new priority; only a successful change needs undoing.
		sched_param newParam = {0};
		newParam.sched_priority = newPriority;
		m_mustRestore = (pthread_setschedparam(pthread_self(), SCHED_FIFO, &newParam) == 0);
	}

	~ScopedSetPriority()
	{
		// restore previous policy and priority (nothing sensible can be
		// done about a failure here, hence the result is discarded).
		if(m_mustRestore)
			(void)pthread_setschedparam(pthread_self(), m_oldPolicy, &m_oldParam);
	}

private:
	// not copyable: a copy would restore the saved parameters a second time.
	// (declared but intentionally never defined)
	ScopedSetPriority(const ScopedSetPriority&);
	ScopedSetPriority& operator=(const ScopedSetPriority&);

	int m_oldPolicy;         // policy in effect before the boost
	sched_param m_oldParam;  // parameters in effect before the boost
	bool m_mustRestore;      // true iff the boost was actually applied
};
// note: this function uses timer.cpp!timer_Time, which is implemented via | |||||
// whrt.cpp on Windows. | |||||
double ClockFrequency() | |||||
{ | |||||
// if the TSC isn't available, there's really no good way to count the | |||||
// actual CPU clocks per known time interval, so bail. | |||||
// note: loop iterations ("bogomips") are not a reliable measure due | |||||
// to differing IPC and compiler optimizations. | |||||
if(!Cap(x86_x64::CAP_TSC)) | |||||
return -1.0; // impossible value | |||||
// increase priority to reduce interference while measuring. | |||||
const int priority = sched_get_priority_max(SCHED_FIFO)-1; | |||||
ScopedSetPriority ssp(priority); | |||||
// note: no need to "warm up" cpuid - it will already have been | |||||
// called several times by the time this code is reached. | |||||
// (background: it's used in rdtsc() to serialize instruction flow; | |||||
// the first call is documented to be slower on Intel CPUs) | |||||
size_t numSamples = 16; | |||||
// if clock is low-res, do less samples so it doesn't take too long. | |||||
// balance measuring time (~ 10 ms) and accuracy (< 0.1% error - | |||||
// ok for using the TSC as a time reference) | |||||
if(timer_Resolution() >= 1e-3) | |||||
numSamples = 8; | |||||
std::vector<double> samples(numSamples); | |||||
for(size_t i = 0; i < numSamples; i++) | |||||
{ | |||||
double dt; | |||||
i64 dc; // (i64 instead of u64 for faster conversion to double) | |||||
// count # of clocks in max{1 tick, 1 ms}: | |||||
// .. wait for start of tick. | |||||
const double t0 = timer_Time(); | |||||
u64 c1; double t1; | |||||
do | |||||
{ | |||||
// note: timer_Time effectively has a long delay (up to 5 us) | |||||
// before returning the time. we call it before rdtsc to | |||||
// minimize the delay between actually sampling time / TSC, | |||||
// thus decreasing the chance for interference. | |||||
// (if unavoidable background activity, e.g. interrupts, | |||||
// delays the second reading, inaccuracy is introduced). | |||||
t1 = timer_Time(); | |||||
c1 = rdtsc(); | |||||
} | |||||
while(t1 == t0); | |||||
// .. wait until start of next tick and at least 1 ms elapsed. | |||||
do | |||||
{ | |||||
const double t2 = timer_Time(); | |||||
const u64 c2 = rdtsc(); | |||||
dc = (i64)(c2 - c1); | |||||
dt = t2 - t1; | |||||
} | |||||
while(dt < 1e-3); | |||||
// .. freq = (delta_clocks) / (delta_seconds); | |||||
// rdtsc/timer overhead is negligible. | |||||
const double freq = dc / dt; | |||||
samples[i] = freq; | |||||
} | |||||
std::sort(samples.begin(), samples.end()); | |||||
// median filter (remove upper and lower 25% and average the rest). | |||||
// note: don't just take the lowest value! it could conceivably be | |||||
// too low, if background processing delays reading c1 (see above). | |||||
double sum = 0.0; | |||||
const size_t lo = numSamples/4, hi = 3*numSamples/4; | |||||
for(size_t i = lo; i < hi; i++) | |||||
sum += samples[i]; | |||||
const double clockFrequency = sum / (hi-lo); | |||||
return clockFrequency; | |||||
} | |||||
} // namespace x86_x64 | } // namespace x86_x64 | ||||
const char* cpu_IdentifierString() | const char* cpu_IdentifierString() | ||||
{ | { | ||||
return x86_x64::IdentifierString(); | return x86_x64::IdentifierString(); | ||||
} | } |
Wildfire Games · Phabricator