Index: source/ps/Profiler2.h =================================================================== --- source/ps/Profiler2.h +++ source/ps/Profiler2.h @@ -81,6 +81,7 @@ #include "lib/timer.h" #include "ps/ThreadUtil.h" +#include #include #include #include @@ -261,11 +262,8 @@ // outside the range Pos1 <= x < Pos0 are safe to use. (Any in that range might // be half-written and corrupted.) (All ranges are modulo BUFFER_SIZE.) // Outside of Write(), these will always be equal. - // - // TODO: does this attempt at synchronisation (plus use of COMPILER_FENCE etc) - // actually work in practice? - u32 m_BufferPos0; - u32 m_BufferPos1; + std::atomic m_BufferPos0; + std::atomic m_BufferPos1; }; public: Index: source/ps/Profiler2.cpp =================================================================== --- source/ps/Profiler2.cpp +++ source/ps/Profiler2.cpp @@ -333,30 +333,27 @@ // See m_BufferPos0 etc for comments on synchronisation u32 size = 1 + itemSize; - u32 start = m_BufferPos0; + u32 start = m_BufferPos0.load(std::memory_order_relaxed); if (start + size > BUFFER_SIZE) { // The remainder of the buffer is too small - fill the rest // with NOPs then start from offset 0, so we don't have to // bother splitting the real item across the end of the buffer - m_BufferPos0 = size; - COMPILER_FENCE; // must write m_BufferPos0 before m_Buffer + m_BufferPos0.store(size, std::memory_order_release); memset(m_Buffer + start, 0, BUFFER_SIZE - start); start = 0; } else { - m_BufferPos0 = start + size; - COMPILER_FENCE; // must write m_BufferPos0 before m_Buffer + m_BufferPos0.store(start + size, std::memory_order_release); } m_Buffer[start] = (u8)type; memcpy(&m_Buffer[start + 1], item, itemSize); - COMPILER_FENCE; // must write m_BufferPos1 after m_Buffer - m_BufferPos1 = start + size; + m_BufferPos1.store(start + size, std::memory_order_release); } void CProfiler2::ThreadStorage::WriteHold(EItem type, const void* item, u32 itemSize) @@ -380,13 +377,11 @@ std::shared_ptr buffer(new u8[BUFFER_SIZE], ArrayDeleter()); - u32 pos1 = m_BufferPos1; - COMPILER_FENCE; // must read m_BufferPos1 before m_Buffer + u32 pos1 = m_BufferPos1.load(std::memory_order_acquire); memcpy(buffer.get(), m_Buffer, BUFFER_SIZE); - COMPILER_FENCE; // must read m_BufferPos0 after m_Buffer - u32 pos0 = m_BufferPos0; + u32 pos0 = m_BufferPos0.load(std::memory_order_acquire); // The range [pos1, pos0) modulo BUFFER_SIZE is invalid, so concatenate the rest of the buffer @@ -700,22 +695,19 @@ else { u32 size = m_HoldBuffers[m_HeldDepth - 1].pos; - u32 start = m_BufferPos0; + u32 start = m_BufferPos0.load(std::memory_order_relaxed); if (start + size > BUFFER_SIZE) { - m_BufferPos0 = size; - COMPILER_FENCE; + m_BufferPos0.store(size, std::memory_order_release); memset(m_Buffer + start, 0, BUFFER_SIZE - start); start = 0; } else { - m_BufferPos0 = start + size; - COMPILER_FENCE; // must write m_BufferPos0 before m_Buffer + m_BufferPos0.store(start + size, std::memory_order_release); } memcpy(&m_Buffer[start], m_HoldBuffers[m_HeldDepth - 1].buffer, size); - COMPILER_FENCE; // must write m_BufferPos1 after m_Buffer - m_BufferPos1 = start + size; + m_BufferPos1.store(start + size, std::memory_order_release); } m_HeldDepth--; }