Index: binaries/data/config/default.cfg =================================================================== --- binaries/data/config/default.cfg +++ binaries/data/config/default.cfg @@ -404,6 +404,9 @@ zoom.in = 5 zoom.out = 4 +[multithreading] +pathfinder = 0 ; How many threads to use for pathfinding. Special values: 0 chooses automatically, 1 de-activates threading entirely. + [chat] timestamp = true ; Show at which time chat messages have been sent Index: binaries/data/mods/public/gui/credits/texts/programming.json =================================================================== --- binaries/data/mods/public/gui/credits/texts/programming.json +++ binaries/data/mods/public/gui/credits/texts/programming.json @@ -125,6 +125,7 @@ {"nick": "kabzerek", "name": "Grzegorz Kabza"}, {"nick": "Kai", "name": "Kai Chen"}, {"name": "Kareem Ergawy"}, + {"nick": "Kuba386", "name": "Jakub Kośmicki"}, {"nick": "kevmo", "name": "Kevin Caffrey"}, {"nick": "kezz", "name": "Graeme Kerry"}, {"nick": "kingadami", "name": "Adam Winsor"}, Index: binaries/data/mods/public/gui/options/options.json =================================================================== --- binaries/data/mods/public/gui/options/options.json +++ binaries/data/mods/public/gui/options/options.json @@ -81,7 +81,16 @@ "label": "Chat Timestamp", "tooltip": "Show time that messages are posted in the lobby, gamesetup and ingame chat.", "config": "chat.timestamp" + }, + { + "type": "number", + "label": "Number of pathfinder threads", + "tooltip": "Number of pathfinder worker threads. Use 0 to choose automatically and 1 to disable threading altogether.", + "config": "multithreading.pathfinder", + "min": 0, + "max": 64 } + ] }, { Index: source/ps/ThreadFrontier.h =================================================================== --- /dev/null +++ source/ps/ThreadFrontier.h @@ -0,0 +1,73 @@ +/* Copyright (C) 2019 Wildfire Games. + * This file is part of 0 A.D. + * + * 0 A.D. 
is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * 0 A.D. is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with 0 A.D. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef INCLUDED_THREADFRONTIER +#define INCLUDED_THREADFRONTIER + +#include <condition_variable> +#include <mutex> + +/* + * A ThreadFrontier is similar to a Barrier in that it synchronizes n threads. + * A frontier has one thread waiting for n other threads to go through the Frontier. + */ +class ThreadFrontier +{ +private: + std::mutex m_Mutex; + std::condition_variable m_ConditionVariable; + int m_Expecting; // Number of GoThrough() calls that Watch() waits for. + int m_Count; // Number of GoThrough() calls counted since the last Reset(). +public: + ThreadFrontier() : m_Expecting(0), m_Count(0) {}; + + void Setup(int expect) + { + std::lock_guard<std::mutex> lock(m_Mutex); + ENSURE(m_Expecting == 0 && m_Count == 0); + m_Expecting = expect; + // The frontier starts open; call Reset() to close it. Reset() takes no lock, so only call it while no thread can be in GoThrough() or Watch(). + m_Count = m_Expecting; + } + + void Reset() + { + m_Count = 0; + } + + void Watch() + { + std::unique_lock<std::mutex> lock(m_Mutex); + // If all threads have already gone through the frontier, we can stop watching right away. + if (m_Count == m_Expecting) + return; + m_ConditionVariable.wait(lock, [this] { return m_Count == m_Expecting; }); + } + + void GoThrough() + { + // Acquire the lock: we must be sure that the watching thread is either not yet in Watch() + // or is fully in the waiting state. Without this mutex lock, we could notify when the watching thread + // is in wait() but not yet in the waiting state, thus deadlocking. + std::lock_guard<std::mutex> lock(m_Mutex); + // Notify the watching thread if we are the last to go through. 
+ if (++m_Count == m_Expecting) + m_ConditionVariable.notify_one(); + } +}; + +#endif // INCLUDED_THREADFRONTIER Index: source/ps/ThreadUtil.h =================================================================== --- source/ps/ThreadUtil.h +++ source/ps/ThreadUtil.h @@ -33,6 +33,11 @@ */ void SetMainThread(); +/** + * Returns the number of threads we want for the pathfinder. + */ +u32 GetNumberOfPathfindingThreads(); + } #endif // INCLUDED_THREADUTIL Index: source/ps/ThreadUtil.cpp =================================================================== --- source/ps/ThreadUtil.cpp +++ source/ps/ThreadUtil.cpp @@ -20,6 +20,8 @@ #include #include "ThreadUtil.h" +#include "ConfigDB.h" +#include "tools/atlas/GameInterface/GameLoop.h" static bool g_MainThreadSet; static std::thread::id g_MainThread; @@ -39,3 +41,22 @@ g_MainThread = std::this_thread::get_id(); g_MainThreadSet = true; } + +u32 ThreadUtil::GetNumberOfPathfindingThreads() +{ + // TODO before commit: Atlas threading is de-activated because the vertex pathfinder uses the obstruction manager's obstructions, + // which can be changed in-between turns in Atlas. We should probably mutex it in the pathfinder to make sure it's not changed. + if ((g_AtlasGameLoop && g_AtlasGameLoop->running)) + return 1; + + u32 wantedThreads = 0; + + if (CConfigDB::IsInitialised()) + CFG_GET_VAL("multithreading.pathfinder", wantedThreads); + + // By default use 2 * (# of cores - 1) threads to benefit from hardware load-balancing as ours is very simple. 
+ if (wantedThreads == 0) // hardware_concurrency() returns 0 when the core count is unknown: fall back to 1 (no threading) instead of wrapping around. + return std::thread::hardware_concurrency() > 1 ? (std::thread::hardware_concurrency() - 1) * 2 : 1; + + return wantedThreads; +} Index: source/simulation2/components/CCmpPathfinder.cpp =================================================================== --- source/simulation2/components/CCmpPathfinder.cpp +++ source/simulation2/components/CCmpPathfinder.cpp @@ -27,6 +27,7 @@ #include "ps/CLogger.h" #include "ps/CStr.h" #include "ps/Profile.h" +#include "ps/ThreadUtil.h" #include "ps/XML/Xeromyces.h" #include "renderer/Scene.h" #include "simulation2/MessageTypes.h" @@ -68,21 +69,6 @@ CParamNode externalParamNode; CParamNode::LoadXML(externalParamNode, L"simulation/data/pathfinder.xml", "pathfinder"); - // Previously all move commands during a turn were - // queued up and processed asynchronously at the start - // of the next turn. Now we are processing queued up - // events several times duing the turn. This improves - // responsiveness and units move more smoothly especially. - // when in formation. There is still a call at the - // beginning of a turn to process all outstanding moves - - // this will handle any moves above the MaxSameTurnMoves - // threshold. - // - // TODO - The moves processed at the beginning of the - // turn do not count against the maximum moves per turn - // currently. The thinking is that this will eventually - // happen in another thread. Either way this probably - // will require some adjustment and rethinking. const CParamNode pathingSettings = externalParamNode.GetChild("Pathfinder"); m_MaxSameTurnMoves = (u16)pathingSettings.GetChild("MaxSameTurnMoves").ToInt(); @@ -97,13 +83,35 @@ m_PassClassMasks[name] = mask; } - m_Workers.emplace_back(PathfinderWorker{}); -} + u32 wantedThreads = ThreadUtil::GetNumberOfPathfindingThreads(); + + LOGMESSAGE("Initialising %i threads for pathfinding.", wantedThreads); + + // The worker thread will only call std::thread if we actually have > 1 threads, otherwise we're running in the main thread. 
+ if (wantedThreads <= 1) // 0 and 1 both mean no worker threads: requests are computed on the calling thread. + { + m_UseThreading = false; + m_Workers.emplace_back(); + } + else + { + m_PathfinderFrontier.Setup(wantedThreads); + m_UseThreading = true; + // We cannot move workers or threads will run on deleted instances. + m_Workers.resize(wantedThreads); + for (size_t i = 0; i < wantedThreads; ++i) + m_Workers[i].Start(*this, i); + } +}; CCmpPathfinder::~CCmpPathfinder() {}; void CCmpPathfinder::Deinit() { + for (PathfinderWorker& worker : m_Workers) + worker.PrepareToKill(); + { std::lock_guard<std::mutex> lock(m_PathfinderMutex); } // Sync with any worker between its predicate check and wait(), so that none misses the wake-up below. + m_PathfinderConditionVariable.notify_all(); m_Workers.clear(); SetDebugOverlay(false); // cleans up memory @@ -703,7 +711,27 @@ // Async pathfinder workers -CCmpPathfinder::PathfinderWorker::PathfinderWorker() {} +CCmpPathfinder::PathfinderWorker::PathfinderWorker() : m_Kill(false), m_Computing(false) +{ +} + +CCmpPathfinder::PathfinderWorker::~PathfinderWorker() +{ + if (m_Thread.joinable()) + m_Thread.join(); +} + +void CCmpPathfinder::PathfinderWorker::Start(const CCmpPathfinder& pathfinder, size_t index) +{ + if (pathfinder.m_UseThreading) + m_Thread = std::thread(&CCmpPathfinder::PathfinderWorker::InitThread, this, std::ref(pathfinder), index); +} + +void CCmpPathfinder::PathfinderWorker::InitThread(const CCmpPathfinder& pathfinder, size_t index) +{ + g_Profiler2.RegisterCurrentThread("Pathfinder thread " + std::to_string(index)); + WaitForWork(pathfinder); +} template void CCmpPathfinder::PathfinderWorker::PushRequests(std::vector&, ssize_t) @@ -721,6 +749,32 @@ m_ShortRequests.insert(m_ShortRequests.end(), std::make_move_iterator(from.end() - amount), std::make_move_iterator(from.end())); } +void CCmpPathfinder::PathfinderWorker::PrepareToKill() +{ + m_Kill = true; // Only raises the flag: the caller still has to notify the condition variable. +} + +void CCmpPathfinder::PathfinderWorker::WaitForWork(const CCmpPathfinder& pathfinder) +{ + while (true) + { + { + std::unique_lock<std::mutex> lock(pathfinder.m_PathfinderMutex); + pathfinder.m_PathfinderConditionVariable.wait(lock, [this] { return 
m_Computing || m_Kill; }); + } + + if (m_Kill) + return; + Work(pathfinder); + + // We must be the ones setting our m_Computing to false. + ENSURE(m_Computing); + m_Computing = false; + + pathfinder.m_PathfinderFrontier.GoThrough(); + } +} + void CCmpPathfinder::PathfinderWorker::Work(const CCmpPathfinder& pathfinder) { while (!m_LongRequests.empty()) @@ -773,6 +827,11 @@ { PROFILE2("FetchAsyncResults"); + // TODO maybe: a possible improvement here would be to push results from workers whenever they are done, and not when all are done. + + // Wait until all threads have finished computing. + m_PathfinderFrontier.Watch(); + // WARNING: the order in which moves are pulled must be consistent when using 1 or n workers. // We fetch in the same order we inserted in, but we push moves backwards, so this works. std::vector results; @@ -798,14 +857,32 @@ void CCmpPathfinder::StartProcessingMoves(bool useMax) { + // We will send new path requests to worker threads, + // trying to balance the workload somewhat + // and then notify them they can start working. + // To avoid data races, we can only push jobs when workers are not computing them, + // so FetchAsyncResultsAndSendMessages must have been called first. + std::vector longRequests = PopMovesToProcess(m_LongPathRequests, useMax, m_MaxSameTurnMoves); std::vector shortRequests = PopMovesToProcess(m_ShortPathRequests, useMax, m_MaxSameTurnMoves - longRequests.size()); PushRequestsToWorkers(longRequests); PushRequestsToWorkers(shortRequests); - for (PathfinderWorker& worker : m_Workers) - worker.Work(*this); + m_PathfinderFrontier.Reset(); + + if (m_UseThreading) + { + std::lock_guard<std::mutex> lock(m_PathfinderMutex); // Flag the workers under the mutex: a worker that has checked its wait predicate but is not yet blocked in wait() would otherwise miss the notification and sleep forever. + for (PathfinderWorker& worker : m_Workers) + { 
+ ENSURE(!worker.m_Computing); + worker.m_Computing = true; // Mark as computing to unblock. + } + m_PathfinderConditionVariable.notify_all(); + } + else + m_Workers.back().Work(*this); } template @@ -843,6 +920,10 @@ // In this instance, work is distributed in a strict LIFO order, effectively reversing tickets. for (PathfinderWorker& worker : m_Workers) { + // Prevent pushing requests when the worker is computing. + // Call FetchAsyncResultsAndSendMessages() before pushing new requests. + ENSURE(!worker.m_Computing); + amount = std::min(amount, from.size()); // Since we are rounding up before, ensure we aren't pushing beyond the end. worker.PushRequests(from, amount); from.erase(from.end() - amount, from.end()); Index: source/simulation2/components/CCmpPathfinder_Common.h =================================================================== --- source/simulation2/components/CCmpPathfinder_Common.h +++ source/simulation2/components/CCmpPathfinder_Common.h @@ -35,9 +35,11 @@ #include "graphics/Terrain.h" #include "maths/MathUtil.h" #include "ps/CLogger.h" +#include "ps/ThreadFrontier.h" #include "renderer/TerrainOverlay.h" #include "simulation2/components/ICmpObstructionManager.h" +#include class HierarchicalPathfinder; class LongPathfinder; @@ -64,11 +66,30 @@ friend CCmpPathfinder; public: PathfinderWorker(); + // noexcept move constructor, needed to store workers in a std::vector. It transfers nothing, so a worker whose thread is running must never be moved. + PathfinderWorker(PathfinderWorker&& other) noexcept : m_Kill(false), m_Computing(false) + { + ENSURE(!other.m_Thread.joinable()); + } + + ~PathfinderWorker(); + + // Create the std::thread and call InitThread + void Start(const CCmpPathfinder& pathfinder, size_t index); + + void PrepareToKill(); + + // Will loop until a condition_variable notifies us, and call Work(). + void WaitForWork(const CCmpPathfinder& pathfinder); // Process path requests, checking if we should stop before each new one. + // Should be callable both synchronously and asynchronously. 
void Work(const CCmpPathfinder& pathfinder); private: + // Takes care of what needs to be called to initialise the thread before calling WaitForWork(). + void InitThread(const CCmpPathfinder& pathfinder, size_t index); + // Insert requests in m_[Long/Short]Requests depending on from. // This could be removed when we may use if-constexpr in CCmpPathfinder::PushRequestsToWorkers template @@ -77,6 +98,11 @@ // Stores our results, the main thread will fetch this. std::vector m_Results; + std::thread m_Thread; // Only started by Start() when the pathfinder uses threading. + + std::atomic m_Kill; // Raised by PrepareToKill(); makes WaitForWork() return. + std::atomic m_Computing; // Raised in StartProcessingMoves(), cleared by the worker itself once Work() has returned. + std::vector m_LongRequests; std::vector m_ShortRequests; }; @@ -128,8 +154,12 @@ std::unique_ptr m_PathfinderHier; std::unique_ptr m_LongPathfinder; - // Workers process pathing requests. + // Workers process pathing requests (a single worker, run synchronously, when m_UseThreading is false). std::vector m_Workers; + bool m_UseThreading = false; + mutable std::mutex m_PathfinderMutex; // Used together with m_PathfinderConditionVariable, on which the workers wait for work. + mutable std::condition_variable m_PathfinderConditionVariable; + mutable ThreadFrontier m_PathfinderFrontier; // Watched before fetching results, so that all workers are done computing. AtlasOverlay* m_AtlasOverlay; Index: source/simulation2/helpers/LongPathfinder.h =================================================================== --- source/simulation2/helpers/LongPathfinder.h +++ source/simulation2/helpers/LongPathfinder.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2017 Wildfire Games. +/* Copyright (C) 2019 Wildfire Games. * This file is part of 0 A.D. * * 0 A.D. is free software: you can redistribute it and/or modify @@ -18,6 +18,7 @@ #ifndef INCLUDED_LONGPATHFINDER #define INCLUDED_LONGPATHFINDER +#include #include "Pathfinding.h" #include "graphics/Overlay.h" @@ -222,15 +223,14 @@ u16 m_GridSize; // Debugging - output from last pathfind operation. - // mutable as making these const would require a lot of boilerplate code - // and they do not change the behavioural const-ness of the pathfinder. 
- mutable LongOverlay* m_DebugOverlay; - mutable PathfindTileGrid* m_DebugGrid; - mutable u32 m_DebugSteps; - mutable double m_DebugTime; - mutable PathGoal m_DebugGoal; - mutable WaypointPath* m_DebugPath; - mutable pass_class_t m_DebugPassClass; + // Static and thread-local: every thread has its own copy, as the debug code does not support threading. + static thread_local LongOverlay* m_DebugOverlay; + static thread_local PathfindTileGrid* m_DebugGrid; + static thread_local u32 m_DebugSteps; + static thread_local double m_DebugTime; + static thread_local PathGoal m_DebugGoal; + static thread_local WaypointPath* m_DebugPath; + static thread_local pass_class_t m_DebugPassClass; private: PathCost CalculateHeuristic(int i, int j, int iGoal, int jGoal) const; @@ -272,11 +272,8 @@ void GenerateSpecialMap(pass_class_t passClass, std::vector excludedRegions); bool m_UseJPSCache; - // Mutable may be used here as caching does not change the external const-ness of the Long Range pathfinder. - // This is thread-safe as it is order independent (no change in the output of the function for a given set of params). - // Obviously, this means that the cache should actually be a cache and not return different results - // from what would happen if things hadn't been cached. 
- mutable std::map > m_JumpPointCache; + // Thread-local: each thread fills its own cache. NOTE(review): presumably every thread's copy has to be invalidated separately when the grid changes - confirm. + static thread_local std::map > m_JumpPointCache; }; /** Index: source/simulation2/helpers/LongPathfinder.cpp =================================================================== --- source/simulation2/helpers/LongPathfinder.cpp +++ source/simulation2/helpers/LongPathfinder.cpp @@ -25,6 +25,15 @@ #include "Geometry.h" #include "HierarchicalPathfinder.h" +thread_local LongOverlay* LongPathfinder::m_DebugOverlay; +thread_local PathfindTileGrid* LongPathfinder::m_DebugGrid; +thread_local u32 LongPathfinder::m_DebugSteps; +thread_local double LongPathfinder::m_DebugTime; +thread_local PathGoal LongPathfinder::m_DebugGoal; +thread_local WaypointPath* LongPathfinder::m_DebugPath; +thread_local pass_class_t LongPathfinder::m_DebugPassClass; +thread_local std::map > LongPathfinder::m_JumpPointCache; + /** * Jump point cache. * @@ -373,9 +382,11 @@ LongPathfinder::LongPathfinder() : m_UseJPSCache(false), - m_Grid(NULL), m_GridSize(0), - m_DebugOverlay(NULL), m_DebugGrid(NULL), m_DebugPath(NULL) + m_Grid(NULL), m_GridSize(0) { + m_DebugOverlay = nullptr; // The debug members are thread_local statics: this only resets the constructing thread's copies. + m_DebugGrid = nullptr; + m_DebugPath = nullptr; } LongPathfinder::~LongPathfinder() Index: source/simulation2/helpers/VertexPathfinder.h =================================================================== --- source/simulation2/helpers/VertexPathfinder.h +++ source/simulation2/helpers/VertexPathfinder.h @@ -96,25 +96,25 @@ const u16& m_MapSize; Grid* const & m_TerrainOnlyGrid; - std::atomic m_DebugOverlay; - mutable std::vector m_DebugOverlayShortPathLines; + bool m_DebugOverlay; // NOTE(review): no longer atomic - confirm it is never written while worker threads read it. + static thread_local std::vector m_DebugOverlayShortPathLines; + static thread_local std::mutex m_DebugMutex; // NOTE(review): being thread_local, each thread locks its own mutex, so this gives no exclusion between threads. // These vectors are expensive to recreate on every call, so we cache them here. - // They are made mutable to allow using them in the otherwise const ComputeShortPath. 
- mutable std::vector m_EdgesUnaligned; - mutable std::vector m_EdgesLeft; - mutable std::vector m_EdgesRight; - mutable std::vector m_EdgesBottom; - mutable std::vector m_EdgesTop; + static thread_local std::vector m_EdgesUnaligned; // thread_local: every thread reuses its own scratch vectors. + static thread_local std::vector m_EdgesLeft; + static thread_local std::vector m_EdgesRight; + static thread_local std::vector m_EdgesBottom; + static thread_local std::vector m_EdgesTop; // List of obstruction vertexes (plus start/end points); we'll try to find paths through // the graph defined by these vertexes. - mutable std::vector m_Vertexes; + static thread_local std::vector m_Vertexes; // List of collision edges - paths must never cross these. // (Edges are one-sided so intersections are fine in one direction, but not the other direction.) - mutable std::vector m_Edges; - mutable std::vector m_EdgeSquares; // Axis-aligned squares; equivalent to 4 edges. + static thread_local std::vector m_Edges; + static thread_local std::vector m_EdgeSquares; // Axis-aligned squares; equivalent to 4 edges. 
}; #endif // INCLUDED_VERTEXPATHFINDER Index: source/simulation2/helpers/VertexPathfinder.cpp =================================================================== --- source/simulation2/helpers/VertexPathfinder.cpp +++ source/simulation2/helpers/VertexPathfinder.cpp @@ -42,6 +42,18 @@ #include "simulation2/helpers/Render.h" #include "simulation2/system/SimContext.h" + +thread_local std::vector VertexPathfinder::m_DebugOverlayShortPathLines; // Definitions of the static thread_local members declared in VertexPathfinder.h. +thread_local std::mutex VertexPathfinder::m_DebugMutex; +thread_local std::vector VertexPathfinder::m_EdgesUnaligned; +thread_local std::vector VertexPathfinder::m_EdgesLeft; +thread_local std::vector VertexPathfinder::m_EdgesRight; +thread_local std::vector VertexPathfinder::m_EdgesBottom; +thread_local std::vector VertexPathfinder::m_EdgesTop; +thread_local std::vector VertexPathfinder::m_Vertexes; +thread_local std::vector VertexPathfinder::m_Edges; +thread_local std::vector VertexPathfinder::m_EdgeSquares; + /* Quadrant optimisation: * (loosely based on GPG2 "Optimizing Points-of-Visibility Pathfinding") * @@ -838,6 +850,7 @@ { if (!m_DebugOverlay) return; + std::lock_guard lock(m_DebugMutex); // Taken before m_DebugOverlayShortPathLines is modified. m_DebugOverlayShortPathLines.clear(); @@ -871,6 +884,7 @@ { if (!m_DebugOverlay) return; + std::lock_guard lock(m_DebugMutex); #define PUSH_POINT(p) STMT(xz.push_back(p.X.ToFloat()); xz.push_back(p.Y.ToFloat())) // Render the vertexes as little Pac-Man shapes to indicate quadrant direction @@ -968,6 +982,7 @@ if (!m_DebugOverlay) return; + std::lock_guard lock(m_DebugMutex); // Taken before m_DebugOverlayShortPathLines is read. for (size_t i = 0; i < m_DebugOverlayShortPathLines.size(); ++i) collector.Submit(&m_DebugOverlayShortPathLines[i]); }