Index: binaries/data/config/default.cfg =================================================================== --- binaries/data/config/default.cfg +++ binaries/data/config/default.cfg @@ -404,6 +404,9 @@ zoom.in = 5 zoom.out = 4 +[multithreading] +pathfinder = 0 ; How many threads to use for pathfinding. Special values: 0 chooses automatically, 1 de-activates threading entirely. + [chat] timestamp = true ; Show at which time chat messages have been sent Index: binaries/data/mods/public/gui/credits/texts/programming.json =================================================================== --- binaries/data/mods/public/gui/credits/texts/programming.json +++ binaries/data/mods/public/gui/credits/texts/programming.json @@ -124,6 +124,7 @@ {"nick": "kabzerek", "name": "Grzegorz Kabza"}, {"nick": "Kai", "name": "Kai Chen"}, {"name": "Kareem Ergawy"}, + {"nick": "Kuba386", "name":"Jakub Kośmicki"}, {"nick": "kevmo", "name": "Kevin Caffrey"}, {"nick": "kezz", "name": "Graeme Kerry"}, {"nick": "kingadami", "name": "Adam Winsor"}, Index: binaries/data/mods/public/gui/options/options.json =================================================================== --- binaries/data/mods/public/gui/options/options.json +++ binaries/data/mods/public/gui/options/options.json @@ -81,7 +81,16 @@ "label": "Chat Timestamp", "tooltip": "Show time that messages are posted in the lobby, gamesetup and ingame chat.", "config": "chat.timestamp" + }, + { + "type": "number", + "label": "Number of pathfinder threads", + "tooltip": "Number of pathfinder worker threads. Use 0 to choose automatically and 1 to disable threading altogether.", + "config": "pathfinder.threads", + "min": 0, + "max": 64 } + ] }, { Index: source/ps/ThreadUtil.h =================================================================== --- source/ps/ThreadUtil.h +++ source/ps/ThreadUtil.h @@ -20,6 +20,10 @@ #include "lib/posix/posix_pthread.h" +#include +#include +#include + #ifdef DEBUG_LOCKS #define LOCK_MUTEX(_mutex) STMT( \ @@ -96,6 +100,54 @@ pthread_mutex_t* m_Mutex; }; +/* + * A Frontier is similar to a Barrier in that it synchronizes n threads. + * A frontier has one thread waiting for n other threads to go through the Frontier. + */ +class Frontier +{ +private: + std::mutex m_Mutex; + std::condition_variable m_ConditionVariable; + int m_Expecting; + int m_Count; +public: + Frontier() : m_Expecting(0), m_Count(0) {}; + + void Setup(int expect) + { + ENSURE(m_Expecting == 0 && m_Count == 0); + std::lock_guard lock(m_Mutex); + m_Expecting = expect; + // The frontier is open, call Reset() to close it. + m_Count = m_Expecting; + } + + void Reset() + { + m_Count = 0; + } + + void Watch() + { + std::unique_lock lock(m_Mutex); + // If all threads have already gone through the frontier, we can stop watching right away. + if (m_Count == m_Expecting) + return; + m_ConditionVariable.wait(lock, [this] { return m_Count == m_Expecting; }); + } + + void GoThrough() + { + // Acquire the lock: we must be sure that the watching thread is either not yet in Watch() + // or is fully in the waiting state. Without this mutex lock, we could notify when the watching thread + // is in wait() but not yet in the waiting state, thus deadlocking. + std::lock_guard lock(m_Mutex); + // Notify the watching thread if we are the last to go through. + if (++m_Count == m_Expecting) + m_ConditionVariable.notify_one(); + } +}; namespace ThreadUtil { @@ -112,6 +164,10 @@ */ void SetMainThread(); +/** + * Returns the number of threads we want for the pathfinder. + */ +u32 GetNumberOfPathfindingThreads(); } #endif // INCLUDED_THREADUTIL Index: source/ps/ThreadUtil.cpp =================================================================== --- source/ps/ThreadUtil.cpp +++ source/ps/ThreadUtil.cpp @@ -17,7 +17,11 @@ #include "precompiled.h" +#include + #include "ThreadUtil.h" +#include "ConfigDB.h" +#include "tools/atlas/GameInterface/GameLoop.h" static bool g_MainThreadSet; static pthread_t g_MainThread; @@ -37,3 +41,22 @@ g_MainThread = pthread_self(); g_MainThreadSet = true; } + +u32 ThreadUtil::GetNumberOfPathfindingThreads() +{ + // TODO BEFORE COMMIT ID SAY: atlas threading is de-activated because the vertex pathfinder uses the obstruction manager's obstructions. + // this can be changed in-betweenturns in Atlas. We should probably mutex it in the pathfinder to make sure it's not changed. + if ((g_AtlasGameLoop && g_AtlasGameLoop->running)) + return 1; + + u32 wantedThreads = 0; + + if (CConfigDB::IsInitialised()) + CFG_GET_VAL("multithreading.pathfinder", wantedThreads); + + // By default use 2 * (# of cores - 1) cores to benefit from hardware load-balancing as ours is very simple. + if (wantedThreads == 0) + return (std::thread::hardware_concurrency() - 1) * 2; + + return wantedThreads; +} Index: source/simulation2/Simulation2.cpp =================================================================== --- source/simulation2/Simulation2.cpp +++ source/simulation2/Simulation2.cpp @@ -540,8 +540,8 @@ CmpPtr cmpPathfinder(simContext, SYSTEM_ENTITY); if (cmpPathfinder) { + cmpPathfinder->FetchAsyncResultsAndSendMessages(); cmpPathfinder->UpdateGrid(); - cmpPathfinder->FinishAsyncRequests(); } // Push AI commands onto the queue before we use them @@ -555,14 +555,17 @@ // Process newly generated move commands so the UI feels snappy if (cmpPathfinder) - cmpPathfinder->ProcessSameTurnMoves(); - + { + cmpPathfinder->StartProcessingMoves(true); + cmpPathfinder->FetchAsyncResultsAndSendMessages(); + } // Send all the update phases { PROFILE2("Sim - Update"); CMessageUpdate msgUpdate(turnLengthFixed); componentManager.BroadcastMessage(msgUpdate); } + { CMessageUpdate_MotionFormation msgUpdate(turnLengthFixed); componentManager.BroadcastMessage(msgUpdate); @@ -570,7 +573,10 @@ // Process move commands for formations (group proxy) if (cmpPathfinder) - cmpPathfinder->ProcessSameTurnMoves(); + { + cmpPathfinder->StartProcessingMoves(true); + cmpPathfinder->FetchAsyncResultsAndSendMessages(); + } { PROFILE2("Sim - Motion Unit"); @@ -583,12 +589,12 @@ componentManager.BroadcastMessage(msgUpdate); } - // Process moves resulting from group proxy movement (unit needs to catch up or realign) and any others - if (cmpPathfinder) - cmpPathfinder->ProcessSameTurnMoves(); - // Clean up any entities destroyed during the simulation update componentManager.FlushDestroyedComponents(); + + // Process all remaining moves + if (cmpPathfinder) + cmpPathfinder->StartProcessingMoves(); } void CSimulation2Impl::Interpolate(float simFrameLength, float frameOffset, float realFrameLength) Index: source/simulation2/components/CCmpPathfinder.cpp =================================================================== --- source/simulation2/components/CCmpPathfinder.cpp +++ source/simulation2/components/CCmpPathfinder.cpp @@ -51,10 +51,6 @@ m_AtlasOverlay = NULL; - m_SameTurnMovesCount = 0; - - m_VertexPathfinder = std::unique_ptr(new VertexPathfinder(m_MapSize, m_TerrainOnlyGrid)); - // Register Relax NG validator CXeromyces::AddValidator(g_VFS, "pathfinder", "simulation/data/pathfinder.rng"); @@ -64,21 +60,6 @@ CParamNode externalParamNode; CParamNode::LoadXML(externalParamNode, L"simulation/data/pathfinder.xml", "pathfinder"); - // Previously all move commands during a turn were - // queued up and processed asynchronously at the start - // of the next turn. Now we are processing queued up - // events several times duing the turn. This improves - // responsiveness and units move more smoothly especially. - // when in formation. There is still a call at the - // beginning of a turn to process all outstanding moves - - // this will handle any moves above the MaxSameTurnMoves - // threshold. - // - // TODO - The moves processed at the beginning of the - // turn do not count against the maximum moves per turn - // currently. The thinking is that this will eventually - // happen in another thread. Either way this probably - // will require some adjustment and rethinking. const CParamNode pathingSettings = externalParamNode.GetChild("Pathfinder"); m_MaxSameTurnMoves = (u16)pathingSettings.GetChild("MaxSameTurnMoves").ToInt(); @@ -92,10 +73,35 @@ m_PassClasses.push_back(PathfinderPassability(mask, it->second)); m_PassClassMasks[name] = mask; } -} + + + u32 wantedThreads = ThreadUtil::GetNumberOfPathfindingThreads(); + + LOGMESSAGE("Initialising %i threads for pathfinding.", wantedThreads); + + // The worker thread will only call std::thread if we actually have > 1 threads, otherwise we're running in the main thread. + if (wantedThreads <= 1) // <= 1 as the above computations returns 0 for one core. + { + m_UseThreading = false; + m_Workers.emplace_back(new PathfinderWorker(*this, m_Workers.size())); + } + else + { + m_PathfinderFrontier.Setup(wantedThreads); + m_UseThreading = true; + for (size_t i = 0; i < wantedThreads; ++i) + m_Workers.emplace_back(new PathfinderWorker(*this, m_Workers.size())); + } +}; void CCmpPathfinder::Deinit() { + for (std::unique_ptr& worker : m_Workers) + worker->PrepareToKill(); + m_PathfinderConditionVariable.notify_all(); + + m_Workers.clear(); + SetDebugOverlay(false); // cleans up memory SAFE_DELETE(m_AtlasOverlay); @@ -141,7 +147,6 @@ SerializeVector()(serialize, "long requests", m_LongPathRequests); SerializeVector()(serialize, "short requests", m_ShortPathRequests); serialize.NumberU32_Unbounded("next ticket", m_NextAsyncTicket); - serialize.NumberU16_Unbounded("same turn moves count", m_SameTurnMovesCount); serialize.NumberU16_Unbounded("map size", m_MapSize); } @@ -176,15 +181,13 @@ m_TerrainDirty = true; UpdateGrid(); break; - case MT_TurnStart: - m_SameTurnMovesCount = 0; - break; } } void CCmpPathfinder::RenderSubmit(SceneCollector& collector) { - m_VertexPathfinder->RenderSubmit(collector); + for (std::unique_ptr& worker : m_Workers) + worker->m_VertexPathfinder.RenderSubmit(collector); m_LongPathfinder.HierarchicalRenderSubmit(collector); } @@ -671,7 +674,96 @@ ////////////////////////////////////////////////////////// -// Async path requests: +// Async pathfinder workers + +CCmpPathfinder::PathfinderWorker::PathfinderWorker(const CCmpPathfinder& pathfinder, size_t index) + : m_Pathfinder(pathfinder), m_VertexPathfinder(m_Pathfinder.m_MapSize, m_Pathfinder.m_TerrainOnlyGrid) +{ + m_Computing = false; + m_Kill = false; + if (m_Pathfinder.m_UseThreading) + m_Thread = std::unique_ptr(new std::thread(&CCmpPathfinder::PathfinderWorker::InitThread, this, index)); +} + +CCmpPathfinder::PathfinderWorker::~PathfinderWorker() +{ + if (m_Thread) + { + if (m_Thread->joinable()) + m_Thread->join(); + } +} + +void CCmpPathfinder::PathfinderWorker::InitThread(size_t index) +{ + g_Profiler2.RegisterCurrentThread("Pathfinder thread " + std::to_string(index)); + WaitForWork(); +} + +template +void CCmpPathfinder::PathfinderWorker::PushRequests(std::vector&, ssize_t) +{ + static_assert(sizeof(T) == 0, "Only specializations can be used"); +} + +template<> void CCmpPathfinder::PathfinderWorker::PushRequests(std::vector& from, ssize_t amount) +{ + m_LongRequests.insert(m_LongRequests.end(), std::make_move_iterator(from.end() - amount), std::make_move_iterator(from.end())); +} + +template<> void CCmpPathfinder::PathfinderWorker::PushRequests(std::vector& from, ssize_t amount) +{ + m_ShortRequests.insert(m_ShortRequests.end(), std::make_move_iterator(from.end() - amount), std::make_move_iterator(from.end())); +} + +void CCmpPathfinder::PathfinderWorker::PrepareToKill() +{ + m_Kill = true; +} + +void CCmpPathfinder::PathfinderWorker::WaitForWork() +{ + while (true) + { + { + std::unique_lock lock(m_Pathfinder.m_PathfinderMutex); + m_Pathfinder.m_PathfinderConditionVariable.wait(lock, [this] { return m_Computing || m_Kill; }); + } + + if (m_Kill) + return; + + Work(); + + // We must be the ones setting our m_Computing to false. + ENSURE(m_Computing); + m_Computing = false; + + m_Pathfinder.m_PathfinderFrontier.GoThrough(); + } +} + +void CCmpPathfinder::PathfinderWorker::Work() +{ + while (!m_LongRequests.empty()) + { + const LongPathRequest& req = m_LongRequests.back(); + WaypointPath path; + ComputePath(req.x0, req.z0, req.goal, req.passClass, path); + m_Results.push_back({req.ticket, req.notify, path}); + + m_LongRequests.pop_back(); + } + + while (!m_ShortRequests.empty()) + { + const ShortPathRequest& req = m_ShortRequests.back(); + WaypointPath path = ComputeShortPath(req); + m_Results.push_back({req.ticket, req.notify, path}); + + m_ShortRequests.pop_back(); + } +} u32 CCmpPathfinder::ComputePathAsync(entity_pos_t x0, entity_pos_t z0, const PathGoal& goal, pass_class_t passClass, entity_id_t notify) { @@ -680,111 +772,123 @@ return req.ticket; } -u32 CCmpPathfinder::ComputeShortPathAsync(entity_pos_t x0, entity_pos_t z0, entity_pos_t clearance, entity_pos_t range, const PathGoal& goal, pass_class_t passClass, bool avoidMovingUnits, entity_id_t group, entity_id_t notify) +u32 CCmpPathfinder::ComputeShortPathAsync(entity_pos_t x0, entity_pos_t z0, entity_pos_t clearance, entity_pos_t range, + const PathGoal& goal, pass_class_t passClass, bool avoidMovingUnits, + entity_id_t group, entity_id_t notify) { ShortPathRequest req = { m_NextAsyncTicket++, x0, z0, clearance, range, goal, passClass, avoidMovingUnits, group, notify }; m_ShortPathRequests.push_back(req); return req.ticket; } -void CCmpPathfinder::FinishAsyncRequests() +void CCmpPathfinder::ComputePathImmediate(entity_pos_t x0, entity_pos_t z0, const PathGoal& goal, pass_class_t passClass, WaypointPath& ret) const { - PROFILE2("Finish Async Requests"); - // Save the request queue in case it gets modified while iterating - std::vector longRequests; - m_LongPathRequests.swap(longRequests); + m_LongPathfinder.ComputePath(x0, z0, goal, passClass, ret); +} - std::vector shortRequests; - m_ShortPathRequests.swap(shortRequests); +WaypointPath CCmpPathfinder::ComputeShortPathImmediate(const ShortPathRequest& request) const +{ + VertexPathfinder pathfinder(m_MapSize, m_TerrainOnlyGrid); + return pathfinder.ComputeShortPath(request, CmpPtr(GetSystemEntity())); +} - // TODO: we should only compute one path per entity per turn +void CCmpPathfinder::FetchAsyncResultsAndSendMessages() +{ + PROFILE2("FetchAsyncResults"); - // TODO: this computation should be done incrementally, spread - // across multiple frames (or even multiple turns) + // TODO maybe: a possible improvement here would be to push results from workers whenever they are done, and not when all are done. - ProcessLongRequests(longRequests); - ProcessShortRequests(shortRequests); -} + // Wait until all threads have finished computing. + m_PathfinderFrontier.Watch(); -void CCmpPathfinder::ProcessLongRequests(const std::vector& longRequests) -{ - PROFILE2("Process Long Requests"); - for (size_t i = 0; i < longRequests.size(); ++i) + std::vector results; + for (std::unique_ptr& worker : m_Workers) { - const LongPathRequest& req = longRequests[i]; - WaypointPath path; - ComputePath(req.x0, req.z0, req.goal, req.passClass, path); - CMessagePathResult msg(req.ticket, path); - GetSimContext().GetComponentManager().PostMessage(req.notify, msg); + results.insert(results.end(), std::make_move_iterator(worker->m_Results.begin()), std::make_move_iterator(worker->m_Results.end())); + worker->m_Results.clear(); } -} -void CCmpPathfinder::ProcessShortRequests(const std::vector& shortRequests) -{ - PROFILE2("Process Short Requests"); - for (size_t i = 0; i < shortRequests.size(); ++i) { - const ShortPathRequest& req = shortRequests[i]; - WaypointPath path = m_VertexPathfinder->ComputeShortPath(req, CmpPtr(GetSystemEntity())); - CMessagePathResult msg(req.ticket, path); - GetSimContext().GetComponentManager().PostMessage(req.notify, msg); + PROFILE2("PostMessages"); + for (PathResult& path : results) + { + CMessagePathResult msg(path.ticket, path.path); + GetSimContext().GetComponentManager().PostMessage(path.notify, msg); + } } } -void CCmpPathfinder::ProcessSameTurnMoves() +void CCmpPathfinder::StartProcessingMoves(bool useMax) { - if (!m_LongPathRequests.empty()) - { - // Figure out how many moves we can do this time - i32 moveCount = m_MaxSameTurnMoves - m_SameTurnMovesCount; + // We will send new path requests to worker threads, + // trying to balance the workload somewhat + // and then notify them they can start working. + // To avoid data races, we can only push jobs when workers are not computing them, + // So FetchAsyncResultsAndSendMessages must have been called first. + + std::vector longRequests = PopMovesToProcess(m_LongPathRequests, useMax, m_MaxSameTurnMoves); + std::vector shortRequests = PopMovesToProcess(m_ShortPathRequests, useMax, m_MaxSameTurnMoves - longRequests.size()); - if (moveCount <= 0) - return; + PushRequestsToWorkers(longRequests); + PushRequestsToWorkers(shortRequests); - // Copy the long request elements we are going to process into a new array - std::vector longRequests; - if ((i32)m_LongPathRequests.size() <= moveCount) + m_PathfinderFrontier.Reset(); + + if (m_UseThreading) + { + for (std::unique_ptr& worker : m_Workers) { - m_LongPathRequests.swap(longRequests); - moveCount = (i32)longRequests.size(); + // Mark as computing to unblock. + ENSURE(!worker->m_Computing); + worker->m_Computing = true; } - else + m_PathfinderConditionVariable.notify_all(); + } + else + m_Workers.back()->Work(); +} + +template +std::vector CCmpPathfinder::PopMovesToProcess(std::vector& requests, bool useMax, size_t maxMoves) +{ + std::vector poppedRequests; + if (useMax) + { + size_t amount = std::min(poppedRequests.size(), maxMoves); + if (amount > 0) { - longRequests.resize(moveCount); - copy(m_LongPathRequests.begin(), m_LongPathRequests.begin() + moveCount, longRequests.begin()); - m_LongPathRequests.erase(m_LongPathRequests.begin(), m_LongPathRequests.begin() + moveCount); + poppedRequests.insert(poppedRequests.begin(), std::make_move_iterator(requests.end() - amount), std::make_move_iterator(requests.end())); + requests.erase(requests.end() - amount, requests.end()); } - - ProcessLongRequests(longRequests); - - m_SameTurnMovesCount = (u16)(m_SameTurnMovesCount + moveCount); } - - if (!m_ShortPathRequests.empty()) + else { - // Figure out how many moves we can do now - i32 moveCount = m_MaxSameTurnMoves - m_SameTurnMovesCount; + poppedRequests.swap(requests); + requests.clear(); + } - if (moveCount <= 0) - return; + return poppedRequests; +} - // Copy the short request elements we are going to process into a new array - std::vector shortRequests; - if ((i32)m_ShortPathRequests.size() <= moveCount) - { - m_ShortPathRequests.swap(shortRequests); - moveCount = (i32)shortRequests.size(); - } - else - { - shortRequests.resize(moveCount); - copy(m_ShortPathRequests.begin(), m_ShortPathRequests.begin() + moveCount, shortRequests.begin()); - m_ShortPathRequests.erase(m_ShortPathRequests.begin(), m_ShortPathRequests.begin() + moveCount); - } +template +void CCmpPathfinder::PushRequestsToWorkers(std::vector& from) +{ + if (from.empty()) + return; - ProcessShortRequests(shortRequests); + // Trivial load-balancing, / rounds towards zero so add 1 to ensure we do push all requests. + size_t amount = from.size() / m_Workers.size() + 1; - m_SameTurnMovesCount = (u16)(m_SameTurnMovesCount + moveCount); + for (std::unique_ptr& worker : m_Workers) + { + // Prevent pushing requests when the worker is computing. + // Call FetchAsyncResultsAndSendMessages() before pushing new requests. + ENSURE(!worker->m_Computing); + + // Since we are rounding up before, ensure we aren't pushing beyond the end. + amount = std::min(amount, from.size()); + worker->PushRequests(from, amount); + from.erase(from.end() - amount, from.end()); } } Index: source/simulation2/components/CCmpPathfinder_Common.h =================================================================== --- source/simulation2/components/CCmpPathfinder_Common.h +++ source/simulation2/components/CCmpPathfinder_Common.h @@ -39,6 +39,10 @@ #include "simulation2/helpers/LongPathfinder.h" #include "simulation2/helpers/VertexPathfinder.h" +#include +#include +#include + class SceneCollector; class AtlasOverlay; @@ -53,6 +57,73 @@ */ class CCmpPathfinder : public ICmpPathfinder { +protected: + + class PathfinderWorker + { + friend CCmpPathfinder; + public: + PathfinderWorker(const CCmpPathfinder& pathfinder, size_t index); + + ~PathfinderWorker(); + + void PrepareToKill(); + + // Will loop until a conditional_variable notifies us, and call Work(). + void WaitForWork(); + + // Process path requests, checking if we should stop before each new one. + // Should be callable both synchronously and asynchronously. + void Work(); + + void ComputePath(entity_pos_t x0, entity_pos_t z0, const PathGoal& goal, pass_class_t passClass, WaypointPath& ret) const + { + m_Pathfinder.m_LongPathfinder.ComputePath(x0, z0, goal, passClass, ret); + } + + WaypointPath ComputeShortPath(const ShortPathRequest& request) const + { + return m_VertexPathfinder.ComputeShortPath(request, CmpPtr(m_Pathfinder.GetSystemEntity())); + } + + + private: + // Takes care of what needs to be called to initialise the thread before calling WaitForWork(). + void InitThread(size_t index); + + // Insert requests in m_[Long/Short]Requests depending on from. + // This could be removed when we may use if-constexpr in CCmpPathfinder::PushRequestsToWorkers + template + void PushRequests(std::vector& from, ssize_t amount); + + /* + * General state + */ + + // Read-only pathfinder, for getting required information (grids,…). + const CCmpPathfinder& m_Pathfinder; + + // Stores our results, the main thread will fetch this. + std::vector m_Results; + + std::unique_ptr m_Thread; + + /* + * Thread synchronisation + */ + + std::atomic m_Kill; + std::atomic m_Computing; + + std::vector m_LongRequests; + std::vector m_ShortRequests; + + VertexPathfinder m_VertexPathfinder; + }; + + // Allow the workers to access our private variables + friend class PathfinderWorker; + public: static void ClassInit(CComponentManager& componentManager) { @@ -76,7 +147,7 @@ std::vector m_LongPathRequests; std::vector m_ShortPathRequests; u32 m_NextAsyncTicket; // unique IDs for asynchronous path requests - u16 m_SameTurnMovesCount; // current number of same turn moves we have processed this turn + u16 m_MaxSameTurnMoves; // How many moves to immediately compute. // Lazily-constructed dynamic state (not serialized): @@ -91,13 +162,15 @@ GridUpdateInformation m_AIPathfinderDirtinessInformation; bool m_TerrainDirty; - std::unique_ptr m_VertexPathfinder; // Interface to the long-range pathfinder. - LongPathfinder m_LongPathfinder; + mutable LongPathfinder m_LongPathfinder; - // For responsiveness we will process some moves in the same turn they were generated in - - u16 m_MaxSameTurnMoves; // max number of moves that can be created and processed in the same turn + // Worker process pathing requests. + std::vector> m_Workers; + bool m_UseThreading = false; + mutable std::mutex m_PathfinderMutex; + mutable std::condition_variable m_PathfinderConditionVariable; + mutable Frontier m_PathfinderFrontier; AtlasOverlay* m_AtlasOverlay; @@ -162,19 +235,15 @@ virtual Grid ComputeShoreGrid(bool expandOnWater = false); - virtual void ComputePath(entity_pos_t x0, entity_pos_t z0, const PathGoal& goal, pass_class_t passClass, WaypointPath& ret) - { - m_LongPathfinder.ComputePath(x0, z0, goal, passClass, ret); - } + virtual void ComputePathImmediate(entity_pos_t x0, entity_pos_t z0, const PathGoal& goal, pass_class_t passClass, WaypointPath& ret) const; virtual u32 ComputePathAsync(entity_pos_t x0, entity_pos_t z0, const PathGoal& goal, pass_class_t passClass, entity_id_t notify); - virtual WaypointPath ComputeShortPath(const ShortPathRequest& request) - { - return m_VertexPathfinder->ComputeShortPath(request, CmpPtr(GetSystemEntity())); - } + virtual WaypointPath ComputeShortPathImmediate(const ShortPathRequest& request) const; - virtual u32 ComputeShortPathAsync(entity_pos_t x0, entity_pos_t z0, entity_pos_t clearance, entity_pos_t range, const PathGoal& goal, pass_class_t passClass, bool avoidMovingUnits, entity_id_t controller, entity_id_t notify); + virtual u32 ComputeShortPathAsync(entity_pos_t x0, entity_pos_t z0, entity_pos_t clearance, entity_pos_t range, + const PathGoal& goal, pass_class_t passClass, bool avoidMovingUnits, + entity_id_t controller, entity_id_t notify); virtual void SetDebugPath(entity_pos_t x0, entity_pos_t z0, const PathGoal& goal, pass_class_t passClass) { @@ -183,7 +252,8 @@ virtual void SetDebugOverlay(bool enabled) { - m_VertexPathfinder->SetDebugOverlay(enabled); + for (std::unique_ptr& worker : m_Workers) + worker->m_VertexPathfinder.SetDebugOverlay(enabled); m_LongPathfinder.SetDebugOverlay(enabled); } @@ -207,13 +277,15 @@ virtual ICmpObstruction::EFoundationCheck CheckBuildingPlacement(const IObstructionTestFilter& filter, entity_pos_t x, entity_pos_t z, entity_pos_t a, entity_pos_t w, entity_pos_t h, entity_id_t id, pass_class_t passClass, bool onlyCenterPoint) const; - virtual void FinishAsyncRequests(); + virtual void FetchAsyncResultsAndSendMessages(); - void ProcessLongRequests(const std::vector& longRequests); + virtual void StartProcessingMoves(bool useMax); - void ProcessShortRequests(const std::vector& shortRequests); + template + std::vector PopMovesToProcess(std::vector& requests, bool useMax = false, size_t maxMoves = 0); - virtual void ProcessSameTurnMoves(); + template + void PushRequestsToWorkers(std::vector& from); /** * Regenerates the grid based on the current obstruction list, if necessary Index: source/simulation2/components/CCmpRallyPointRenderer.cpp =================================================================== --- source/simulation2/components/CCmpRallyPointRenderer.cpp +++ source/simulation2/components/CCmpRallyPointRenderer.cpp @@ -686,7 +686,7 @@ start.X = m_RallyPoints[index-1].X; start.Y = m_RallyPoints[index-1].Y; } - cmpPathfinder->ComputePath(start.X, start.Y, goal, cmpPathfinder->GetPassabilityClass(m_LinePassabilityClass), path); + cmpPathfinder->ComputePathImmediate(start.X, start.Y, goal, cmpPathfinder->GetPassabilityClass(m_LinePassabilityClass), path); // Check if we got a path back; if not we probably have two markers less than one tile apart. if (path.m_Waypoints.size() < 2) Index: source/simulation2/components/ICmpPathfinder.h =================================================================== --- source/simulation2/components/ICmpPathfinder.h +++ source/simulation2/components/ICmpPathfinder.h @@ -33,6 +33,16 @@ template class Grid; +// returned by asynchronous workers, used to send messages in the main thread. +struct WaypointPath; + +struct PathResult +{ + u32 ticket; + entity_id_t notify; + WaypointPath path; +}; + /** * Pathfinder algorithms. * @@ -89,41 +99,36 @@ virtual Grid ComputeShoreGrid(bool expandOnWater = false) = 0; /** - * Compute a tile-based path from the given point to the goal, and return the set of waypoints. - * The waypoints correspond to the centers of horizontally/vertically adjacent tiles - * along the path. - */ - virtual void ComputePath(entity_pos_t x0, entity_pos_t z0, const PathGoal& goal, pass_class_t passClass, WaypointPath& ret) = 0; - - /** - * Asynchronous version of ComputePath. + * Request a long path computation, asynchronously. * The result will be sent as CMessagePathResult to 'notify'. * Returns a unique non-zero number, which will match the 'ticket' in the result, * so callers can recognise each individual request they make. */ virtual u32 ComputePathAsync(entity_pos_t x0, entity_pos_t z0, const PathGoal& goal, pass_class_t passClass, entity_id_t notify) = 0; + /* + * Request a long-path computation immediately + */ + virtual void ComputePathImmediate(entity_pos_t x0, entity_pos_t z0, const PathGoal& goal, pass_class_t passClass, WaypointPath& ret) const = 0; + + /** + * Request a short path computation, asynchronously. + * The result will be sent as CMessagePathResult to 'notify'. + * Returns a unique non-zero number, which will match the 'ticket' in the result, + * so callers can recognise each individual request they make. + */ + virtual u32 ComputeShortPathAsync(entity_pos_t x0, entity_pos_t z0, entity_pos_t clearance, entity_pos_t range, const PathGoal& goal, pass_class_t passClass, bool avoidMovingUnits, entity_id_t group, entity_id_t notify) = 0; + + /* + * Request a short-path computation immediately. + */ + virtual WaypointPath ComputeShortPathImmediate(const ShortPathRequest& request) const = 0; + /** * If the debug overlay is enabled, render the path that will computed by ComputePath. */ virtual void SetDebugPath(entity_pos_t x0, entity_pos_t z0, const PathGoal& goal, pass_class_t passClass) = 0; - /** - * Compute a precise path from the given point to the goal, and return the set of waypoints. - * The path is based on the full set of obstructions that pass the filter, such that - * a unit of clearance 'clearance' will be able to follow the path with no collisions. - * The path is restricted to a box of radius 'range' from the starting point. - */ - virtual WaypointPath ComputeShortPath(const ShortPathRequest& request) = 0; - - /** - * Asynchronous version of ComputeShortPath (using ControlGroupObstructionFilter). - * The result will be sent as CMessagePathResult to 'notify'. - * Returns a unique non-zero number, which will match the 'ticket' in the result, - * so callers can recognise each individual request they make. - */ - virtual u32 ComputeShortPathAsync(entity_pos_t x0, entity_pos_t z0, entity_pos_t clearance, entity_pos_t range, const PathGoal& goal, pass_class_t passClass, bool avoidMovingUnits, entity_id_t group, entity_id_t notify) = 0; - /** * Check whether the given movement line is valid and doesn't hit any obstructions * or impassable terrain. @@ -171,12 +176,12 @@ /** * Finish computing asynchronous path requests and send the CMessagePathResult messages. */ - virtual void FinishAsyncRequests() = 0; + virtual void FetchAsyncResultsAndSendMessages() = 0; /** - * Process moves during the same turn they were created in to improve responsiveness. + * Tell asynchronous pathfinder threads that they can begin computing paths. */ - virtual void ProcessSameTurnMoves() = 0; + virtual void StartProcessingMoves(bool useMax = false) = 0; /** * Regenerates the grid based on the current obstruction list, if necessary Index: source/simulation2/components/tests/test_Pathfinder.h =================================================================== --- source/simulation2/components/tests/test_Pathfinder.h +++ source/simulation2/components/tests/test_Pathfinder.h @@ -184,7 +184,7 @@ PathGoal goal = { PathGoal::POINT, x1, z1 }; WaypointPath path; - cmp->ComputePath(x0, z0, goal, cmp->GetPassabilityClass("default"), path); + cmp->ComputePathImmediate(x0, z0, goal, cmp->GetPassabilityClass("default"), path); } t = timer_Time() - t; @@ -370,7 +370,7 @@ PathGoal goal = { PathGoal::POINT, x1, z1 }; WaypointPath path; - cmpPathfinder->ComputePath(x0, z0, goal, cmpPathfinder->GetPassabilityClass("default"), path); + cmpPathfinder->ComputePathImmediate(x0, z0, goal, cmpPathfinder->GetPassabilityClass("default"), path); u32 debugSteps; double debugTime; @@ -419,7 +419,7 @@ for (int i = 0; i < n; ++i) { WaypointPath path; - cmpPathfinder->ComputePath(x0, z0, goal, cmpPathfinder->GetPassabilityClass("default"), path); + cmpPathfinder->ComputePathImmediate(x0, z0, goal, cmpPathfinder->GetPassabilityClass("default"), path); } t = timer_Time() - t; debug_printf("### RepeatPath %fms each (%fs total)\n", 1000*t / n, t); Index: source/simulation2/helpers/LongPathfinder.h =================================================================== --- source/simulation2/helpers/LongPathfinder.h +++ source/simulation2/helpers/LongPathfinder.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2017 Wildfire Games. +/* Copyright (C) 2019 Wildfire Games. * This file is part of 0 A.D. * * 0 A.D. is free software: you can redistribute it and/or modify @@ -18,6 +18,7 @@ #ifndef INCLUDED_LONGPATHFINDER #define INCLUDED_LONGPATHFINDER +#include #include "Pathfinding.h" #include "HierarchicalPathfinder.h" @@ -261,6 +262,8 @@ mutable WaypointPath* m_DebugPath; mutable pass_class_t m_DebugPassClass; + mutable std::mutex m_DebugMutex; + private: PathCost CalculateHeuristic(int i, int j, int iGoal, int jGoal) const; void ProcessNeighbour(int pi, int pj, int i, int j, PathCost pg, PathfinderState& state) const; Index: source/simulation2/helpers/LongPathfinder.cpp =================================================================== --- source/simulation2/helpers/LongPathfinder.cpp +++ source/simulation2/helpers/LongPathfinder.cpp @@ -718,6 +718,8 @@ PROFILE2_IFSPIKE("ComputePathJPS", 0.0002); PathfinderState state = { 0 }; + ENSURE(!m_UseJPSCache && "Jump Point Cache currently not supported as it must handle threading properly."); + /* if (m_UseJPSCache && m_JumpPointCache.find(passClass) == m_JumpPointCache.end()) { state.jpc = new JumpPointCache; @@ -725,7 +727,7 @@ debug_printf("PATHFINDER: JPC memory: %d kB\n", (int)state.jpc->GetMemoryUsage() / 1024); m_JumpPointCache[passClass] = shared_ptr(state.jpc); } - + */ // Convert the start coordinates to tile indexes u16 i0, j0; Pathfinding::NearestNavcell(x0, z0, i0, j0, m_GridSize, m_GridSize); @@ -896,6 +898,7 @@ ImprovePathWaypoints(path, passClass, origGoal.maxdist, x0, z0); // Save this grid for debug display + std::lock_guard lock(m_DebugMutex); delete m_DebugGrid; m_DebugGrid = state.tiles; m_DebugSteps = state.steps; @@ -958,6 +961,7 @@ void LongPathfinder::GetDebugDataJPS(u32& steps, double& time, Grid& grid) const { + std::lock_guard lock(m_DebugMutex); steps = m_DebugSteps; time = m_DebugTime; @@ -982,6 +986,7 @@ void LongPathfinder::SetDebugOverlay(bool enabled) { + std::lock_guard lock(m_DebugMutex); if (enabled && !m_DebugOverlay) m_DebugOverlay = new LongOverlay(*this); else if (!enabled && m_DebugOverlay) Index: source/simulation2/helpers/VertexPathfinder.h =================================================================== --- source/simulation2/helpers/VertexPathfinder.h +++ source/simulation2/helpers/VertexPathfinder.h @@ -98,6 +98,7 @@ std::atomic m_DebugOverlay; mutable std::vector m_DebugOverlayShortPathLines; + mutable std::mutex m_DebugMutex; // These vectors are expensive to recreate on every call, so we cache them here. // They are made mutable to allow using them in the otherwise const ComputeShortPath. Index: source/simulation2/helpers/VertexPathfinder.cpp =================================================================== --- source/simulation2/helpers/VertexPathfinder.cpp +++ source/simulation2/helpers/VertexPathfinder.cpp @@ -828,6 +828,7 @@ { if (!m_DebugOverlay) return; + std::lock_guard lock(m_DebugMutex); m_DebugOverlayShortPathLines.clear(); @@ -861,6 +862,7 @@ { if (!m_DebugOverlay) return; + std::lock_guard lock(m_DebugMutex); #define PUSH_POINT(p) STMT(xz.push_back(p.X.ToFloat()); xz.push_back(p.Y.ToFloat())) // Render the vertexes as little Pac-Man shapes to indicate quadrant direction @@ -938,6 +940,7 @@ return; return; // Disabled by default. + std::lock_guard lock(m_DebugMutex); m_DebugOverlayShortPathLines.push_back(SOverlayLine()); m_DebugOverlayShortPathLines.back().m_Color = visible ? CColor(0, 1, 0, 0.5) : CColor(1, 0, 0, 0.5); @@ -957,6 +960,7 @@ if (!m_DebugOverlay) return; + std::lock_guard lock(m_DebugMutex); for (size_t i = 0; i < m_DebugOverlayShortPathLines.size(); ++i) collector.Submit(&m_DebugOverlayShortPathLines[i]); }