Index: binaries/data/config/default.cfg
===================================================================
--- binaries/data/config/default.cfg
+++ binaries/data/config/default.cfg
@@ -404,6 +404,9 @@
zoom.in = 5
zoom.out = 4
+[multithreading]
+pathfinder = 0 ; How many threads to use for pathfinding. Special values: 0 chooses automatically, 1 de-activates threading entirely.
+
[chat]
timestamp = true ; Show at which time chat messages have been sent
Index: binaries/data/mods/public/gui/credits/texts/programming.json
===================================================================
--- binaries/data/mods/public/gui/credits/texts/programming.json
+++ binaries/data/mods/public/gui/credits/texts/programming.json
@@ -125,6 +125,7 @@
{"nick": "kabzerek", "name": "Grzegorz Kabza"},
{"nick": "Kai", "name": "Kai Chen"},
{"name": "Kareem Ergawy"},
+ {"nick": "Kuba386", "name": "Jakub Kośmicki"},
{"nick": "kevmo", "name": "Kevin Caffrey"},
{"nick": "kezz", "name": "Graeme Kerry"},
{"nick": "kingadami", "name": "Adam Winsor"},
Index: binaries/data/mods/public/gui/options/options.json
===================================================================
--- binaries/data/mods/public/gui/options/options.json
+++ binaries/data/mods/public/gui/options/options.json
@@ -81,7 +81,16 @@
"label": "Chat Timestamp",
"tooltip": "Show time that messages are posted in the lobby, gamesetup and ingame chat.",
"config": "chat.timestamp"
+ },
+			{
+				"type": "number",
+				"label": "Number of pathfinder threads",
+				"tooltip": "Number of pathfinder worker threads. Use 0 to choose automatically and 1 to disable threading altogether.",
+				"config": "multithreading.pathfinder",
+				"min": 0,
+				"max": 64
}
+
]
},
{
Index: source/ps/ThreadFrontier.h
===================================================================
--- /dev/null
+++ source/ps/ThreadFrontier.h
@@ -0,0 +1,73 @@
+/* Copyright (C) 2019 Wildfire Games.
+ * This file is part of 0 A.D.
+ *
+ * 0 A.D. is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * 0 A.D. is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with 0 A.D. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef INCLUDED_THREADFRONTIER
+#define INCLUDED_THREADFRONTIER
+
+#include <condition_variable>
+#include <mutex>
+
+/**
+ * A ThreadFrontier is similar to a Barrier in that it synchronizes n threads.
+ * A frontier has one thread waiting (in Watch()) for n other threads to go
+ * through the Frontier (each by calling GoThrough() once).
+ *
+ * After Setup() the frontier is open: Watch() returns immediately until
+ * Reset() closes it again for a new round.
+ */
+class ThreadFrontier
+{
+private:
+	// Protects m_Expecting and m_Count, and is the mutex m_ConditionVariable waits on.
+	std::mutex m_Mutex;
+	std::condition_variable m_ConditionVariable;
+	// Number of GoThrough() calls needed before the frontier is open.
+	int m_Expecting;
+	// Number of GoThrough() calls since the last Reset().
+	int m_Count;
+public:
+	ThreadFrontier() : m_Expecting(0), m_Count(0) {}
+
+	/**
+	 * Sets how many threads are expected to go through the frontier.
+	 * Asserts that the frontier has not been set up or used before (both counters are still 0).
+	 */
+	void Setup(int expect)
+	{
+		// Lock before reading the counters so that the check and the writes are one atomic step.
+		std::lock_guard<std::mutex> lock(m_Mutex);
+		ENSURE(m_Expecting == 0 && m_Count == 0);
+		m_Expecting = expect;
+		// The frontier is open, call Reset() to close it.
+		m_Count = m_Expecting;
+	}
+
+	/**
+	 * Closes the frontier: Watch() will block until the expected number of threads went through again.
+	 */
+	void Reset()
+	{
+		// m_Count is read and written under m_Mutex everywhere else, so take the lock here as well.
+		std::lock_guard<std::mutex> lock(m_Mutex);
+		m_Count = 0;
+	}
+
+	/**
+	 * Blocks the calling thread until all expected threads have gone through the frontier.
+	 */
+	void Watch()
+	{
+		std::unique_lock<std::mutex> lock(m_Mutex);
+		// wait() checks the predicate before blocking, so if all threads have already gone
+		// through the frontier, we stop watching right away.
+		m_ConditionVariable.wait(lock, [this] { return m_Count == m_Expecting; });
+	}
+
+	/**
+	 * Records that the calling thread went through the frontier, waking the watching thread if it was the last one.
+	 */
+	void GoThrough()
+	{
+		// Acquire the lock: we must be sure that the watching thread is either not yet in Watch()
+		// or is fully in the waiting state. Without this mutex lock, we could notify when the watching thread
+		// is in wait() but not yet in the waiting state, thus deadlocking.
+		std::lock_guard<std::mutex> lock(m_Mutex);
+		// Notify the watching thread if we are the last to go through.
+		if (++m_Count == m_Expecting)
+			m_ConditionVariable.notify_one();
+	}
+};
+
+#endif // INCLUDED_THREADFRONTIER
Index: source/ps/ThreadUtil.h
===================================================================
--- source/ps/ThreadUtil.h
+++ source/ps/ThreadUtil.h
@@ -33,6 +33,11 @@
*/
void SetMainThread();
+/**
+ * Returns the number of threads we want for the pathfinder.
+ * The "multithreading.pathfinder" config value is used when it is set to a non-zero value;
+ * otherwise the number is derived from std::thread::hardware_concurrency().
+ * Always returns 1 while the Atlas game loop is running.
+ */
+u32 GetNumberOfPathfindingThreads();
+
}
#endif // INCLUDED_THREADUTIL
Index: source/ps/ThreadUtil.cpp
===================================================================
--- source/ps/ThreadUtil.cpp
+++ source/ps/ThreadUtil.cpp
@@ -20,6 +20,8 @@
#include
#include "ThreadUtil.h"
+#include "ConfigDB.h"
+#include "tools/atlas/GameInterface/GameLoop.h"
static bool g_MainThreadSet;
static std::thread::id g_MainThread;
@@ -39,3 +41,22 @@
g_MainThread = std::this_thread::get_id();
g_MainThreadSet = true;
}
+
+// Returns how many pathfinder threads should be used:
+//  - 1 while the Atlas game loop is running,
+//  - the "multithreading.pathfinder" config value when it is non-zero,
+//  - otherwise a value derived from the number of hardware threads (never less than 1).
+u32 ThreadUtil::GetNumberOfPathfindingThreads()
+{
+	// TODO: threading is de-activated in Atlas because the vertex pathfinder uses the obstruction manager's
+	// obstructions, and these can be changed in-between turns in Atlas. We should probably protect them
+	// with a mutex in the pathfinder to make sure they are not changed while paths are being computed.
+	if (g_AtlasGameLoop && g_AtlasGameLoop->running)
+		return 1;
+
+	u32 wantedThreads = 0;
+
+	if (CConfigDB::IsInitialised())
+		CFG_GET_VAL("multithreading.pathfinder", wantedThreads);
+
+	// An explicit, non-zero setting always wins.
+	if (wantedThreads != 0)
+		return wantedThreads;
+
+	// hardware_concurrency() returns 0 when the number of cores cannot be determined:
+	// without this guard the unsigned subtraction below would wrap around to a huge thread count.
+	// A single core does not benefit from worker threads either.
+	const u32 cores = std::thread::hardware_concurrency();
+	if (cores <= 1)
+		return 1;
+
+	// By default use 2 * (# of cores - 1) threads to benefit from hardware load-balancing as ours is very simple.
+	return (cores - 1) * 2;
+}
Index: source/simulation2/components/CCmpPathfinder.cpp
===================================================================
--- source/simulation2/components/CCmpPathfinder.cpp
+++ source/simulation2/components/CCmpPathfinder.cpp
@@ -27,6 +27,7 @@
#include "ps/CLogger.h"
#include "ps/CStr.h"
#include "ps/Profile.h"
+#include "ps/ThreadUtil.h"
#include "ps/XML/Xeromyces.h"
#include "renderer/Scene.h"
#include "simulation2/MessageTypes.h"
@@ -68,21 +69,6 @@
CParamNode externalParamNode;
CParamNode::LoadXML(externalParamNode, L"simulation/data/pathfinder.xml", "pathfinder");
- // Previously all move commands during a turn were
- // queued up and processed asynchronously at the start
- // of the next turn. Now we are processing queued up
- // events several times duing the turn. This improves
- // responsiveness and units move more smoothly especially.
- // when in formation. There is still a call at the
- // beginning of a turn to process all outstanding moves -
- // this will handle any moves above the MaxSameTurnMoves
- // threshold.
- //
- // TODO - The moves processed at the beginning of the
- // turn do not count against the maximum moves per turn
- // currently. The thinking is that this will eventually
- // happen in another thread. Either way this probably
- // will require some adjustment and rethinking.
const CParamNode pathingSettings = externalParamNode.GetChild("Pathfinder");
m_MaxSameTurnMoves = (u16)pathingSettings.GetChild("MaxSameTurnMoves").ToInt();
@@ -97,13 +83,35 @@
m_PassClassMasks[name] = mask;
}
- m_Workers.emplace_back(PathfinderWorker{});
-}
+ u32 wantedThreads = ThreadUtil::GetNumberOfPathfindingThreads();
+
+ LOGMESSAGE("Initialising %i threads for pathfinding.", wantedThreads);
+
+ // The worker thread will only call std::thread if we actually have > 1 threads, otherwise we're running in the main thread.
+ if (wantedThreads <= 1) // <= 1 as the above computations returns 0 for one core.
+ {
+ m_UseThreading = false;
+ m_Workers.emplace_back();
+ }
+ else
+ {
+ m_PathfinderFrontier.Setup(wantedThreads);
+ m_UseThreading = true;
+ // We cannot move workers or threads will run on deleted instances.
+ m_Workers.resize(wantedThreads);
+ for (size_t i = 0; i < wantedThreads; ++i)
+ m_Workers[i].Start(*this, i);
+ }
+};
CCmpPathfinder::~CCmpPathfinder() {};
void CCmpPathfinder::Deinit()
{
+ for (PathfinderWorker& worker : m_Workers)
+ worker.PrepareToKill();
+
+ m_PathfinderConditionVariable.notify_all();
m_Workers.clear();
SetDebugOverlay(false); // cleans up memory
@@ -703,7 +711,27 @@
// Async pathfinder workers
-CCmpPathfinder::PathfinderWorker::PathfinderWorker() {}
+// Creates an idle worker: not computing, not asked to die, and without a running thread (see Start()).
+// The initialisers follow the declaration order of the members (m_Kill, then m_Computing).
+CCmpPathfinder::PathfinderWorker::PathfinderWorker() : m_Kill(false), m_Computing(false)
+{
+}
+
+// Waits for the worker thread (if one was started) to finish before the worker goes away.
+CCmpPathfinder::PathfinderWorker::~PathfinderWorker()
+{
+	// Nothing to wait for when no thread is attached to this worker.
+	if (!m_Thread.joinable())
+		return;
+
+	m_Thread.join();
+}
+
+// Spawns the thread serving this worker; does nothing when the pathfinder is not using threading.
+void CCmpPathfinder::PathfinderWorker::Start(const CCmpPathfinder& pathfinder, size_t index)
+{
+	if (!pathfinder.m_UseThreading)
+		return;
+
+	// The new thread runs InitThread(pathfinder, index) on this very worker.
+	m_Thread = std::thread(&CCmpPathfinder::PathfinderWorker::InitThread, this, std::ref(pathfinder), index);
+}
+
+// Entry point of the worker thread (see Start()): registers the thread with the profiler
+// under a name containing the worker's index, then enters the WaitForWork() loop.
+void CCmpPathfinder::PathfinderWorker::InitThread(const CCmpPathfinder& pathfinder, size_t index)
+{
+ g_Profiler2.RegisterCurrentThread("Pathfinder thread " + std::to_string(index));
+ WaitForWork(pathfinder);
+}
template
void CCmpPathfinder::PathfinderWorker::PushRequests(std::vector&, ssize_t)
@@ -721,6 +749,32 @@
m_ShortRequests.insert(m_ShortRequests.end(), std::make_move_iterator(from.end() - amount), std::make_move_iterator(from.end()));
}
+// Flags this worker so that WaitForWork() returns the next time it checks m_Kill.
+// This only sets the flag: the caller still has to notify the pathfinder's condition variable to wake the thread up.
+// NOTE(review): the flag is set without holding the pathfinder's mutex, so a worker that has just evaluated
+// its wait predicate but is not blocked yet could miss the notification - confirm and lock in the caller if needed.
+void CCmpPathfinder::PathfinderWorker::PrepareToKill()
+{
+ m_Kill = true;
+}
+
+// Main loop of a worker thread: sleeps on the pathfinder's condition variable until this worker is flagged
+// as computing or as killed, processes its queued requests, then goes through the pathfinder's frontier.
+// Only returns once m_Kill is set.
+void CCmpPathfinder::PathfinderWorker::WaitForWork(const CCmpPathfinder& pathfinder)
+{
+ while (true)
+ {
+ {
+ // The lock is only needed for waiting: it is released at the end of this scope, before Work() runs.
+ std::unique_lock lock(pathfinder.m_PathfinderMutex);
+ pathfinder.m_PathfinderConditionVariable.wait(lock, [this] { return m_Computing || m_Kill; });
+ }
+
+ // A kill request is checked first, so pending requests are not processed in that case.
+ if (m_Kill)
+ return;
+ Work(pathfinder);
+
+ // We must be the ones setting our m_Computing to false.
+ ENSURE(m_Computing);
+ m_Computing = false;
+
+ // Report to the thread watching the frontier that this worker is done for this round.
+ pathfinder.m_PathfinderFrontier.GoThrough();
+ }
+}
+
void CCmpPathfinder::PathfinderWorker::Work(const CCmpPathfinder& pathfinder)
{
while (!m_LongRequests.empty())
@@ -773,6 +827,11 @@
{
PROFILE2("FetchAsyncResults");
+ // TODO maybe: a possible improvement here would be to push results from workers whenever they are done, and not when all are done.
+
+ // Wait until all threads have finished computing.
+ m_PathfinderFrontier.Watch();
+
// WARNING: the order in which moves are pulled must be consistent when using 1 or n workers.
// We fetch in the same order we inserted in, but we push moves backwards, so this works.
std::vector results;
@@ -798,14 +857,32 @@
void CCmpPathfinder::StartProcessingMoves(bool useMax)
{
+	// We will send new path requests to worker threads,
+	// trying to balance the workload somewhat
+	// and then notify them they can start working.
+	// To avoid data races, we can only push jobs when workers are not computing them,
+	// so FetchAsyncResultsAndSendMessages must have been called first.
+
std::vector longRequests = PopMovesToProcess(m_LongPathRequests, useMax, m_MaxSameTurnMoves);
std::vector shortRequests = PopMovesToProcess(m_ShortPathRequests, useMax, m_MaxSameTurnMoves - longRequests.size());
PushRequestsToWorkers(longRequests);
PushRequestsToWorkers(shortRequests);
- for (PathfinderWorker& worker : m_Workers)
- worker.Work(*this);
+	// Close the frontier: it opens again once every worker went through it.
+	m_PathfinderFrontier.Reset();
+
+	if (m_UseThreading)
+	{
+		{
+			// Flip the flags while holding the mutex the workers wait on: a worker is then either
+			// not yet testing its wait predicate or already blocked in wait(), so it cannot miss
+			// the notification below (which would leave the frontier closed forever).
+			std::lock_guard<std::mutex> lock(m_PathfinderMutex);
+			for (PathfinderWorker& worker : m_Workers)
+			{
+				// Mark as computing to unblock.
+				ENSURE(!worker.m_Computing);
+				worker.m_Computing = true;
+			}
+		}
+		// Notify after releasing the lock so that woken workers do not immediately block on the mutex.
+		m_PathfinderConditionVariable.notify_all();
+	}
+	else
+		// No worker threads: process the requests right here, on the calling thread.
+		m_Workers.back().Work(*this);
}
template
@@ -843,6 +920,10 @@
// In this instance, work is distributed in a strict LIFO order, effectively reversing tickets.
for (PathfinderWorker& worker : m_Workers)
{
+ // Prevent pushing requests when the worker is computing.
+ // Call FetchAsyncResultsAndSendMessages() before pushing new requests.
+ ENSURE(!worker.m_Computing);
+
amount = std::min(amount, from.size()); // Since we are rounding up before, ensure we aren't pushing beyond the end.
worker.PushRequests(from, amount);
from.erase(from.end() - amount, from.end());
Index: source/simulation2/components/CCmpPathfinder_Common.h
===================================================================
--- source/simulation2/components/CCmpPathfinder_Common.h
+++ source/simulation2/components/CCmpPathfinder_Common.h
@@ -35,9 +35,11 @@
#include "graphics/Terrain.h"
#include "maths/MathUtil.h"
#include "ps/CLogger.h"
+#include "ps/ThreadFrontier.h"
#include "renderer/TerrainOverlay.h"
#include "simulation2/components/ICmpObstructionManager.h"
+#include
class HierarchicalPathfinder;
class LongPathfinder;
@@ -64,11 +66,30 @@
friend CCmpPathfinder;
public:
PathfinderWorker();
+		// Implement a noexcept move constructor for std::vector that actually does nothing:
+		// no state is transferred from the other worker, the new worker starts out idle.
+		// The flags are initialised explicitly, since a default-initialised std::atomic
+		// holds an indeterminate value before C++20.
+		PathfinderWorker(PathfinderWorker&& other) noexcept : m_Kill(false), m_Computing(false)
+		{
+			// A worker whose thread is running must never be moved: the thread would keep using the old instance.
+			ENSURE(!other.m_Thread.joinable());
+		}
+
+ ~PathfinderWorker();
+
+ // Create the std::thread and call InitThread
+ void Start(const CCmpPathfinder& pathfinder, size_t index);
+
+ void PrepareToKill();
+
+ // Will loop until a condition_variable notifies us, and call Work().
+ void WaitForWork(const CCmpPathfinder& pathfinder);
// Process path requests, checking if we should stop before each new one.
+ // Should be callable both synchronously and asynchronously.
void Work(const CCmpPathfinder& pathfinder);
private:
+ // Takes care of what needs to be called to initialise the thread before calling WaitForWork().
+ void InitThread(const CCmpPathfinder& pathfinder, size_t index);
+
// Insert requests in m_[Long/Short]Requests depending on from.
// This could be removed when we may use if-constexpr in CCmpPathfinder::PushRequestsToWorkers
template
@@ -77,6 +98,11 @@
// Stores our results, the main thread will fetch this.
std::vector m_Results;
+ std::thread m_Thread;
+
+ std::atomic m_Kill;
+ std::atomic m_Computing;
+
std::vector m_LongRequests;
std::vector m_ShortRequests;
};
@@ -128,8 +154,12 @@
std::unique_ptr m_PathfinderHier;
std::unique_ptr m_LongPathfinder;
- // Workers process pathing requests.
+ // Workers process pathing requests.
std::vector m_Workers;
+ bool m_UseThreading = false;
+ mutable std::mutex m_PathfinderMutex;
+ mutable std::condition_variable m_PathfinderConditionVariable;
+ mutable ThreadFrontier m_PathfinderFrontier;
AtlasOverlay* m_AtlasOverlay;
Index: source/simulation2/helpers/LongPathfinder.h
===================================================================
--- source/simulation2/helpers/LongPathfinder.h
+++ source/simulation2/helpers/LongPathfinder.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2017 Wildfire Games.
+/* Copyright (C) 2019 Wildfire Games.
* This file is part of 0 A.D.
*
* 0 A.D. is free software: you can redistribute it and/or modify
@@ -18,6 +18,7 @@
#ifndef INCLUDED_LONGPATHFINDER
#define INCLUDED_LONGPATHFINDER
+#include
#include "Pathfinding.h"
#include "graphics/Overlay.h"
@@ -222,15 +223,14 @@
u16 m_GridSize;
// Debugging - output from last pathfind operation.
- // mutable as making these const would require a lot of boilerplate code
- // and they do not change the behavioural const-ness of the pathfinder.
- mutable LongOverlay* m_DebugOverlay;
- mutable PathfindTileGrid* m_DebugGrid;
- mutable u32 m_DebugSteps;
- mutable double m_DebugTime;
- mutable PathGoal m_DebugGoal;
- mutable WaypointPath* m_DebugPath;
- mutable pass_class_t m_DebugPassClass;
+ // Static and thread-local - we don't support threading debug code.
+ static thread_local LongOverlay* m_DebugOverlay;
+ static thread_local PathfindTileGrid* m_DebugGrid;
+ static thread_local u32 m_DebugSteps;
+ static thread_local double m_DebugTime;
+ static thread_local PathGoal m_DebugGoal;
+ static thread_local WaypointPath* m_DebugPath;
+ static thread_local pass_class_t m_DebugPassClass;
private:
PathCost CalculateHeuristic(int i, int j, int iGoal, int jGoal) const;
@@ -272,11 +272,8 @@
void GenerateSpecialMap(pass_class_t passClass, std::vector excludedRegions);
bool m_UseJPSCache;
- // Mutable may be used here as caching does not change the external const-ness of the Long Range pathfinder.
- // This is thread-safe as it is order independent (no change in the output of the function for a given set of params).
- // Obviously, this means that the cache should actually be a cache and not return different results
- // from what would happen if things hadn't been cached.
- mutable std::map > m_JumpPointCache;
+
+ static thread_local std::map > m_JumpPointCache;
};
/**
Index: source/simulation2/helpers/LongPathfinder.cpp
===================================================================
--- source/simulation2/helpers/LongPathfinder.cpp
+++ source/simulation2/helpers/LongPathfinder.cpp
@@ -25,6 +25,15 @@
#include "Geometry.h"
#include "HierarchicalPathfinder.h"
+// Definitions of LongPathfinder's static thread_local members:
+// every thread gets its own copy of the debug state and of the jump point cache.
+thread_local LongOverlay* LongPathfinder::m_DebugOverlay;
+thread_local PathfindTileGrid* LongPathfinder::m_DebugGrid;
+thread_local u32 LongPathfinder::m_DebugSteps;
+thread_local double LongPathfinder::m_DebugTime;
+thread_local PathGoal LongPathfinder::m_DebugGoal;
+thread_local WaypointPath* LongPathfinder::m_DebugPath;
+thread_local pass_class_t LongPathfinder::m_DebugPassClass;
+thread_local std::map > LongPathfinder::m_JumpPointCache;
+
/**
* Jump point cache.
*
@@ -373,9 +382,11 @@
LongPathfinder::LongPathfinder() :
m_UseJPSCache(false),
- m_Grid(NULL), m_GridSize(0),
- m_DebugOverlay(NULL), m_DebugGrid(NULL), m_DebugPath(NULL)
+ m_Grid(NULL), m_GridSize(0)
{
+ // The debug members are static thread_local, so they cannot appear in the initialiser list.
+ // NOTE(review): these assignments only reset the copies belonging to the thread running this constructor.
+ m_DebugOverlay = nullptr;
+ m_DebugGrid = nullptr;
+ m_DebugPath = nullptr;
}
LongPathfinder::~LongPathfinder()
Index: source/simulation2/helpers/VertexPathfinder.h
===================================================================
--- source/simulation2/helpers/VertexPathfinder.h
+++ source/simulation2/helpers/VertexPathfinder.h
@@ -96,25 +96,25 @@
const u16& m_MapSize;
Grid* const & m_TerrainOnlyGrid;
- std::atomic m_DebugOverlay;
- mutable std::vector m_DebugOverlayShortPathLines;
+ bool m_DebugOverlay;
+ static thread_local std::vector m_DebugOverlayShortPathLines;
+ static thread_local std::mutex m_DebugMutex;
// These vectors are expensive to recreate on every call, so we cache them here.
- // They are made mutable to allow using them in the otherwise const ComputeShortPath.
- mutable std::vector m_EdgesUnaligned;
- mutable std::vector m_EdgesLeft;
- mutable std::vector m_EdgesRight;
- mutable std::vector m_EdgesBottom;
- mutable std::vector m_EdgesTop;
+ static thread_local std::vector m_EdgesUnaligned;
+ static thread_local std::vector m_EdgesLeft;
+ static thread_local std::vector m_EdgesRight;
+ static thread_local std::vector m_EdgesBottom;
+ static thread_local std::vector m_EdgesTop;
// List of obstruction vertexes (plus start/end points); we'll try to find paths through
// the graph defined by these vertexes.
- mutable std::vector m_Vertexes;
+ static thread_local std::vector m_Vertexes;
// List of collision edges - paths must never cross these.
// (Edges are one-sided so intersections are fine in one direction, but not the other direction.)
- mutable std::vector m_Edges;
- mutable std::vector m_EdgeSquares; // Axis-aligned squares; equivalent to 4 edges.
+ static thread_local std::vector m_Edges;
+ static thread_local std::vector m_EdgeSquares; // Axis-aligned squares; equivalent to 4 edges.
};
#endif // INCLUDED_VERTEXPATHFINDER
Index: source/simulation2/helpers/VertexPathfinder.cpp
===================================================================
--- source/simulation2/helpers/VertexPathfinder.cpp
+++ source/simulation2/helpers/VertexPathfinder.cpp
@@ -42,6 +42,18 @@
#include "simulation2/helpers/Render.h"
#include "simulation2/system/SimContext.h"
+
+// Definitions of VertexPathfinder's static thread_local members: every thread gets its own copy
+// of the debug overlay lines and of the cached edge/vertex containers.
+// NOTE(review): m_DebugMutex is thread_local as well, so each thread locks its own mutex and it cannot
+// provide mutual exclusion between threads - confirm whether a single shared mutex was intended.
+thread_local std::vector VertexPathfinder::m_DebugOverlayShortPathLines;
+thread_local std::mutex VertexPathfinder::m_DebugMutex;
+thread_local std::vector VertexPathfinder::m_EdgesUnaligned;
+thread_local std::vector VertexPathfinder::m_EdgesLeft;
+thread_local std::vector VertexPathfinder::m_EdgesRight;
+thread_local std::vector VertexPathfinder::m_EdgesBottom;
+thread_local std::vector VertexPathfinder::m_EdgesTop;
+thread_local std::vector VertexPathfinder::m_Vertexes;
+thread_local std::vector VertexPathfinder::m_Edges;
+thread_local std::vector VertexPathfinder::m_EdgeSquares;
+
/* Quadrant optimisation:
* (loosely based on GPG2 "Optimizing Points-of-Visibility Pathfinding")
*
@@ -838,6 +850,7 @@
{
if (!m_DebugOverlay)
return;
+ std::lock_guard lock(m_DebugMutex);
m_DebugOverlayShortPathLines.clear();
@@ -871,6 +884,7 @@
{
if (!m_DebugOverlay)
return;
+ std::lock_guard lock(m_DebugMutex);
#define PUSH_POINT(p) STMT(xz.push_back(p.X.ToFloat()); xz.push_back(p.Y.ToFloat()))
// Render the vertexes as little Pac-Man shapes to indicate quadrant direction
@@ -968,6 +982,7 @@
if (!m_DebugOverlay)
return;
+ std::lock_guard lock(m_DebugMutex);
for (size_t i = 0; i < m_DebugOverlayShortPathLines.size(); ++i)
collector.Submit(&m_DebugOverlayShortPathLines[i]);
}