Changeset View
Standalone View
source/simulation2/helpers/SparseFlatMap.h
- This file was added.
/* Copyright (C) 2019 Wildfire Games. | |||||
* This file is part of 0 A.D. | |||||
* | |||||
* 0 A.D. is free software: you can redistribute it and/or modify | |||||
* it under the terms of the GNU General Public License as published by | |||||
* the Free Software Foundation, either version 2 of the License, or | |||||
* (at your option) any later version. | |||||
* | |||||
* 0 A.D. is distributed in the hope that it will be useful, | |||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||||
* GNU General Public License for more details. | |||||
* | |||||
* You should have received a copy of the GNU General Public License | |||||
* along with 0 A.D. If not, see <http://www.gnu.org/licenses/>. | |||||
*/ | |||||
#ifndef INCLUDED_SparseFlatMap | |||||
#define INCLUDED_SparseFlatMap | |||||
#include "simulation2/serialization/ISerializer.h" | |||||
#include "simulation2/serialization/IDeserializer.h" | |||||
#include <vector> | |||||
#include <deque> | |||||
/** | |||||
* SparseFlatMap is a faster replacement for std::map and std::unordered_map. | |||||
* Requirements of T: T can be used as an array index. | |||||
* Requirements of V: movable | |||||
* Other requirements: | |||||
* - the iteration order must be deterministic to ensure network synchronicity and replays. | |||||
elexis: The goal for this class, not for 0 A.D.
| |||||
Done Inline Actionsfor this class in the context of 0 A.D. would be more accurate wraitii: for this class in the context of 0 A.D. would be more accurate | |||||
* | |||||
* The basic principle is to have a sparse vector of <pointer to data> | |||||
* and a dense "data" vector of <key, data>. | |||||
* | |||||
* Inserting is slightly slower than a vector because of the pointer book-keeping and the key storage. | |||||
* Erasing just sets the key to nullptr and swap-and-erases the data, to keep it dense. | |||||
* It's thus also slightly slower than a vector. | |||||
* Iteration happens over the dense data deque, so it's as fast as iterating a deque (really fast). | |||||
* Random Access means two dereferences, so in theory it's about as fast as a deque, which is is still really fast. | |||||
*/ | |||||
template<class Map, class T, class V> | |||||
class SparseFlatMapCore | |||||
{ | |||||
private: | |||||
friend class TestSparseFlatMap; | |||||
public: | |||||
using key_type = T; | |||||
using mapped_type = V; | |||||
using value_type = std::pair<const key_type, mapped_type>; | |||||
Not Done Inline ActionsI wonder if this class has to be implemented in the header (for performance or template class requirement reasons) elexis: I wonder if this class has to be implemented in the header (for performance or template class… | |||||
Done Inline Actions? wraitii: ?
It's already in a header? Being a templated class I don't have much choice anyways. | |||||
protected: | |||||
using data_value_type = std::pair<key_type, mapped_type>; | |||||
using keys_value_type = data_value_type*; | |||||
using keys_container = std::vector<keys_value_type>; | |||||
keys_container m_Keys; | |||||
// The data container is a deque since that avoids moving items when the buffer has to grow | |||||
// which avoids additional book-keeping when that needs to happen, | |||||
Not Done Inline ActionsSometimes key_type is used, other times T is used. template<class key_type, class mapped_type> or using T and V, or KEY_T and VALUE_T consistently would remove the indirection and ambiguousity. elexis: Sometimes key_type is used, other times T is used. `template<class key_type, class… | |||||
Done Inline ActionsYeah I need a consistency pass for this. wraitii: Yeah I need a consistency pass for this. | |||||
// and it's only slightly slower than a vector for the regular case. | |||||
using data_container = std::deque<data_value_type>; | |||||
data_container m_Data; | |||||
Map* self() { return static_cast<Map*>(this); } | |||||
const Map* self() const { return static_cast<const Map*>(this); } | |||||
static key_type Idx(const key_type key) { return key; } | |||||
const keys_container& Keys(const key_type) const { return m_Keys; } | |||||
keys_container& Keys(const key_type) { return m_Keys; } | |||||
Not Done Inline ActionsAccording to the tests in D1682 vector is kind of always faster than anything else. Stan: According to the tests in D1682 vector is kind of always faster than anything else. | |||||
Done Inline ActionsI think I tested it and deque was faster for my use case as it handled sparsity better or something. Should re-test perf later though. wraitii: I think I tested it and deque was faster for my use case as it handled sparsity better or… | |||||
public: | |||||
SparseFlatMapCore() | |||||
{ | |||||
m_Keys.reserve(4096); | |||||
}; | |||||
SparseFlatMapCore(const SparseFlatMapCore& o) | |||||
{ | |||||
m_Keys = o.m_Keys; | |||||
std::copy(o.m_Data.begin(), o.m_Data.end(), std::inserter(m_Data, m_Data.begin())); | |||||
static_cast<Map*>(this)->copy(*o.self()); | |||||
} | |||||
SparseFlatMapCore& operator=(const SparseFlatMapCore& o) | |||||
{ | |||||
SparseFlatMapCore tmp(o); | |||||
m_Data.swap(tmp.m_Data); | |||||
m_Keys.swap(tmp.m_Keys); | |||||
static_cast<Map*>(this)->swap(*tmp.self()); | |||||
return *this; | |||||
Not Done Inline ActionsI was directed to not align a line after code, because if someone adds one line that needs a different alignment, all the other lines have to be changed as well; whereas using always only 1 space character means that such a systematic issue won't occur. elexis: I was directed to not align a line after code, because if someone adds one line that needs a… | |||||
Done Inline ActionsThis true. In this case though these are the only four possible combinations (const, non-const, secondary, non-secondary), and I feel like it really improves readability? wraitii: This true. In this case though these are the only four possible combinations (const, non-const… | |||||
} | |||||
// Provide an iterator-like interface for find() and such | |||||
Not Done Inline ActionsThe macro appears avoidable by putting it into a container that stores either one or two of those. elexis: The macro appears avoidable by putting it into a container that stores either one or two of… | |||||
Done Inline Actions? The macro is mostly here to help with readability for SFINAE wraitii: ?
The macro is mostly here to help with readability for SFINAE | |||||
// only this type cannot actually iterate. | |||||
template<typename K, typename M> | |||||
class pseudo_iterator | |||||
{ | |||||
friend class SparseFlatMapCore; | |||||
K key; | |||||
M* value; | |||||
pseudo_iterator(K k, M* v) : key(k), value(v) {}; | |||||
// Fake for comparison with end() | |||||
pseudo_iterator(const typename data_container::iterator&) : key(0), value(nullptr) {}; | |||||
pseudo_iterator(const typename data_container::const_iterator&) : key(0), value(nullptr) {}; | |||||
template<typename U> | |||||
Not Done Inline ActionsThe previous code didn't implement copying, there is a macro NONCOPYABLE to do the same (deleting the copy constructor and copy assignment operator). elexis: The previous code didn't implement copying, there is a macro NONCOPYABLE to do the same… | |||||
struct proxy | |||||
{ | |||||
U ptr; | |||||
proxy(U p) : ptr(p) {}; | |||||
U* operator->() { return &ptr; } | |||||
const U* operator->() const { return &ptr; } | |||||
}; | |||||
public: | |||||
pseudo_iterator(const pseudo_iterator<K, typename std::remove_const<M>::type>& o) : key(o.key), value(o.value) {}; | |||||
pseudo_iterator(const pseudo_iterator<K, const M>& o) : key(o.key), value(o.value) {}; | |||||
proxy<std::pair<K, M&>> operator->() { return proxy<std::pair<const K, M&>>((*this).operator*()); } | |||||
std::pair<K, M&> operator*() { return std::make_pair(key, std::reference_wrapper<M>(*value)); } | |||||
const proxy<std::pair<K, M&>> operator->() const { return proxy<std::pair<const K, M&>>((*this).operator*()); } | |||||
const std::pair<K, M&> operator*() const { return std::make_pair(key, std::reference_wrapper<M>(*value)); } | |||||
bool operator==(const pseudo_iterator& it) const { return value == it.value; }; | |||||
bool operator!=(const pseudo_iterator& it) const { return value != it.value; }; | |||||
bool operator==(const typename data_container::iterator&) const { return value == nullptr; }; | |||||
bool operator!=(const typename data_container::iterator&) const { return value != nullptr; }; | |||||
bool operator==(const typename data_container::const_iterator&) const { return value == nullptr; }; | |||||
bool operator!=(const typename data_container::const_iterator&) const { return value != nullptr; }; | |||||
}; | |||||
typedef pseudo_iterator<const key_type, mapped_type> iterator; | |||||
typedef const pseudo_iterator<const key_type, const mapped_type> const_iterator; | |||||
iterator it(const key_type& key) | |||||
{ | |||||
return iterator(key, &(self()->Keys(key)[Map::Idx(key)]->second)); | |||||
} | |||||
const_iterator it(const key_type& key) const | |||||
{ | |||||
return const_iterator(key, &(self()->Keys(key)[Map::Idx(key)]->second)); | |||||
} | |||||
// begin() and end() return data_container's iterator as we iterate over the compact data directly for efficiently. | |||||
typename data_container::iterator begin() { return m_Data.begin(); } | |||||
typename data_container::iterator end() { return m_Data.end(); } | |||||
typename data_container::const_iterator begin() const { return m_Data.begin(); } | |||||
typename data_container::const_iterator end() const { return m_Data.end(); } | |||||
bool empty() const { return m_Data.empty(); } | |||||
size_t size() const { return m_Data.size(); } | |||||
std::pair<iterator, bool> insert_or_assign(const key_type& key, const mapped_type& value) | |||||
{ | |||||
key_type idx = Map::Idx(key); | |||||
auto& keys = self()->Keys(key); | |||||
if (static_cast<size_t>(idx) >= keys.size()) | |||||
keys.resize(idx + 1, { nullptr }); | |||||
bool inserted = false; | |||||
if (keys[idx] == nullptr) | |||||
{ | |||||
inserted = true; | |||||
m_Data.push_back({ key, value }); | |||||
keys[idx] = { &m_Data.back() }; | |||||
} | |||||
else | |||||
*(keys[idx]) = { key, value }; | |||||
return { it(key), inserted }; | |||||
} | |||||
size_t erase(const iterator& it) | |||||
{ | |||||
return erase(it->first); | |||||
} | |||||
size_t erase(const key_type& key) | |||||
{ | |||||
key_type idx = Map::Idx(key); | |||||
keys_container& keys = self()->Keys(key); | |||||
if (static_cast<size_t>(idx) >= keys.size() || keys[idx] == nullptr) | |||||
return 0; | |||||
Not Done Inline ActionsWhat type is this? elexis: What type is this? | |||||
Done Inline Actionskeys_container&, will change wraitii: keys_container&, will change | |||||
data_value_type& data = *(keys[idx]); | |||||
data.second.~V(); | |||||
if (&data != &m_Data.back()) | |||||
{ | |||||
data = std::move(m_Data.back()); | |||||
self()->Keys(data.first)[Map::Idx(data.first)] = &data; | |||||
} | |||||
m_Data.pop_back(); | |||||
keys[idx] = nullptr; | |||||
return 1; | |||||
} | |||||
void clear() | |||||
{ | |||||
m_Keys.clear(); | |||||
m_Data.clear(); | |||||
self()->clearKeys(); | |||||
} | |||||
mapped_type& operator[](const key_type& k) | |||||
{ | |||||
iterator it = find(k); | |||||
if (find(k) == end()) | |||||
Not Done Inline ActionsWhat type is this? elexis: What type is this? | |||||
return insert_or_assign(k, {}).first->second; | |||||
return it->second; | |||||
} | |||||
mapped_type& at(const key_type& k) | |||||
{ | |||||
ENSURE(find(k) != end()); | |||||
return (*find(k)).second; | |||||
} | |||||
const mapped_type& at(const key_type& k) const | |||||
{ | |||||
ENSURE(find(k) != end()); | |||||
return (*find(k)).second; | |||||
} | |||||
iterator find(const key_type& key) | |||||
{ | |||||
key_type idx = Map::Idx(key); | |||||
Not Done Inline ActionsUsing emplace_back would throw a compiler warning if the value would end up being copied (force construct in place), whereas push_back accepts copies. elexis: Using emplace_back would throw a compiler warning if the value would end up being copied (force… | |||||
Done Inline Actionsmh, yes, vaguely seems like I had a compiling issue to use push_back here. Will check wraitii: mh, yes, vaguely seems like I had a compiling issue to use push_back here. Will check | |||||
if (static_cast<size_t>(idx) < self()->Keys(key).size() && self()->Keys(key)[idx] != nullptr) | |||||
return it(key); | |||||
return end(); | |||||
} | |||||
const_iterator find(const key_type& key) const | |||||
{ | |||||
key_type idx = Map::Idx(key); | |||||
if (static_cast<size_t>(idx) < self()->Keys(key).size() && self()->Keys(key)[idx] != nullptr) | |||||
return it(key); | |||||
return end(); | |||||
} | |||||
}; | |||||
template<class T, class V> | |||||
class SparseFlatMap : public SparseFlatMapCore<SparseFlatMap<T, V>, T, V> | |||||
{ | |||||
public: | |||||
SparseFlatMap() = default; | |||||
void copy(const SparseFlatMap&) {}; | |||||
void swap(SparseFlatMap&) {}; | |||||
void clearKeys() {}; | |||||
}; | |||||
/** | |||||
* DualFlatMap is a wrapper around SparseFlatMap when keys can start at two vastly different values. | |||||
* This avoid having a very large, empty key array, but instead two more compact key arrays. | |||||
*/ | |||||
template<class T, class V, T SECONDARY_ID_START> | |||||
class DualFlatMap : public SparseFlatMapCore<DualFlatMap<T, V, SECONDARY_ID_START>, T, V> | |||||
{ | |||||
protected: | |||||
using og = SparseFlatMapCore<DualFlatMap<T, V, SECONDARY_ID_START>, T, V>; | |||||
friend og; | |||||
static typename og::key_type Idx(const typename og::key_type key) | |||||
{ | |||||
return key < SECONDARY_ID_START ? key : key - SECONDARY_ID_START; | |||||
} | |||||
typename og::keys_container& Keys(const typename og::key_type key) | |||||
{ | |||||
return key < SECONDARY_ID_START ? og::m_Keys : m_SecondaryKeys; | |||||
} | |||||
const typename og::keys_container& Keys(const typename og::key_type key) const | |||||
{ | |||||
return key < SECONDARY_ID_START ? og::m_Keys : m_SecondaryKeys; | |||||
} | |||||
private: | |||||
typename og::keys_container m_SecondaryKeys; | |||||
public: | |||||
DualFlatMap() | |||||
{ | |||||
m_SecondaryKeys.reserve(4096); | |||||
} | |||||
Not Done Inline ActionsWhat type is this? elexis: What type is this? | |||||
Done Inline Actionsthis is a std::pair<T, V>, which doesn't feel much more informative. wraitii: this is a std::pair<T, V>, which doesn't feel much more informative. | |||||
void copy(const DualFlatMap& o) | |||||
{ | |||||
m_SecondaryKeys = o.m_SecondaryKeys; | |||||
} | |||||
void swap(DualFlatMap& o) | |||||
{ | |||||
m_SecondaryKeys.swap(o.m_SecondaryKeys); | |||||
} | |||||
void clearKeys() | |||||
{ | |||||
m_SecondaryKeys.clear(); | |||||
} | |||||
}; | |||||
template<class VSerializer> | |||||
struct SerializeSparseFlatMap | |||||
{ | |||||
template<class flatMap> | |||||
void operator()(ISerializer& serialize, const char* UNUSED(name), flatMap& value) | |||||
{ | |||||
static_assert(std::is_convertible<typename flatMap::key_type, u32>::value, "key must be convertible to a u32"); | |||||
size_t len = value.size(); | |||||
serialize.NumberU32_Unbounded("length", (u32)len); | |||||
size_t count = 0; | |||||
for (auto& data : value) | |||||
{ | |||||
serialize.NumberU32_Unbounded("key", data.first); | |||||
VSerializer()(serialize, "value", data.second); | |||||
count++; | |||||
} | |||||
// test to see if the sparse flat map count wasn't wrong | |||||
// (which causes a crashing deserialisation) | |||||
ENSURE(count == len); | |||||
} | |||||
template<class flatMap> | |||||
void operator()(IDeserializer& deserialize, const char* UNUSED(name), flatMap& value) | |||||
{ | |||||
static_assert(std::is_convertible<typename flatMap::key_type, u32>::value, "key must be convertible to a u32"); | |||||
value.clear(); | |||||
uint32_t len; | |||||
deserialize.NumberU32_Unbounded("length", len); | |||||
for (size_t i = 0; i < len; ++i) | |||||
{ | |||||
typename flatMap::key_type k; | |||||
typename flatMap::mapped_type v; | |||||
deserialize.NumberU32_Unbounded("key", k); | |||||
VSerializer()(deserialize, "value", v); | |||||
value.insert_or_assign(k, v); | |||||
} | |||||
} | |||||
}; | |||||
#endif |
The goal for this class, not for 0 A.D.