1
0
mirror of https://github.com/doitsujin/dxvk.git synced 2025-02-20 19:54:19 +01:00

[dxvk] Add new barrier tracker

Uses a hash table backed by RB trees in order to reduce
worst-case runtime overhead in some edge cases.
This commit is contained in:
Philip Rebohle 2024-10-09 18:39:56 +02:00 committed by Philip Rebohle
parent af3c46d83c
commit c6c85d7532
2 changed files with 600 additions and 0 deletions

View File

@ -2,6 +2,415 @@
namespace dxvk {
DxvkBarrierTracker::DxvkBarrierTracker() {
  // Slot 0 is a dummy entry: keeping it addressable lets the
  // rest of the code treat index 0 as "no node" without having
  // to guard every array access.
  m_nodes.emplace_back();

  // Reserve one root slot per bucket of the implicit hash
  // table, for both halves of the table (2 * HashTableSize).
  uint32_t remainingRoots = 2u * HashTableSize;

  while (remainingRoots--)
    allocateNode();
}
// Nothing to release manually, the member containers clean up after themselves.
DxvkBarrierTracker::~DxvkBarrierTracker() = default;
// Returns true if the tree selected by the hash of the given range
// and access type holds any node overlapping that range.
bool DxvkBarrierTracker::findRange(
  const DxvkAddressRange&           range,
        DxvkAccess                  accessType) const {
  // findNode yields a node index, with 0 meaning "nothing found"
  return findNode(range, computeRootIndex(range, accessType)) != 0u;
}
void DxvkBarrierTracker::insertRange(
const DxvkAddressRange& range,
DxvkAccess accessType) {
uint32_t rootIndex = computeRootIndex(range, accessType);
// If we can just insert the node with no conflicts,
// we don't have to do anything.
uint32_t nodeIndex = insertNode(range, rootIndex);
if (likely(!nodeIndex))
return;
// If there's an existing node and it contains the entire
// range we want to add already, also don't do anything.
auto& node = m_nodes[nodeIndex];
if (node.addressRange.contains(range))
return;
// Otherwise, check if there are any other overlapping ranges.
// If that is not the case, simply update the range we found.
bool hasOverlap = false;
if (range.rangeStart < node.addressRange.rangeStart) {
DxvkAddressRange testRange;
testRange.resource = range.resource;
testRange.rangeStart = range.rangeStart;
testRange.rangeEnd = node.addressRange.rangeStart - 1u;
hasOverlap = findNode(testRange, rootIndex);
}
if (range.rangeEnd > node.addressRange.rangeEnd && !hasOverlap) {
DxvkAddressRange testRange;
testRange.resource = range.resource;
testRange.rangeStart = node.addressRange.rangeEnd + 1u;
testRange.rangeEnd = range.rangeEnd;
hasOverlap = findNode(testRange, rootIndex);
}
if (!hasOverlap) {
node.addressRange.rangeStart = std::min(node.addressRange.rangeStart, range.rangeStart);
node.addressRange.rangeEnd = std::max(node.addressRange.rangeEnd, range.rangeEnd);
return;
}
// If there are multiple ranges overlapping the one being
// inserted, remove them all and insert the merged range.
DxvkAddressRange mergedRange = range;
while (nodeIndex) {
auto& node = m_nodes[nodeIndex];
mergedRange.rangeStart = std::min(mergedRange.rangeStart, node.addressRange.rangeStart);
mergedRange.rangeEnd = std::max(mergedRange.rangeEnd, node.addressRange.rangeEnd);
removeNode(nodeIndex, rootIndex);
nodeIndex = findNode(range, rootIndex);
}
insertNode(mergedRange, rootIndex);
}
// Invalidates every root and releases the children of all roots
// that are flagged as having a subtree.
void DxvkBarrierTracker::clear() {
  m_rootMaskValid = 0u;

  // Walk the set bits of the subtree mask from lowest to highest,
  // dropping the lowest set bit after each iteration.
  for (; m_rootMaskSubtree; m_rootMaskSubtree &= m_rootMaskSubtree - 1u) {
    // Bit n of the mask corresponds to root node n + 1
    uint32_t rootIndex = bit::tzcnt(m_rootMaskSubtree) + 1u;
    auto& rootNode = m_nodes[rootIndex];

    if (!rootNode.header)
      continue;

    // Only the direct children go onto the free list here. The
    // root slot itself stays in place and just gets reset.
    freeNode(rootNode.child(0));
    freeNode(rootNode.child(1));

    rootNode.header = 0u;
  }
}
// Returns the index of a node with a zeroed header, either
// recycled from the free list or appended to the node array.
uint32_t DxvkBarrierTracker::allocateNode() {
  if (m_free.empty()) {
    // No recyclable node, so grow the node array by one entry
    uint32_t appended = uint32_t(m_nodes.size());
    m_nodes.emplace_back();
    return appended;
  }

  const uint32_t recycled = m_free.back();
  m_free.pop_back();

  // A recycled node may still link to children. Hand those to
  // the free list as well before wiping the header.
  auto& slot = m_nodes[recycled];
  freeNode(slot.child(0));
  freeNode(slot.child(1));

  slot.header = 0u;
  return recycled;
}
// Puts a node index onto the free list. Index 0 is ignored.
void DxvkBarrierTracker::freeNode(uint32_t node) {
  if (node == 0u)
    return;

  m_free.push_back(node);
}
// Searches the tree at the given root for a node whose address
// range overlaps the given range.
//
// \param [in] range Address range to look for
// \param [in] rootIndex Index of the root node, must not be 0
// \returns Index of an overlapping node, or 0 if the root is
//    not marked as valid or no node overlaps the range.
uint32_t DxvkBarrierTracker::findNode(
  const DxvkAddressRange&           range,
        uint32_t                    rootIndex) const {
  // Root n is tracked by bit n - 1 of the valid mask. A root
  // that is not flagged valid holds no meaningful data.
  uint64_t rootBit = uint64_t(1u) << (rootIndex - 1u);

  if (!(m_rootMaskValid & rootBit))
    return 0u;

  // Regular binary search tree descent, ending at a null link
  uint32_t nodeIndex = rootIndex;

  while (nodeIndex) {
    auto& node = m_nodes[nodeIndex];

    if (node.addressRange.overlaps(range))
      return nodeIndex;

    // Go right (child 1) if the node orders before the
    // range we are looking for, left (child 0) otherwise.
    nodeIndex = node.child(uint32_t(node.addressRange.lt(range)));
  }

  return 0u;
}
// Tries to add a node for the given range to the tree at the given
// root. Returns 0 if the range was inserted, or the index of an
// existing node that overlaps the range, in which case the tree
// is left unchanged.
uint32_t DxvkBarrierTracker::insertNode(
  const DxvkAddressRange&           range,
        uint32_t                    rootIndex) {
  const uint64_t rootBit = uint64_t(1u) << (rootIndex - 1u);

  if ((m_rootMaskValid & rootBit) == 0u) {
    // Tree is currently unused, so the root itself takes the range.
    // Wiping the header also clears a red flag that a prior
    // deletion may have left on the root.
    m_rootMaskValid |= rootBit;

    auto& root = m_nodes[rootIndex];
    root.header = 0;
    root.addressRange = range;
    return 0;
  }

  // Descend until we either run into an overlapping range, which
  // aborts the insertion, or hit an empty child link.
  uint32_t cursor = rootIndex;
  uint32_t side = 0u;

  for (;;) {
    const auto& current = m_nodes[cursor];

    if (current.addressRange.overlaps(range))
      return cursor;

    side = current.addressRange.lt(range);

    const uint32_t next = current.child(side);

    if (!next)
      break;

    cursor = next;
  }

  // Allocating may grow the node array, so node references
  // are only taken after the new index is known.
  const uint32_t fresh = allocateNode();
  m_nodes[cursor].setChild(side, fresh);

  auto& inserted = m_nodes[fresh];
  inserted.setRed(true);
  inserted.setParent(cursor);
  inserted.addressRange = range;

  // A red root signals that a deletion left the tree in an
  // unbalanced state, in which case no fixup is attempted.
  // Direct children of the root need no fixup either.
  if (cursor != rootIndex && !m_nodes[rootIndex].isRed())
    rebalancePostInsert(fresh, rootIndex);

  m_rootMaskSubtree |= rootBit;
  return 0u;
}
// Removes the payload stored at nodeIndex from the tree at rootIndex.
// The node slot that ends up on the free list is not necessarily
// nodeIndex itself: payloads are moved between slots so that the root
// slot never has to be relinked.
void DxvkBarrierTracker::removeNode(
uint32_t nodeIndex,
uint32_t rootIndex) {
auto& node = m_nodes[nodeIndex];
uint32_t l = node.child(0);
uint32_t r = node.child(1);
if (l && r) {
// Both children are valid. Take the payload from the smallest
// node in the right subtree and delete that node instead.
uint32_t childIndex = r;
while (m_nodes[childIndex].child(0))
childIndex = m_nodes[childIndex].child(0);
node.addressRange = m_nodes[childIndex].addressRange;
// The successor has no left child, so this recursion
// always ends up in the branch below.
removeNode(childIndex, rootIndex);
} else {
// Deletion is expected to be exceptionally rare, to the point of
// being irrelevant in practice since it can only ever happen if an
// app reads multiple disjoint blocks of a resource and then reads
// another range covering multiple of those blocks again. Instead
// of implementing a complex post-delete fixup, mark the root as
// red and allow the tree to go unbalanced until the next reset.
if (!node.isRed() && (nodeIndex != rootIndex))
m_nodes[rootIndex].setRed(true);
// We're deleting a node with one or no children. To avoid
// special-casing the root node, copy the child node to it and
// update links as necessary.
// At most one of l and r is non-zero here, so max() picks it.
uint32_t childIndex = std::max(l, r);
uint32_t parentIndex = node.parent();
if (childIndex) {
auto& child = m_nodes[childIndex];
uint32_t cl = child.child(0);
uint32_t cr = child.child(1);
node.setChild(0, cl);
node.setChild(1, cr);
// The root's color doubles as the "unbalanced" flag set
// above, so it must not be overwritten with the child's color.
if (nodeIndex != rootIndex)
node.setRed(child.isRed());
node.addressRange = child.addressRange;
if (cl) m_nodes[cl].setParent(nodeIndex);
if (cr) m_nodes[cr].setParent(nodeIndex);
// Clear the header so that recycling the child slot
// does not free the grandchildren we just adopted.
child.header = 0u;
freeNode(childIndex);
} else if (nodeIndex != rootIndex) {
// Removing leaf node, update parent link and move on.
auto& parent = m_nodes[parentIndex];
uint32_t which = uint32_t(parent.child(1) == nodeIndex);
parent.setChild(which, 0u);
node.header = 0;
freeNode(nodeIndex);
} else {
// Removing root with no children, mark tree as invalid
uint64_t rootBit = uint64_t(1u) << (rootIndex - 1u);
m_rootMaskSubtree &= ~rootBit;
m_rootMaskValid &= ~rootBit;
}
}
}
// Restores the red-black properties after the red node at nodeIndex
// has been linked into the tree at rootIndex. Walks up the tree as
// long as the current node has a red parent, and always leaves the
// root black.
void DxvkBarrierTracker::rebalancePostInsert(
uint32_t nodeIndex,
uint32_t rootIndex) {
while (nodeIndex != rootIndex) {
auto& node = m_nodes[nodeIndex];
auto& p = m_nodes[node.parent()];
// A black parent means there is no red-red violation left
if (!p.isRed())
break;
auto& g = m_nodes[p.parent()];
if (g.child(1) == node.parent()) {
// Parent is the right child, so the uncle is the left child.
// The uncle index may be 0, which is why it is checked
// before the color of the referenced node is used.
auto& u = m_nodes[g.child(0)];
if (g.child(0) && u.isRed()) {
// Red uncle: recolor and continue from the grandparent
g.setRed(true);
u.setRed(false);
p.setRed(false);
nodeIndex = p.parent();
} else {
// Black or missing uncle: rotate. Rotations swap payloads
// rather than moving slots, so p and g keep referring to
// the same positions in the tree afterwards.
if (p.child(0) == nodeIndex)
rotateRight(node.parent(), rootIndex);
p.setRed(false);
g.setRed(true);
rotateLeft(p.parent(), rootIndex);
// NOTE(review): nodeIndex is not changed in this branch; the
// loop presumably ends via the parent color check above on
// the next iteration - confirm.
}
} else {
// Mirror image of the case above with left and right swapped
auto& u = m_nodes[g.child(1)];
if (g.child(1) && u.isRed()) {
g.setRed(true);
u.setRed(false);
p.setRed(false);
nodeIndex = p.parent();
} else {
if (p.child(1) == nodeIndex)
rotateLeft(node.parent(), rootIndex);
p.setRed(false);
g.setRed(true);
rotateRight(p.parent(), rootIndex);
}
}
}
// Recoloring may have turned the root red, undo that
m_nodes[rootIndex].setRed(false);
}
void DxvkBarrierTracker::rotateLeft(
uint32_t nodeIndex,
uint32_t rootIndex) {
// This implements rotations in such a way that the node to
// rotate around does not move. This is important to avoid
// having a special case for the root node, and avoids having
// to access the parent or special-case the root node.
auto& node = m_nodes[nodeIndex];
auto l = node.child(0);
auto r = node.child(1);
auto rl = m_nodes[r].child(0);
auto rr = m_nodes[r].child(1);
m_nodes[l].setParent(r);
bool isRed = m_nodes[r].isRed();
m_nodes[r].setRed(node.isRed());
m_nodes[r].setChild(0, l);
m_nodes[r].setChild(1, rl);
m_nodes[rr].setParent(nodeIndex);
node.setRed(isRed && nodeIndex != rootIndex);
node.setChild(0, r);
node.setChild(1, rr);
std::swap(node.addressRange, m_nodes[r].addressRange);
}
void DxvkBarrierTracker::rotateRight(
uint32_t nodeIndex,
uint32_t rootIndex) {
auto& node = m_nodes[nodeIndex];
auto l = node.child(0);
auto r = node.child(1);
auto ll = m_nodes[l].child(0);
auto lr = m_nodes[l].child(1);
m_nodes[r].setParent(l);
bool isRed = m_nodes[l].isRed();
m_nodes[l].setRed(node.isRed());
m_nodes[l].setChild(0, lr);
m_nodes[l].setChild(1, r);
m_nodes[ll].setParent(nodeIndex);
node.setRed(isRed && nodeIndex != rootIndex);
node.setChild(0, ll);
node.setChild(1, l);
std::swap(node.addressRange, m_nodes[l].addressRange);
}
DxvkBarrierSet:: DxvkBarrierSet(DxvkCmdBuffer cmdBuffer)
: m_cmdBuffer(cmdBuffer) {

View File

@ -9,6 +9,197 @@
namespace dxvk {
/**
* \brief Address range
*/
struct DxvkAddressRange {
  /// Unique resource handle or address
  uint64_t resource = 0u;
  /// First element of the range. Byte offset for buffers;
  /// images can store the first subresource index here.
  uint32_t rangeStart = 0u;
  /// Last element of the range, inclusive. For buffers this is
  /// the offset of the last byte, i.e. offset + size - 1, for
  /// images the last subresource included in the range.
  uint32_t rangeEnd = 0u;

  /// Checks whether the other range refers to the same
  /// resource and lies entirely within this range.
  bool contains(const DxvkAddressRange& other) const {
    if (resource != other.resource)
      return false;

    return other.rangeStart >= rangeStart
        && other.rangeEnd <= rangeEnd;
  }

  /// Checks whether the other range refers to the same resource
  /// and shares at least one element with this range.
  bool overlaps(const DxvkAddressRange& other) const {
    if (resource != other.resource)
      return false;

    return !(rangeEnd < other.rangeStart || rangeStart > other.rangeEnd);
  }

  /// Orders ranges by resource first and range start second.
  /// The range end does not take part in the comparison.
  bool lt(const DxvkAddressRange& other) const {
    if (resource != other.resource)
      return resource < other.resource;

    return rangeStart < other.rangeStart;
  }
};
/**
* \brief Barrier tree node
*
* Node of a red-black tree, consisting of a packed node
* header as well as a resource address range. GCC generates
* weird code with bitfields here, so pack manually.
*/
struct DxvkBarrierTreeNode {
  // Mask covering one 21-bit node index field
  constexpr static uint64_t NodeIndexMask = (1u << 21) - 1u;

  // Packed header holding the node color and three node indices.
  // [0:0]:   Set if the node is red, clear otherwise.
  // [21:1]:  Index of the left child node, may be 0.
  // [42:22]: Index of the right child node, may be 0.
  // [63:43]: Index of the parent node, may be 0 for the root.
  uint64_t header = 0u;

  // Address range of the node
  DxvkAddressRange addressRange = { };

  // Sets or clears the color bit
  void setRed(bool red) {
    header = (header & ~uint64_t(1u)) | uint64_t(red);
  }

  bool isRed() const {
    return (header & 1u) != 0u;
  }

  // Replaces the parent index. The index is not masked
  // before being written to the header.
  void setParent(uint32_t node) {
    header = (header & ~(NodeIndexMask << 43)) | (uint64_t(node) << 43);
  }

  // Replaces the left child if index is 0, and the right child for
  // any other index. The node index is not masked before being
  // written to the header.
  void setChild(uint32_t index, uint32_t node) {
    const uint32_t shift = index ? 22 : 1;
    header = (header & ~(NodeIndexMask << shift)) | (uint64_t(node) << shift);
  }

  uint32_t parent() const {
    return uint32_t((header >> 43) & NodeIndexMask);
  }

  // Returns the left child if index is 0, and the
  // right child for any other index.
  uint32_t child(uint32_t index) const {
    const uint32_t shift = index ? 22 : 1;
    return uint32_t((header >> shift) & NodeIndexMask);
  }

  // A node with no parent index set is considered a root
  bool isRoot() const {
    return !parent();
  }
};
/**
* \brief Barrier tracker
*
* Provides a two-part hash table for read and written resource
* ranges, which is backed by binary trees to handle individual
* address ranges as well as collisions.
*/
class DxvkBarrierTracker {
  // Number of buckets per access type. Twice as many
  // root nodes are allocated by the constructor.
  constexpr static uint32_t HashTableSize = 32u;
public:

  DxvkBarrierTracker();

  ~DxvkBarrierTracker();

  /**
   * \brief Looks up a pending access of a given type
   *
   * \param [in] range Resource range
   * \param [in] accessType Access type
   * \returns \c true if a tracked range of the given
   *    access type overlaps the given range
   */
  bool findRange(
    const DxvkAddressRange&           range,
          DxvkAccess                  accessType) const;

  /**
   * \brief Adds an address range for a given access type
   *
   * \param [in] range Resource range
   * \param [in] accessType Access type
   */
  void insertRange(
    const DxvkAddressRange&           range,
          DxvkAccess                  accessType);

  /**
   * \brief Resets the tracker
   *
   * Invalidates all hash table entries and trees.
   */
  void clear();

  /**
   * \brief Checks whether the tracker holds no ranges
   * \returns \c true if no root node is marked as valid
   */
  bool empty() const {
    return m_rootMaskValid == 0u;
  }

private:

  // Bit n is set if root node n + 1 holds a valid range
  uint64_t m_rootMaskValid = 0u;
  // Bit n is set once a child was added below root node n + 1
  uint64_t m_rootMaskSubtree = 0u;

  // Node storage, and indices of nodes available for reuse
  std::vector<DxvkBarrierTreeNode> m_nodes;
  std::vector<uint32_t> m_free;

  uint32_t allocateNode();

  void freeNode(uint32_t node);

  uint32_t findNode(
    const DxvkAddressRange&           range,
          uint32_t                    rootIndex) const;

  uint32_t insertNode(
    const DxvkAddressRange&           range,
          uint32_t                    rootIndex);

  void removeNode(
          uint32_t                    nodeIndex,
          uint32_t                    rootIndex);

  void rebalancePostInsert(
          uint32_t                    nodeIndex,
          uint32_t                    rootIndex);

  void rotateLeft(
          uint32_t                    nodeIndex,
          uint32_t                    rootIndex);

  void rotateRight(
          uint32_t                    nodeIndex,
          uint32_t                    rootIndex);

  // Maps a range and access type to the index of its root node.
  // Only the resource takes part in the hash, not the range bounds.
  static uint32_t computeRootIndex(
    const DxvkAddressRange&           range,
          DxvkAccess                  access) {
    // TODO revisit once we use internal allocation
    // objects or resource cookies here.
    const size_t scrambled = size_t(range.resource) * 93887;
    const size_t bucket = (scrambled ^ (scrambled >> 16)) % HashTableSize;

    // Written ranges use the upper half of the implicit hash table
    const uint32_t tableOffset = (access == DxvkAccess::Write) ? HashTableSize : 0u;

    // Offset by one since index 0 refers to the actual null node
    return uint32_t(1u + bucket + tableOffset);
  }

};
/**
* \brief Buffer slice for barrier tracking
*