mirror of
https://github.com/doitsujin/dxvk.git
synced 2025-02-20 19:54:19 +01:00
[dxvk] Add new barrier tracker
Uses a hash table backed by RB trees in order to reduce worst-case runtime overhead in some edge cases.
This commit is contained in:
parent
af3c46d83c
commit
c6c85d7532
@ -2,6 +2,415 @@
|
||||
|
||||
namespace dxvk {
|
||||
|
||||
DxvkBarrierTracker::DxvkBarrierTracker() {
  // Slot 0 is a permanently allocated dummy node. Keeping it around
  // lets index 0 double as the "no node" marker everywhere else.
  m_nodes.emplace_back();

  // Allocate the fixed set of tree roots up front. There are two
  // roots per hash bucket, so they occupy the first 2 * HashTableSize
  // slots following the dummy node.
  uint32_t remaining = 2u * HashTableSize;

  while (remaining--)
    allocateNode();
}
|
||||
|
||||
|
||||
DxvkBarrierTracker::~DxvkBarrierTracker() {
  // Intentionally empty, the destructor performs no explicit cleanup.
}
|
||||
|
||||
|
||||
bool DxvkBarrierTracker::findRange(
|
||||
const DxvkAddressRange& range,
|
||||
DxvkAccess accessType) const {
|
||||
uint32_t rootIndex = computeRootIndex(range, accessType);
|
||||
return findNode(range, rootIndex);
|
||||
}
|
||||
|
||||
|
||||
void DxvkBarrierTracker::insertRange(
|
||||
const DxvkAddressRange& range,
|
||||
DxvkAccess accessType) {
|
||||
uint32_t rootIndex = computeRootIndex(range, accessType);
|
||||
|
||||
// If we can just insert the node with no conflicts,
|
||||
// we don't have to do anything.
|
||||
uint32_t nodeIndex = insertNode(range, rootIndex);
|
||||
|
||||
if (likely(!nodeIndex))
|
||||
return;
|
||||
|
||||
// If there's an existing node and it contains the entire
|
||||
// range we want to add already, also don't do anything.
|
||||
auto& node = m_nodes[nodeIndex];
|
||||
|
||||
if (node.addressRange.contains(range))
|
||||
return;
|
||||
|
||||
// Otherwise, check if there are any other overlapping ranges.
|
||||
// If that is not the case, simply update the range we found.
|
||||
bool hasOverlap = false;
|
||||
|
||||
if (range.rangeStart < node.addressRange.rangeStart) {
|
||||
DxvkAddressRange testRange;
|
||||
testRange.resource = range.resource;
|
||||
testRange.rangeStart = range.rangeStart;
|
||||
testRange.rangeEnd = node.addressRange.rangeStart - 1u;
|
||||
|
||||
hasOverlap = findNode(testRange, rootIndex);
|
||||
}
|
||||
|
||||
if (range.rangeEnd > node.addressRange.rangeEnd && !hasOverlap) {
|
||||
DxvkAddressRange testRange;
|
||||
testRange.resource = range.resource;
|
||||
testRange.rangeStart = node.addressRange.rangeEnd + 1u;
|
||||
testRange.rangeEnd = range.rangeEnd;
|
||||
|
||||
hasOverlap = findNode(testRange, rootIndex);
|
||||
}
|
||||
|
||||
if (!hasOverlap) {
|
||||
node.addressRange.rangeStart = std::min(node.addressRange.rangeStart, range.rangeStart);
|
||||
node.addressRange.rangeEnd = std::max(node.addressRange.rangeEnd, range.rangeEnd);
|
||||
return;
|
||||
}
|
||||
|
||||
// If there are multiple ranges overlapping the one being
|
||||
// inserted, remove them all and insert the merged range.
|
||||
DxvkAddressRange mergedRange = range;
|
||||
|
||||
while (nodeIndex) {
|
||||
auto& node = m_nodes[nodeIndex];
|
||||
mergedRange.rangeStart = std::min(mergedRange.rangeStart, node.addressRange.rangeStart);
|
||||
mergedRange.rangeEnd = std::max(mergedRange.rangeEnd, node.addressRange.rangeEnd);
|
||||
|
||||
removeNode(nodeIndex, rootIndex);
|
||||
|
||||
nodeIndex = findNode(range, rootIndex);
|
||||
}
|
||||
|
||||
insertNode(mergedRange, rootIndex);
|
||||
}
|
||||
|
||||
|
||||
void DxvkBarrierTracker::clear() {
|
||||
m_rootMaskValid = 0u;
|
||||
|
||||
while (m_rootMaskSubtree) {
|
||||
// Free subtrees if any, but keep the root node intact
|
||||
uint32_t rootIndex = bit::tzcnt(m_rootMaskSubtree) + 1u;
|
||||
|
||||
auto& root = m_nodes[rootIndex];
|
||||
|
||||
if (root.header) {
|
||||
freeNode(root.child(0));
|
||||
freeNode(root.child(1));
|
||||
|
||||
root.header = 0u;
|
||||
}
|
||||
|
||||
m_rootMaskSubtree &= m_rootMaskSubtree - 1u;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
uint32_t DxvkBarrierTracker::allocateNode() {
|
||||
if (!m_free.empty()) {
|
||||
uint32_t nodeIndex = m_free.back();
|
||||
m_free.pop_back();
|
||||
|
||||
// Free any subtree that the node might still have
|
||||
auto& node = m_nodes[nodeIndex];
|
||||
freeNode(node.child(0));
|
||||
freeNode(node.child(1));
|
||||
|
||||
node.header = 0u;
|
||||
return nodeIndex;
|
||||
} else {
|
||||
// Allocate entirely new node in the array
|
||||
uint32_t nodeIndex = m_nodes.size();
|
||||
m_nodes.emplace_back();
|
||||
return nodeIndex;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void DxvkBarrierTracker::freeNode(uint32_t node) {
|
||||
if (node)
|
||||
m_free.push_back(node);
|
||||
}
|
||||
|
||||
|
||||
uint32_t DxvkBarrierTracker::findNode(
|
||||
const DxvkAddressRange& range,
|
||||
uint32_t rootIndex) const {
|
||||
// Check if the given root is valid at all
|
||||
uint64_t rootBit = uint64_t(1u) << (rootIndex - 1u);
|
||||
|
||||
if (!(m_rootMaskValid & rootBit))
|
||||
return false;
|
||||
|
||||
// Traverse search tree normally
|
||||
uint32_t nodeIndex = rootIndex;
|
||||
|
||||
while (nodeIndex) {
|
||||
auto& node = m_nodes[nodeIndex];
|
||||
|
||||
if (node.addressRange.overlaps(range))
|
||||
return nodeIndex;
|
||||
|
||||
nodeIndex = node.child(uint32_t(node.addressRange.lt(range)));
|
||||
}
|
||||
|
||||
return 0u;
|
||||
}
|
||||
|
||||
|
||||
uint32_t DxvkBarrierTracker::insertNode(
|
||||
const DxvkAddressRange& range,
|
||||
uint32_t rootIndex) {
|
||||
// Check if the given root is valid at all
|
||||
uint64_t rootBit = uint64_t(1u) << (rootIndex - 1u);
|
||||
|
||||
if (!(m_rootMaskValid & rootBit)) {
|
||||
m_rootMaskValid |= rootBit;
|
||||
|
||||
// Update root node as necessary. Also reset
|
||||
// its red-ness if we set it during deletion.
|
||||
auto& node = m_nodes[rootIndex];
|
||||
node.header = 0;
|
||||
node.addressRange = range;
|
||||
return 0;
|
||||
} else {
|
||||
// Traverse tree and abort if we find any range
|
||||
// overlapping the one we're trying to insert.
|
||||
uint32_t parentIndex = rootIndex;
|
||||
uint32_t childIndex = 0u;
|
||||
|
||||
while (true) {
|
||||
auto& parent = m_nodes[parentIndex];
|
||||
|
||||
if (parent.addressRange.overlaps(range))
|
||||
return parentIndex;
|
||||
|
||||
childIndex = parent.addressRange.lt(range);
|
||||
|
||||
if (!parent.child(childIndex))
|
||||
break;
|
||||
|
||||
parentIndex = parent.child(childIndex);
|
||||
}
|
||||
|
||||
// Create and insert new node into the tree
|
||||
uint32_t nodeIndex = allocateNode();
|
||||
|
||||
auto& parent = m_nodes[parentIndex];
|
||||
parent.setChild(childIndex, nodeIndex);
|
||||
|
||||
auto& node = m_nodes[nodeIndex];
|
||||
node.setRed(true);
|
||||
node.setParent(parentIndex);
|
||||
node.addressRange = range;
|
||||
|
||||
// Only do the fixup to maintain red-black properties if
|
||||
// we haven't marked the root node as red in a deletion.
|
||||
if (parentIndex != rootIndex && !m_nodes[rootIndex].isRed())
|
||||
rebalancePostInsert(nodeIndex, rootIndex);
|
||||
|
||||
m_rootMaskSubtree |= rootBit;
|
||||
return 0u;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void DxvkBarrierTracker::removeNode(
|
||||
uint32_t nodeIndex,
|
||||
uint32_t rootIndex) {
|
||||
auto& node = m_nodes[nodeIndex];
|
||||
|
||||
uint32_t l = node.child(0);
|
||||
uint32_t r = node.child(1);
|
||||
|
||||
if (l && r) {
|
||||
// Both children are valid. Take the payload from the smallest
|
||||
// node in the right subtree and delete that node instead.
|
||||
uint32_t childIndex = r;
|
||||
|
||||
while (m_nodes[childIndex].child(0))
|
||||
childIndex = m_nodes[childIndex].child(0);
|
||||
|
||||
node.addressRange = m_nodes[childIndex].addressRange;
|
||||
removeNode(childIndex, rootIndex);
|
||||
} else {
|
||||
// Deletion is expected to be exceptionally rare, to the point of
|
||||
// being irrelevant in practice since it can only ever happen if an
|
||||
// app reads multiple disjoint blocks of a resource and then reads
|
||||
// another range covering multiple of those blocks again. Instead
|
||||
// of implementing a complex post-delete fixup, mark the root as
|
||||
// red and allow the tree to go unbalanced until the next reset.
|
||||
if (!node.isRed() && (nodeIndex != rootIndex))
|
||||
m_nodes[rootIndex].setRed(true);
|
||||
|
||||
// We're deleting the a node with one or no children. To avoid
|
||||
// special-casing the root node, copy the child node to it and
|
||||
// update links as necessary.
|
||||
uint32_t childIndex = std::max(l, r);
|
||||
uint32_t parentIndex = node.parent();
|
||||
|
||||
if (childIndex) {
|
||||
auto& child = m_nodes[childIndex];
|
||||
|
||||
uint32_t cl = child.child(0);
|
||||
uint32_t cr = child.child(1);
|
||||
|
||||
node.setChild(0, cl);
|
||||
node.setChild(1, cr);
|
||||
|
||||
if (nodeIndex != rootIndex)
|
||||
node.setRed(child.isRed());
|
||||
|
||||
node.addressRange = child.addressRange;
|
||||
|
||||
if (cl) m_nodes[cl].setParent(nodeIndex);
|
||||
if (cr) m_nodes[cr].setParent(nodeIndex);
|
||||
|
||||
child.header = 0u;
|
||||
freeNode(childIndex);
|
||||
} else if (nodeIndex != rootIndex) {
|
||||
// Removing leaf node, update parent link and move on.
|
||||
auto& parent = m_nodes[parentIndex];
|
||||
|
||||
uint32_t which = uint32_t(parent.child(1) == nodeIndex);
|
||||
parent.setChild(which, 0u);
|
||||
|
||||
node.header = 0;
|
||||
freeNode(nodeIndex);
|
||||
} else {
|
||||
// Removing root with no children, mark tree as invalid
|
||||
uint64_t rootBit = uint64_t(1u) << (rootIndex - 1u);
|
||||
|
||||
m_rootMaskSubtree &= ~rootBit;
|
||||
m_rootMaskValid &= ~rootBit;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void DxvkBarrierTracker::rebalancePostInsert(
|
||||
uint32_t nodeIndex,
|
||||
uint32_t rootIndex) {
|
||||
while (nodeIndex != rootIndex) {
|
||||
auto& node = m_nodes[nodeIndex];
|
||||
auto& p = m_nodes[node.parent()];
|
||||
|
||||
if (!p.isRed())
|
||||
break;
|
||||
|
||||
auto& g = m_nodes[p.parent()];
|
||||
|
||||
if (g.child(1) == node.parent()) {
|
||||
auto& u = m_nodes[g.child(0)];
|
||||
|
||||
if (g.child(0) && u.isRed()) {
|
||||
g.setRed(true);
|
||||
u.setRed(false);
|
||||
p.setRed(false);
|
||||
|
||||
nodeIndex = p.parent();
|
||||
} else {
|
||||
if (p.child(0) == nodeIndex)
|
||||
rotateRight(node.parent(), rootIndex);
|
||||
|
||||
p.setRed(false);
|
||||
g.setRed(true);
|
||||
|
||||
rotateLeft(p.parent(), rootIndex);
|
||||
}
|
||||
} else {
|
||||
auto& u = m_nodes[g.child(1)];
|
||||
|
||||
if (g.child(1) && u.isRed()) {
|
||||
g.setRed(true);
|
||||
u.setRed(false);
|
||||
p.setRed(false);
|
||||
|
||||
nodeIndex = p.parent();
|
||||
} else {
|
||||
if (p.child(1) == nodeIndex)
|
||||
rotateLeft(node.parent(), rootIndex);
|
||||
|
||||
p.setRed(false);
|
||||
g.setRed(true);
|
||||
|
||||
rotateRight(p.parent(), rootIndex);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
m_nodes[rootIndex].setRed(false);
|
||||
}
|
||||
|
||||
|
||||
void DxvkBarrierTracker::rotateLeft(
|
||||
uint32_t nodeIndex,
|
||||
uint32_t rootIndex) {
|
||||
// This implements rotations in such a way that the node to
|
||||
// rotate around does not move. This is important to avoid
|
||||
// having a special case for the root node, and avoids having
|
||||
// to access the parent or special-case the root node.
|
||||
auto& node = m_nodes[nodeIndex];
|
||||
|
||||
auto l = node.child(0);
|
||||
auto r = node.child(1);
|
||||
|
||||
auto rl = m_nodes[r].child(0);
|
||||
auto rr = m_nodes[r].child(1);
|
||||
|
||||
m_nodes[l].setParent(r);
|
||||
|
||||
bool isRed = m_nodes[r].isRed();
|
||||
m_nodes[r].setRed(node.isRed());
|
||||
m_nodes[r].setChild(0, l);
|
||||
m_nodes[r].setChild(1, rl);
|
||||
|
||||
m_nodes[rr].setParent(nodeIndex);
|
||||
|
||||
node.setRed(isRed && nodeIndex != rootIndex);
|
||||
node.setChild(0, r);
|
||||
node.setChild(1, rr);
|
||||
|
||||
std::swap(node.addressRange, m_nodes[r].addressRange);
|
||||
}
|
||||
|
||||
|
||||
void DxvkBarrierTracker::rotateRight(
|
||||
uint32_t nodeIndex,
|
||||
uint32_t rootIndex) {
|
||||
auto& node = m_nodes[nodeIndex];
|
||||
|
||||
auto l = node.child(0);
|
||||
auto r = node.child(1);
|
||||
|
||||
auto ll = m_nodes[l].child(0);
|
||||
auto lr = m_nodes[l].child(1);
|
||||
|
||||
m_nodes[r].setParent(l);
|
||||
|
||||
bool isRed = m_nodes[l].isRed();
|
||||
m_nodes[l].setRed(node.isRed());
|
||||
m_nodes[l].setChild(0, lr);
|
||||
m_nodes[l].setChild(1, r);
|
||||
|
||||
m_nodes[ll].setParent(nodeIndex);
|
||||
|
||||
node.setRed(isRed && nodeIndex != rootIndex);
|
||||
node.setChild(0, ll);
|
||||
node.setChild(1, l);
|
||||
|
||||
std::swap(node.addressRange, m_nodes[l].addressRange);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
DxvkBarrierSet:: DxvkBarrierSet(DxvkCmdBuffer cmdBuffer)
|
||||
: m_cmdBuffer(cmdBuffer) {
|
||||
|
||||
|
@ -9,6 +9,197 @@
|
||||
|
||||
namespace dxvk {
|
||||
|
||||
/**
 * \brief Address range
 *
 * Inclusive range within a single resource. Ranges that
 * belong to different resources never overlap each other.
 */
struct DxvkAddressRange {
  /// Unique resource handle or address
  uint64_t resource = 0u;
  /// First element of the range. A byte offset for buffers,
  /// images can store their first subresource index here.
  uint32_t rangeStart = 0u;
  /// Last element that is part of the range. For buffers, this
  /// is the offset of the last byte, i.e. offset + size - 1.
  /// For images, this is the last subresource in the range.
  uint32_t rangeEnd = 0u;

  /// Checks whether the other range lies entirely within this one
  bool contains(const DxvkAddressRange& other) const {
    if (resource != other.resource)
      return false;

    return other.rangeStart >= rangeStart
        && other.rangeEnd <= rangeEnd;
  }

  /// Checks whether both ranges share at least one element
  bool overlaps(const DxvkAddressRange& other) const {
    if (resource != other.resource)
      return false;

    return other.rangeStart <= rangeEnd
        && other.rangeEnd >= rangeStart;
  }

  /// Ordering by resource first, then by range start.
  /// The range end does not take part in the comparison.
  bool lt(const DxvkAddressRange& other) const {
    if (resource != other.resource)
      return resource < other.resource;

    return rangeStart < other.rangeStart;
  }
};
|
||||
|
||||
|
||||
/**
|
||||
* \brief Barrier tree node
|
||||
*
|
||||
* Node of a red-black tree, consisting of a packed node
|
||||
* header as well as aresource address range. GCC generates
|
||||
* weird code with bitfields here, so pack manually.
|
||||
*/
|
||||
struct DxvkBarrierTreeNode {
|
||||
constexpr static uint64_t NodeIndexMask = (1u << 21) - 1u;
|
||||
|
||||
// Packed header with node indices and the node color.
|
||||
// [0:0]: Set if the node is red, clear otherwise.
|
||||
// [21:1]: Index of the left child node, may be 0.
|
||||
// [42:22]: Index of the right child node, may be 0.
|
||||
// [43:63]: Index of the parent node, may be 0 for the root.
|
||||
uint64_t header = 0u;
|
||||
|
||||
// Address range of the node
|
||||
DxvkAddressRange addressRange = { };
|
||||
|
||||
void setRed(bool red) {
|
||||
header &= ~uint64_t(1u);
|
||||
header |= uint64_t(red);
|
||||
}
|
||||
|
||||
bool isRed() const {
|
||||
return header & 1u;
|
||||
}
|
||||
|
||||
void setParent(uint32_t node) {
|
||||
header &= ~(NodeIndexMask << 43);
|
||||
header |= uint64_t(node) << 43;
|
||||
}
|
||||
|
||||
void setChild(uint32_t index, uint32_t node) {
|
||||
uint32_t shift = (index ? 22 : 1);
|
||||
header &= ~(NodeIndexMask << shift);
|
||||
header |= uint64_t(node) << shift;
|
||||
}
|
||||
|
||||
uint32_t parent() const {
|
||||
return uint32_t((header >> 43) & NodeIndexMask);
|
||||
}
|
||||
|
||||
uint32_t child(uint32_t index) const {
|
||||
uint32_t shift = (index ? 22 : 1);
|
||||
return uint32_t((header >> shift) & NodeIndexMask);
|
||||
}
|
||||
|
||||
bool isRoot() const {
|
||||
return parent() == 0u;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* \brief Barrier tracker
|
||||
*
|
||||
* Provides a two-part hash table for read and written resource
|
||||
* ranges, which is backed by binary trees to handle individual
|
||||
* address ranges as well as collisions.
|
||||
*/
|
||||
class DxvkBarrierTracker {
|
||||
constexpr static uint32_t HashTableSize = 32u;
|
||||
public:
|
||||
|
||||
DxvkBarrierTracker();
|
||||
|
||||
~DxvkBarrierTracker();
|
||||
|
||||
/**
|
||||
* \brief Checks whether there is a pending access of a given type
|
||||
*
|
||||
* \param [in] range Resource range
|
||||
* \param [in] accessType Access type
|
||||
* \returns \c true if the range has a pending access
|
||||
*/
|
||||
bool findRange(
|
||||
const DxvkAddressRange& range,
|
||||
DxvkAccess accessType) const;
|
||||
|
||||
/**
|
||||
* \brief Inserts address range for a given access type
|
||||
*
|
||||
* \param [in] range Resource range
|
||||
* \param [in] accessType Access type
|
||||
*/
|
||||
void insertRange(
|
||||
const DxvkAddressRange& range,
|
||||
DxvkAccess accessType);
|
||||
|
||||
/**
|
||||
* \brief Clears the entire structure
|
||||
*
|
||||
* Invalidates all hash table entries and trees.
|
||||
*/
|
||||
void clear();
|
||||
|
||||
/**
|
||||
* \brief Checks whether any resources are dirty
|
||||
* \returns \c true if the tracker is empty.
|
||||
*/
|
||||
bool empty() const {
|
||||
return !m_rootMaskValid;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
uint64_t m_rootMaskValid = 0u;
|
||||
uint64_t m_rootMaskSubtree = 0u;
|
||||
|
||||
std::vector<DxvkBarrierTreeNode> m_nodes;
|
||||
std::vector<uint32_t> m_free;
|
||||
|
||||
uint32_t allocateNode();
|
||||
|
||||
void freeNode(uint32_t node);
|
||||
|
||||
uint32_t findNode(
|
||||
const DxvkAddressRange& range,
|
||||
uint32_t rootIndex) const;
|
||||
|
||||
uint32_t insertNode(
|
||||
const DxvkAddressRange& range,
|
||||
uint32_t rootIndex);
|
||||
|
||||
void removeNode(
|
||||
uint32_t nodeIndex,
|
||||
uint32_t rootIndex);
|
||||
|
||||
void rebalancePostInsert(
|
||||
uint32_t nodeIndex,
|
||||
uint32_t rootIndex);
|
||||
|
||||
void rotateLeft(
|
||||
uint32_t nodeIndex,
|
||||
uint32_t rootIndex);
|
||||
|
||||
void rotateRight(
|
||||
uint32_t nodeIndex,
|
||||
uint32_t rootIndex);
|
||||
|
||||
static uint32_t computeRootIndex(
|
||||
const DxvkAddressRange& range,
|
||||
DxvkAccess access) {
|
||||
// TODO revisit once we use internal allocation
|
||||
// objects or resource cookies here.
|
||||
size_t hash = size_t(range.resource) * 93887;
|
||||
hash ^= (hash >> 16);
|
||||
|
||||
// Reserve the upper half of the implicit hash table for written
|
||||
// ranges, and add 1 because 0 refers to the actual null node.
|
||||
return 1u + (hash % HashTableSize) + (access == DxvkAccess::Write ? HashTableSize : 0u);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* \brief Buffer slice for barrier tracking
|
||||
*
|
||||
|
Loading…
x
Reference in New Issue
Block a user