mirror of
https://github.com/doitsujin/dxvk.git
synced 2024-12-02 01:24:11 +01:00
[dxvk] Use memcmp replacement for pipeline state lookup
Measured to be over twice as fast as memcmp on Ryzen for the 512-byte graphics pipeline state struct, achieving two cycles per iteration.
This commit is contained in:
parent
5cb7be2454
commit
a743ba6531
@ -641,11 +641,11 @@ namespace dxvk {
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool operator == (const DxvkGraphicsPipelineStateInfo& other) const {
|
bool operator == (const DxvkGraphicsPipelineStateInfo& other) const {
|
||||||
return !std::memcmp(this, &other, sizeof(*this));
|
return bit::bcmpeq(this, &other);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool operator != (const DxvkGraphicsPipelineStateInfo& other) const {
|
bool operator != (const DxvkGraphicsPipelineStateInfo& other) const {
|
||||||
return std::memcmp(this, &other, sizeof(*this));
|
return !bit::bcmpeq(this, &other);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool useDynamicStencilRef() const {
|
bool useDynamicStencilRef() const {
|
||||||
@ -709,11 +709,11 @@ namespace dxvk {
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool operator == (const DxvkComputePipelineStateInfo& other) const {
|
bool operator == (const DxvkComputePipelineStateInfo& other) const {
|
||||||
return !std::memcmp(this, &other, sizeof(*this));
|
return bit::bcmpeq(this, &other);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool operator != (const DxvkComputePipelineStateInfo& other) const {
|
bool operator != (const DxvkComputePipelineStateInfo& other) const {
|
||||||
return std::memcmp(this, &other, sizeof(*this));
|
return !bit::bcmpeq(this, &other);
|
||||||
}
|
}
|
||||||
|
|
||||||
DxvkBindingMask bsBindingMask;
|
DxvkBindingMask bsBindingMask;
|
||||||
|
@ -80,4 +80,56 @@ namespace dxvk::bit {
|
|||||||
return shift > Bits ? shift - Bits : 0;
|
return shift > Bits ? shift - Bits : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * \brief Compares two aligned structs for bytewise equality
 *
 * Compares the complete object representations of \c a
 * and \c b, including any padding bytes. When SSE2 is
 * available, the comparison uses aligned 16-byte vector
 * loads and handles 32 bytes per loop iteration, plus
 * one trailing 16-byte chunk if the struct size is an
 * odd multiple of 16. On all other targets it falls
 * back to \c std::memcmp.
 *
 * \tparam T Struct type, aligned to at least 16 bytes
 * \param [in] a First struct
 * \param [in] b Second struct
 * \returns \c true if the structs are equal
 */
template<typename T>
bool bcmpeq(const T* a, const T* b) {
  // Aligned loads are used below. Since sizeof(T) is always a
  // multiple of alignof(T), this also guarantees that the struct
  // can be split into 16-byte chunks without any remainder.
  static_assert(alignof(T) >= 16);

  // Checking for the compiler alone is not sufficient: GCC and
  // clang also target architectures without SSE2, where __m128i
  // and the _mm_* intrinsics do not exist. GCC and clang define
  // __SSE2__ when SSE2 is enabled, MSVC provides the intrinsics
  // on all of its x86 targets.
  #if defined(__SSE2__) || defined(_M_X64) || defined(_M_IX86)
  auto ai = reinterpret_cast<const __m128i*>(a);
  auto bi = reinterpret_cast<const __m128i*>(b);

  size_t i = 0;

  // Ask the compiler not to unroll the main loop any further
  #if defined(__clang__)
  #pragma nounroll
  #elif defined(__GNUC__)
  #pragma GCC unroll 0
  #endif

  // Main loop: Two 16-byte chunks per iteration, the results
  // are merged so that only one branch is needed per 32 bytes
  for ( ; i < 2 * (sizeof(T) / 32); i += 2) {
    __m128i eq0 = _mm_cmpeq_epi8(
      _mm_load_si128(ai + i),
      _mm_load_si128(bi + i));
    __m128i eq1 = _mm_cmpeq_epi8(
      _mm_load_si128(ai + i + 1),
      _mm_load_si128(bi + i + 1));
    __m128i eq = _mm_and_si128(eq0, eq1);

    // One mask bit per byte, all 16 bits are set on equality
    int mask = _mm_movemask_epi8(eq);
    if (mask != 0xFFFF)
      return false;
  }

  // Process the last 16-byte chunk if the chunk count is odd
  for ( ; i < sizeof(T) / 16; i++) {
    __m128i eq = _mm_cmpeq_epi8(
      _mm_load_si128(ai + i),
      _mm_load_si128(bi + i));

    int mask = _mm_movemask_epi8(eq);
    if (mask != 0xFFFF)
      return false;
  }

  return true;
  #else
  return !std::memcmp(a, b, sizeof(T));
  #endif
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user