mirror of
https://github.com/doitsujin/dxvk.git
synced 2024-12-02 01:24:11 +01:00
[dxvk] Use memcmp replacement for pipeline state lookup
Measured to be over twice as fast as memcmp on Ryzen for the 512-byte graphics pipeline state struct, achieving two cycles per iteration.
This commit is contained in:
parent
5cb7be2454
commit
a743ba6531
@ -641,11 +641,11 @@ namespace dxvk {
|
||||
}
|
||||
|
||||
/**
 * \brief Checks two graphics pipeline states for equality
 *
 * Delegates to \c bit::bcmpeq, passing \c this and the
 * address of \c other. Only one return statement is kept:
 * the older \c std::memcmp based return that preceded it
 * made the \c bit::bcmpeq return unreachable.
 *
 * \param [in] other The state to compare against
 * \returns \c true if \c bit::bcmpeq reports both states as equal
 */
bool operator == (const DxvkGraphicsPipelineStateInfo& other) const {
  return bit::bcmpeq(this, &other);
}
|
||||
|
||||
/**
 * \brief Checks two graphics pipeline states for inequality
 *
 * Negates the result of \c bit::bcmpeq on \c this and the
 * address of \c other. Only one return statement is kept:
 * the older \c std::memcmp based return that preceded it
 * made the \c bit::bcmpeq return unreachable.
 *
 * \param [in] other The state to compare against
 * \returns \c true if \c bit::bcmpeq reports the states as different
 */
bool operator != (const DxvkGraphicsPipelineStateInfo& other) const {
  return !bit::bcmpeq(this, &other);
}
|
||||
|
||||
bool useDynamicStencilRef() const {
|
||||
@ -709,11 +709,11 @@ namespace dxvk {
|
||||
}
|
||||
|
||||
/**
 * \brief Checks two compute pipeline states for equality
 *
 * Delegates to \c bit::bcmpeq, passing \c this and the
 * address of \c other. Only one return statement is kept:
 * the older \c std::memcmp based return that preceded it
 * made the \c bit::bcmpeq return unreachable.
 *
 * \param [in] other The state to compare against
 * \returns \c true if \c bit::bcmpeq reports both states as equal
 */
bool operator == (const DxvkComputePipelineStateInfo& other) const {
  return bit::bcmpeq(this, &other);
}
|
||||
|
||||
/**
 * \brief Checks two compute pipeline states for inequality
 *
 * Negates the result of \c bit::bcmpeq on \c this and the
 * address of \c other. Only one return statement is kept:
 * the older \c std::memcmp based return that preceded it
 * made the \c bit::bcmpeq return unreachable.
 *
 * \param [in] other The state to compare against
 * \returns \c true if \c bit::bcmpeq reports the states as different
 */
bool operator != (const DxvkComputePipelineStateInfo& other) const {
  return !bit::bcmpeq(this, &other);
}
|
||||
|
||||
DxvkBindingMask bsBindingMask;
|
||||
|
@ -80,4 +80,56 @@ namespace dxvk::bit {
|
||||
return shift > Bits ? shift - Bits : 0;
|
||||
}
|
||||
|
||||
/**
 * \brief Compares two aligned structs bit by bit
 *
 * Tests all \c sizeof(T) bytes of both objects for equality,
 * so any padding bytes inside \c T take part in the comparison
 * as well. \c T must have an alignment of at least 16 bytes,
 * which is enforced at compile time.
 *
 * When SSE2 is available, the structs are compared using
 * aligned 128-bit loads: a main loop that checks 32 bytes per
 * iteration, followed by a loop that handles any remaining
 * 16-byte chunk. On all other targets, this falls back to
 * \c std::memcmp rather than failing to compile.
 * \param [in] a First struct
 * \param [in] b Second struct
 * \returns \c true if the structs are equal
 */
template<typename T>
bool bcmpeq(const T* a, const T* b) {
  static_assert(alignof(T) >= 16, "bcmpeq requires at least 16-byte aligned types");
  // Gate on the instruction set rather than on the compiler:
  // GCC, Clang and MSVC all target non-x86 architectures too,
  // where the SSE2 intrinsics used below do not exist.
  #if defined(__SSE2__) || defined(_M_X64) || defined(_M_IX86)
  auto ai = reinterpret_cast<const __m128i*>(a);
  auto bi = reinterpret_cast<const __m128i*>(b);

  size_t i = 0;

  // Ask the compiler not to unroll the main loop
  #if defined(__clang__)
  #pragma nounroll
  #elif defined(__GNUC__)
  #pragma GCC unroll 0
  #endif
  // Main loop: two 16-byte chunks (32 bytes) per iteration
  for ( ; i < 2 * (sizeof(T) / 32); i += 2) {
    __m128i eq0 = _mm_cmpeq_epi8(
      _mm_load_si128(ai + i),
      _mm_load_si128(bi + i));
    __m128i eq1 = _mm_cmpeq_epi8(
      _mm_load_si128(ai + i + 1),
      _mm_load_si128(bi + i + 1));
    // A byte lane stays 0xFF only if it matched in both chunks
    __m128i eq = _mm_and_si128(eq0, eq1);

    // One mask bit per byte lane, 0xFFFF means all lanes matched
    int mask = _mm_movemask_epi8(eq);
    if (mask != 0xFFFF)
      return false;
  }

  // Tail loop: remaining 16-byte chunk if sizeof(T) is
  // not a multiple of 32 bytes
  for ( ; i < sizeof(T) / 16; i++) {
    __m128i eq = _mm_cmpeq_epi8(
      _mm_load_si128(ai + i),
      _mm_load_si128(bi + i));

    int mask = _mm_movemask_epi8(eq);
    if (mask != 0xFFFF)
      return false;
  }

  return true;
  #else
  return !std::memcmp(a, b, sizeof(T));
  #endif
}
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user