1
0
mirror of https://github.com/doitsujin/dxvk.git synced 2024-11-29 19:24:10 +01:00

[dxvk] Use memcmp replacement for pipeline state lookup

Measured to be over twice as fast as memcmp on Ryzen for the
512-byte graphics pipeline state struct, achieving two cycles
per iteration.
This commit is contained in:
Philip Rebohle 2019-10-07 17:19:32 +02:00
parent 5cb7be2454
commit a743ba6531
No known key found for this signature in database
GPG Key ID: C8CC613427A31C99
2 changed files with 56 additions and 4 deletions

View File

@ -641,11 +641,11 @@ namespace dxvk {
}
/**
 * \brief Checks two graphics pipeline states for equality
 *
 * Compares the raw object representation of both structs
 * using \c bit::bcmpeq instead of \c std::memcmp. The helper
 * requires the struct to be aligned to at least 16 bytes.
 * \param [in] other The state to compare against
 * \returns \c true if both states are bitwise identical
 */
bool operator == (const DxvkGraphicsPipelineStateInfo& other) const {
  return bit::bcmpeq(this, &other);
}
/**
 * \brief Checks two graphics pipeline states for inequality
 *
 * Negates the result of the bitwise comparison performed
 * by \c bit::bcmpeq, which yields a proper \c bool rather
 * than relying on an implicit integer conversion.
 * \param [in] other The state to compare against
 * \returns \c true if the states differ in at least one byte
 */
bool operator != (const DxvkGraphicsPipelineStateInfo& other) const {
  return !bit::bcmpeq(this, &other);
}
bool useDynamicStencilRef() const {
@ -709,11 +709,11 @@ namespace dxvk {
}
/**
 * \brief Checks two compute pipeline states for equality
 *
 * Compares the raw object representation of both structs
 * using \c bit::bcmpeq instead of \c std::memcmp. The helper
 * requires the struct to be aligned to at least 16 bytes.
 * \param [in] other The state to compare against
 * \returns \c true if both states are bitwise identical
 */
bool operator == (const DxvkComputePipelineStateInfo& other) const {
  return bit::bcmpeq(this, &other);
}
/**
 * \brief Checks two compute pipeline states for inequality
 *
 * Negates the result of the bitwise comparison performed
 * by \c bit::bcmpeq, which yields a proper \c bool rather
 * than relying on an implicit integer conversion.
 * \param [in] other The state to compare against
 * \returns \c true if the states differ in at least one byte
 */
bool operator != (const DxvkComputePipelineStateInfo& other) const {
  return !bit::bcmpeq(this, &other);
}
DxvkBindingMask bsBindingMask;

View File

@ -79,5 +79,57 @@ namespace dxvk::bit {
shift += count;
return shift > Bits ? shift - Bits : 0;
}
/**
 * \brief Compares two aligned structs bit by bit
 *
 * Equality-only replacement for \c std::memcmp. On targets
 * with SSE2 the structs are compared in 16-byte vectors, two
 * vectors per iteration, with one trailing vector handled
 * separately if the number of vectors is odd. On any other
 * target the function falls back to \c std::memcmp.
 *
 * \c T must be aligned to at least 16 bytes, which also makes
 * \c sizeof(T) a multiple of 16, so every byte of the object
 * representation (padding included) takes part in the result.
 * \tparam T Struct type, aligned to 16 bytes or more
 * \param [in] a First struct
 * \param [in] b Second struct
 * \returns \c true if the structs are equal
 */
template<typename T>
bool bcmpeq(const T* a, const T* b) {
  static_assert(alignof(T) >= 16);
// Select the vector path by target architecture rather than by
// compiler: GCC, clang and MSVC also target non-x86 platforms
// where the SSE2 intrinsics below do not exist.
#if defined(__SSE2__) || defined(_M_X64) || defined(_M_IX86)
  auto ai = reinterpret_cast<const __m128i*>(a);
  auto bi = reinterpret_cast<const __m128i*>(b);

  size_t i = 0;

  // Ask the compiler not to unroll the main loop
  #if defined(__clang__)
  #pragma nounroll
  #elif defined(__GNUC__)
  #pragma GCC unroll 0
  #endif
  for ( ; i < 2 * (sizeof(T) / 32); i += 2) {
    __m128i eq0 = _mm_cmpeq_epi8(
      _mm_load_si128(ai + i),
      _mm_load_si128(bi + i));
    __m128i eq1 = _mm_cmpeq_epi8(
      _mm_load_si128(ai + i + 1),
      _mm_load_si128(bi + i + 1));

    // A byte lane stays 0xFF only if it matched in both vectors
    __m128i eq = _mm_and_si128(eq0, eq1);

    // One mask bit per byte lane; all 16 set means 32 equal bytes
    int mask = _mm_movemask_epi8(eq);
    if (mask != 0xFFFF)
      return false;
  }

  // Runs at most once, for the last vector of an odd count
  for ( ; i < sizeof(T) / 16; i++) {
    __m128i eq = _mm_cmpeq_epi8(
      _mm_load_si128(ai + i),
      _mm_load_si128(bi + i));

    int mask = _mm_movemask_epi8(eq);
    if (mask != 0xFFFF)
      return false;
  }

  return true;
#else
  return !std::memcmp(a, b, sizeof(T));
#endif
}
}