diff --git a/src/dxvk/dxvk_graphics_state.h b/src/dxvk/dxvk_graphics_state.h index bd050a5ef..938a8384c 100644 --- a/src/dxvk/dxvk_graphics_state.h +++ b/src/dxvk/dxvk_graphics_state.h @@ -641,11 +641,11 @@ namespace dxvk { } bool operator == (const DxvkGraphicsPipelineStateInfo& other) const { - return !std::memcmp(this, &other, sizeof(*this)); + return bit::bcmpeq(this, &other); } bool operator != (const DxvkGraphicsPipelineStateInfo& other) const { - return std::memcmp(this, &other, sizeof(*this)); + return !bit::bcmpeq(this, &other); } bool useDynamicStencilRef() const { @@ -709,11 +709,11 @@ namespace dxvk { } bool operator == (const DxvkComputePipelineStateInfo& other) const { - return !std::memcmp(this, &other, sizeof(*this)); + return bit::bcmpeq(this, &other); } bool operator != (const DxvkComputePipelineStateInfo& other) const { - return std::memcmp(this, &other, sizeof(*this)); + return !bit::bcmpeq(this, &other); } DxvkBindingMask bsBindingMask; diff --git a/src/util/util_bit.h b/src/util/util_bit.h index 1bad55c36..b8b2cee82 100644 --- a/src/util/util_bit.h +++ b/src/util/util_bit.h @@ -79,5 +79,57 @@ namespace dxvk::bit { shift += count; return shift > Bits ? 
/**
 * \brief Compares two aligned structs bit by bit
 *
 * All \c sizeof(T) bytes of the object representation are
 * compared, which includes any padding bytes. When SSE2 is
 * available the comparison runs in 16-byte steps using aligned
 * loads, otherwise it falls back to \c std::memcmp.
 *
 * \tparam T Struct type, must be aligned to at least 16 bytes
 * \param [in] a First struct
 * \param [in] b Second struct
 * \returns \c true if the structs are equal
 */
template<typename T>
bool bcmpeq(const T* a, const T* b) {
  // Aligned 16-byte loads are used below. Since sizeof(T) is a
  // multiple of alignof(T), this also guarantees that the struct
  // consists of a whole number of 16-byte vectors.
  static_assert(alignof(T) >= 16);

  // Select the vector path by instruction set rather than by compiler:
  // GCC and Clang define __SSE2__ when SSE2 code generation is enabled,
  // MSVC provides the intrinsics on all of its x86 / x64 targets.
#if defined(__SSE2__) || defined(_M_X64) || defined(_M_IX86)
  auto ai = reinterpret_cast<const __m128i*>(a);
  auto bi = reinterpret_cast<const __m128i*>(b);

  size_t i = 0;

  // Ask the compiler not to unroll the loop that follows. Clang is
  // checked first because it defines __GNUC__ as well.
  #if defined(__clang__)
  #pragma nounroll
  #elif defined(__GNUC__)
  #pragma GCC unroll 0
  #endif

  // Main loop: two vectors (32 bytes) per iteration, the per-byte
  // equality masks are combined so only one branch is needed.
  for ( ; i < 2 * (sizeof(T) / 32); i += 2) {
    __m128i eq0 = _mm_cmpeq_epi8(
      _mm_load_si128(ai + i),
      _mm_load_si128(bi + i));
    __m128i eq1 = _mm_cmpeq_epi8(
      _mm_load_si128(ai + i + 1),
      _mm_load_si128(bi + i + 1));
    __m128i eq = _mm_and_si128(eq0, eq1);

    // One mask bit per byte; all 16 set means no byte differs
    int mask = _mm_movemask_epi8(eq);
    if (mask != 0xFFFF)
      return false;
  }

  // Tail: at most one remaining vector if sizeof(T) is an odd
  // multiple of 16 bytes.
  for ( ; i < sizeof(T) / 16; i++) {
    __m128i eq = _mm_cmpeq_epi8(
      _mm_load_si128(ai + i),
      _mm_load_si128(bi + i));

    int mask = _mm_movemask_epi8(eq);
    if (mask != 0xFFFF)
      return false;
  }

  return true;
#else
  // Portable fallback for targets without SSE2
  return !std::memcmp(a, b, sizeof(T));
#endif
}