#include "d3d9_swvp_emu.h"

#include <algorithm>
#include <array>
#include <cstring>
#include <fstream>
#include <mutex>
#include <string>

#include "d3d9_device.h"
#include "d3d9_vertex_declaration.h"

#include "../spirv/spirv_module.h"

namespace dxvk {

  // Doesn't compare everything, only what we use in SWVP.

  // Hashes a vertex declaration by folding the stream, offset, type,
  // method, usage and usage index of every element into one hash state.
  size_t D3D9VertexDeclHash::operator () (const D3D9VertexElements& key) const {
    DxvkHashState hash;

    std::hash<BYTE> bytehash;
    std::hash<WORD> wordhash;

    for (const auto& element : key) {
      hash.add(wordhash(element.Stream));
      hash.add(wordhash(element.Offset));
      hash.add(bytehash(element.Type));
      hash.add(bytehash(element.Method));
      hash.add(bytehash(element.Usage));
      hash.add(bytehash(element.UsageIndex));
    }

    return hash;
  }


  // Two declarations are equal when they have the same number of
  // elements and every pair of elements is bytewise identical.
  bool D3D9VertexDeclEq::operator () (const D3D9VertexElements& a, const D3D9VertexElements& b) const {
    if (a.size() != b.size())
      return false;

    // Stop at the first mismatch, the result cannot change afterwards.
    for (size_t i = 0; i < a.size(); i++) {
      if (std::memcmp(&a[i], &b[i], sizeof(a[0])) != 0)
        return false;
    }

    return true;
  }


  // Storage class of the components of a vertex element.
  enum class DecltypeClass {
    Float, Byte, Short, Dec, Half
  };


  // Modifiers that apply on top of the storage class.
  enum DecltypeFlags {
    Signed     = 1,
    Normalize  = 2,
    ReverseRGB = 4
  };


  // Decomposed description of a D3DDECLTYPE.
  struct Decltype {
    DecltypeClass Class;        // Storage class of each component
    uint32_t      VectorCount;  // Number of components
    uint32_t      Flags;        // Combination of DecltypeFlags
  };


  // Maps a D3DDECLTYPE to its class, component count and flags.
  // Types that are not listed are treated like D3DDECLTYPE_FLOAT4.
  Decltype ClassifyDecltype(D3DDECLTYPE Type) {
    switch (Type) {
      case D3DDECLTYPE_FLOAT1:    return { DecltypeClass::Float, 1, DecltypeFlags::Signed };
      case D3DDECLTYPE_FLOAT2:    return { DecltypeClass::Float, 2, DecltypeFlags::Signed };
      case D3DDECLTYPE_FLOAT3:    return { DecltypeClass::Float, 3, DecltypeFlags::Signed };
      case D3DDECLTYPE_FLOAT4:    return { DecltypeClass::Float, 4, DecltypeFlags::Signed };
      case D3DDECLTYPE_D3DCOLOR:  return { DecltypeClass::Byte,  4, DecltypeFlags::Normalize | DecltypeFlags::ReverseRGB };
      case D3DDECLTYPE_UBYTE4:    return { DecltypeClass::Byte,  4, 0 };
      case D3DDECLTYPE_SHORT2:    return { DecltypeClass::Short, 2, DecltypeFlags::Signed };
      case D3DDECLTYPE_SHORT4:    return { DecltypeClass::Short, 4, DecltypeFlags::Signed };
      case D3DDECLTYPE_UBYTE4N:   return { DecltypeClass::Byte,  4, DecltypeFlags::Normalize };
      case D3DDECLTYPE_SHORT2N:   return { DecltypeClass::Short, 2, DecltypeFlags::Signed | DecltypeFlags::Normalize };
      case D3DDECLTYPE_SHORT4N:   return { DecltypeClass::Short, 4, DecltypeFlags::Signed | DecltypeFlags::Normalize };
      case D3DDECLTYPE_USHORT2N:  return { DecltypeClass::Short, 2, DecltypeFlags::Normalize };
      case D3DDECLTYPE_USHORT4N:  return { DecltypeClass::Short, 4, DecltypeFlags::Normalize };
      case D3DDECLTYPE_UDEC3:     return { DecltypeClass::Dec,   3, 0 };
      case D3DDECLTYPE_DEC3N:     return { DecltypeClass::Dec,   3, DecltypeFlags::Signed | DecltypeFlags::Normalize };
      case D3DDECLTYPE_FLOAT16_2: return { DecltypeClass::Half,  2, DecltypeFlags::Signed };
      case D3DDECLTYPE_FLOAT16_4: return { DecltypeClass::Half,  4, DecltypeFlags::Signed };
      default:                    return { DecltypeClass::Float, 4, DecltypeFlags::Signed };
    }
  }


  // Builds a geometry shader that takes point primitives, reads the
  // inputs named by a vertex declaration and stores them, converted
  // to the declared element types, into a storage buffer. The shader
  // never emits any geometry of its own.
  class D3D9SWVPEmulatorGenerator {

  public:

    // Sets up the module header, the geometry execution modes and
    // opens the entry point function. \c name is used as debug source.
    D3D9SWVPEmulatorGenerator(const std::string& name)
    : m_module(spvVersion(1, 3)) {
      m_entryPointId = m_module.allocateId();

      m_module.setDebugSource(
        spv::SourceLanguageUnknown, 0,
        m_module.addDebugString(name.c_str()),
        nullptr);

      m_module.setMemoryModel(
        spv::AddressingModelLogical,
        spv::MemoryModelGLSL450);

      m_module.enableCapability(spv::CapabilityGeometry);

      m_module.setExecutionMode(m_entryPointId, spv::ExecutionModeInputPoints);
      m_module.setExecutionMode(m_entryPointId, spv::ExecutionModeOutputPoints);
      // This has to be > 0 for some reason even though
      // we will never emit a vertex
      m_module.setOutputVertices(m_entryPointId, 1);
      m_module.setInvocations(m_entryPointId, 1);

      m_module.functionBegin(m_module.defVoidType(), m_entryPointId,
        m_module.defFunctionType(m_module.defVoidType(), 0, nullptr),
        spv::FunctionControlMaskNone);
      m_module.opLabel(m_module.allocateId());
    }


    // Emits the function body: declares the output buffer, then for
    // every element of \c elements loads the matching input, converts
    // it to the declared type and stores it dword by dword.
    void compile(const D3D9VertexElements& elements) {
      uint32_t uint_t  = m_module.defIntType(32, false);
      uint32_t float_t = m_module.defFloatType(32);
      uint32_t vec4_t  = m_module.defVectorType(float_t, 4);

      uint32_t vec4_singular_array_t = m_module.defArrayType(vec4_t, m_module.constu32(1));

      // Setup the buffer
      uint32_t bufferSlot = getSWVPBufferSlot();

      uint32_t arrayType = m_module.defRuntimeArrayTypeUnique(uint_t);
      m_module.decorateArrayStride(arrayType, sizeof(uint32_t));

      uint32_t buffer_t = m_module.defStructTypeUnique(1, &arrayType);
      m_module.memberDecorateOffset(buffer_t, 0, 0);
      m_module.decorate(buffer_t, spv::DecorationBufferBlock);

      uint32_t buffer = m_module.newVar(
        m_module.defPointerType(buffer_t, spv::StorageClassUniform),
        spv::StorageClassUniform);
      m_module.decorateDescriptorSet(buffer, 0);
      m_module.decorateBinding(buffer, bufferSlot);

      m_bufferBinding.descriptorType  = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
      m_bufferBinding.viewType        = VK_IMAGE_VIEW_TYPE_MAX_ENUM;
      m_bufferBinding.resourceBinding = bufferSlot;
      m_bufferBinding.stage           = VK_SHADER_STAGE_GEOMETRY_BIT;
      m_bufferBinding.access          = VK_ACCESS_SHADER_WRITE_BIT;
      m_bufferBinding.uboSet          = VK_TRUE;

      // Load our builtins
      uint32_t primitiveIdPtr = m_module.newVar(
        m_module.defPointerType(uint_t, spv::StorageClassInput),
        spv::StorageClassInput);
      m_module.decorateBuiltIn(primitiveIdPtr, spv::BuiltInPrimitiveId);

      uint32_t primitiveId = m_module.opLoad(uint_t, primitiveIdPtr);

      // The size of any given vertex
      uint32_t size = 0;
      for (const auto& element : elements) {
        if (element.Stream == 0 && element.Type != D3DDECLTYPE_UNUSED) {
          size = std::max(size, element.Offset + GetDecltypeSize(D3DDECLTYPE(element.Type)));
        }
      }

      // The buffer is addressed in dwords, so convert bytes to dwords
      uint32_t vertexSize = m_module.constu32(size / sizeof(uint32_t));

      // The offset of this vertex from the beginning of the buffer
      uint32_t thisVertexOffset = m_module.opIMul(uint_t, vertexSize, primitiveId);

      for (const auto& element : elements) {
        // Load the slot associated with this element
        DxsoSemantic semantic = { DxsoUsage(element.Usage), element.UsageIndex };

        uint32_t elementPtr = m_module.newVar(
          m_module.defPointerType(vec4_singular_array_t, spv::StorageClassInput),
          spv::StorageClassInput);

        if ((semantic.usage == DxsoUsage::Position || semantic.usage == DxsoUsage::PositionT) && element.UsageIndex == 0) {
          // Load from builtin
          m_module.decorateBuiltIn(elementPtr, spv::BuiltInPosition);
        } else {
          // Load from slot
          uint32_t slotIdx = RegisterLinkerSlot(semantic);

          m_module.decorateLocation(elementPtr, slotIdx);
          m_inputMask |= 1u << slotIdx;
        }

        // Inputs are declared as arrays of one vec4, fetch the only entry
        uint32_t zero = m_module.constu32(0);
        uint32_t elementVar = m_module.opAccessChain(
          m_module.defPointerType(vec4_t, spv::StorageClassInput),
          elementPtr, 1, &zero);
        elementVar = m_module.opLoad(vec4_t, elementVar);

        // The offset of this element from the beginning of any given vertex
        uint32_t perVertexElementOffset = m_module.constu32(element.Offset / sizeof(uint32_t));

        // The offset of this element from the beginning of the buffer for **THIS** vertex
        uint32_t elementOffset = m_module.opIAdd(uint_t, thisVertexOffset, perVertexElementOffset);

        // Write to the buffer at the element offset for each part of the vector.
        Decltype elementInfo = ClassifyDecltype(D3DDECLTYPE(element.Type));

        if (elementInfo.Class == DecltypeClass::Dec) {
          // TODO!
          Logger::warn("Encountered UDEC3/DEC3N class, ignoring...");
          continue;
        }

        const uint32_t componentCount = elementInfo.VectorCount;
        const bool     isSigned       = (elementInfo.Flags & DecltypeFlags::Signed) != 0;

        // SPIR-V vectors need at least two components, so a
        // single-component element is handled as a plain scalar.
        uint32_t vecn_t = componentCount > 1
          ? m_module.defVectorType(float_t, componentCount)
          : float_t;

        uint32_t componentSet;

        // Modifiers...
        if (componentCount == 1) {
          const uint32_t firstComponent = 0;
          componentSet = m_module.opCompositeExtract(float_t, elementVar, 1, &firstComponent);
        } else if (elementInfo.Flags & DecltypeFlags::ReverseRGB) {
          std::array<uint32_t, 4> indices = { 2, 1, 0, 3 };
          componentSet = m_module.opVectorShuffle(vecn_t, elementVar, elementVar, componentCount, indices.data());
        } else {
          std::array<uint32_t, 4> indices = { 0, 1, 2, 3 };
          componentSet = m_module.opVectorShuffle(vecn_t, elementVar, elementVar, componentCount, indices.data());
        }

        if (elementInfo.Flags & DecltypeFlags::Normalize) {
          // Scale to the full range of the destination integer type.
          // Every normalized type returned by ClassifyDecltype has at
          // least two components, so the vector multiply is valid.
          float scale = 255.0f;

          if (elementInfo.Class == DecltypeClass::Short)
            scale = isSigned ? 32767.0f : 65535.0f;

          componentSet = m_module.opVectorTimesScalar(vecn_t, componentSet, m_module.constf32(scale));
        }

        // Convert the component to the correct type/value.
        switch (elementInfo.Class) {
          case DecltypeClass::Float: break; // Do nothing!

          case DecltypeClass::Byte: {
            m_module.enableCapability(spv::CapabilityInt8);

            uint32_t type = m_module.defIntType(8, isSigned);
            type = m_module.defVectorType(type, componentCount);

            componentSet = isSigned
              ? m_module.opConvertFtoS(type, componentSet)
              : m_module.opConvertFtoU(type, componentSet);

            break;
          }

          case DecltypeClass::Short: {
            m_module.enableCapability(spv::CapabilityInt16);

            uint32_t type = m_module.defIntType(16, isSigned);
            type = m_module.defVectorType(type, componentCount);

            componentSet = isSigned
              ? m_module.opConvertFtoS(type, componentSet)
              : m_module.opConvertFtoU(type, componentSet);

            break;
          }

          case DecltypeClass::Half: {
            m_module.enableCapability(spv::CapabilityFloat16);

            uint32_t type = m_module.defFloatType(16);
            type = m_module.defVectorType(type, componentCount);
            componentSet = m_module.opFConvert(type, componentSet);

            break;
          }

          case DecltypeClass::Dec: {
            // TODO! (not reached, Dec elements are skipped above)
            break;
          }
        }

        // Bitcast to dwords before we write.
        uint32_t dwordCount = GetDecltypeSize(D3DDECLTYPE(element.Type)) / sizeof(uint32_t);

        // Nothing to store for an element without any payload.
        // NOTE(review): presumably only D3DDECLTYPE_UNUSED ends up
        // here, confirm against GetDecltypeSize.
        if (dwordCount == 0)
          continue;

        // A single dword is bitcast to a scalar since a
        // one-component vector type does not exist in SPIR-V.
        uint32_t dwords = m_module.opBitcast(
          dwordCount > 1 ? m_module.defVectorType(uint_t, dwordCount) : uint_t,
          componentSet);

        // Finally write each dword to the buffer!
        for (uint32_t i = 0; i < dwordCount; i++) {
          std::array<uint32_t, 2> bufferIndices = { m_module.constu32(0), elementOffset };

          uint32_t writeDest = m_module.opAccessChain(
            m_module.defPointerType(uint_t, spv::StorageClassUniform),
            buffer, bufferIndices.size(), bufferIndices.data());

          uint32_t currentDword = dwordCount > 1
            ? m_module.opCompositeExtract(uint_t, dwords, 1, &i)
            : dwords;

          m_module.opStore(writeDest, currentDword);

          elementOffset = m_module.opIAdd(uint_t, elementOffset, m_module.constu32(1));
        }
      }
    }


    // Closes the entry point function and wraps the compiled module
    // into a geometry shader object that uses the one buffer binding
    // and the input mask collected by compile().
    Rc<DxvkShader> finalize() {
      m_module.opReturn();
      m_module.functionEnd();

      m_module.addEntryPoint(m_entryPointId,
        spv::ExecutionModelGeometry, "main");
      m_module.setDebugName(m_entryPointId, "main");

      DxvkShaderCreateInfo info;
      info.stage        = VK_SHADER_STAGE_GEOMETRY_BIT;
      info.bindingCount = 1;
      info.bindings     = &m_bufferBinding;
      info.inputMask    = m_inputMask;

      return new DxvkShader(info, m_module.compile());
    }

  private:

    SpirvModule     m_module;

    uint32_t        m_entryPointId = 0;
    uint32_t        m_inputMask    = 0u;
    DxvkBindingInfo m_bufferBinding;

  };


  // Returns the emulation shader for the given vertex declaration,
  // compiling and caching it on first use.
  Rc<DxvkShader> D3D9SWVPEmulator::GetShaderModule(D3D9DeviceEx* pDevice, const D3D9VertexElements& elements) {
    // Use the shader's unique key for the lookup
    { std::unique_lock lock(m_mutex);

      auto entry = m_modules.find(elements);
      if (entry != m_modules.end())
        return entry->second;
    }

    Sha1Hash hash = Sha1Hash::compute(
      elements.data(), elements.size() * sizeof(elements[0]));

    DxvkShaderKey key = { VK_SHADER_STAGE_GEOMETRY_BIT, hash };
    std::string name = str::format("SWVP_", key.toString());

    // This shader has not been compiled yet, so we have to create a
    // new module. This takes a while, so we won't lock the structure.
    D3D9SWVPEmulatorGenerator generator(name);
    generator.compile(elements);
    Rc<DxvkShader> shader = generator.finalize();

    shader->setShaderKey(key);
    pDevice->GetDXVKDevice()->registerShader(shader);

    const std::string& dumpPath = pDevice->GetOptions()->shaderDumpPath;

    if (dumpPath.size() != 0) {
      std::ofstream dumpStream(
        str::format(dumpPath, "/", name, ".spv"),
        std::ios_base::binary | std::ios_base::trunc);

      shader->dump(dumpStream);
    }

    // Insert the new module into the lookup table. If another thread
    // has compiled the same shader in the meantime, we should return
    // that object instead and discard the newly created module.
    { std::unique_lock lock(m_mutex);

      auto status = m_modules.insert({ elements, shader });
      if (!status.second)
        return status.first->second;
    }

    return shader;
  }

}