mirror of
https://github.com/doitsujin/dxvk.git
synced 2025-01-18 02:52:10 +01:00
[dxbc] Implemented thread group shared memory and barriers
This commit is contained in:
parent
9848f9bdaa
commit
a51439fb29
@ -69,6 +69,9 @@ namespace dxvk {
|
||||
case DxbcInstClass::Atomic:
|
||||
return this->emitAtomic(ins);
|
||||
|
||||
case DxbcInstClass::Barrier:
|
||||
return this->emitBarrier(ins);
|
||||
|
||||
case DxbcInstClass::BufferLoad:
|
||||
return this->emitBufferLoad(ins);
|
||||
|
||||
@ -354,6 +357,39 @@ namespace dxvk {
|
||||
ins.op));
|
||||
}
|
||||
} break;
|
||||
|
||||
case DxbcOperandType::InputThreadId: {
|
||||
m_cs.builtinGlobalInvocationId = emitNewBuiltinVariable({
|
||||
{ DxbcScalarType::Uint32, 3, 0 },
|
||||
spv::StorageClassInput },
|
||||
spv::BuiltInGlobalInvocationId,
|
||||
"vThreadId");
|
||||
} break;
|
||||
|
||||
case DxbcOperandType::InputThreadGroupId: {
|
||||
m_cs.builtinWorkgroupId = emitNewBuiltinVariable({
|
||||
{ DxbcScalarType::Uint32, 3, 0 },
|
||||
spv::StorageClassInput },
|
||||
spv::BuiltInWorkgroupId,
|
||||
"vThreadGroupId");
|
||||
} break;
|
||||
|
||||
case DxbcOperandType::InputThreadIdInGroup: {
|
||||
m_cs.builtinLocalInvocationId = emitNewBuiltinVariable({
|
||||
{ DxbcScalarType::Uint32, 3, 0 },
|
||||
spv::StorageClassInput },
|
||||
spv::BuiltInLocalInvocationId,
|
||||
"vThreadIdInGroup");
|
||||
} break;
|
||||
|
||||
case DxbcOperandType::InputThreadIndexInGroup: {
|
||||
// FIXME this might not be supported by Vulkan?
|
||||
m_cs.builtinLocalInvocationIndex = emitNewBuiltinVariable({
|
||||
{ DxbcScalarType::Uint32, 1, 0 },
|
||||
spv::StorageClassInput },
|
||||
spv::BuiltInLocalInvocationIndex,
|
||||
"vThreadIndexInGroup");
|
||||
} break;
|
||||
|
||||
default:
|
||||
Logger::err(str::format(
|
||||
@ -734,8 +770,34 @@ namespace dxvk {
|
||||
// dcl_tgsm_structured takes three arguments:
|
||||
// (dst0) The resource register ID
|
||||
// (imm0) Structure stride, in bytes
|
||||
// (imm0) Structure count
|
||||
Logger::err("DxbcCompiler: emitDclThreadGroupSharedMemory not implemented");
|
||||
// (imm1) Structure count
|
||||
const bool isStructured = ins.op == DxbcOpcode::DclThreadGroupSharedMemoryStructured;
|
||||
|
||||
const uint32_t regId = ins.dst[0].idx[0].offset;
|
||||
|
||||
if (regId >= m_gRegs.size())
|
||||
m_gRegs.resize(regId + 1);
|
||||
|
||||
const uint32_t elementStride = isStructured ? ins.imm[0].u32 : 0;
|
||||
const uint32_t elementCount = isStructured ? ins.imm[1].u32 : ins.imm[0].u32;
|
||||
|
||||
DxbcRegisterInfo varInfo;
|
||||
varInfo.type.ctype = DxbcScalarType::Uint32;
|
||||
varInfo.type.ccount = 1;
|
||||
varInfo.type.alength = isStructured
|
||||
? elementCount * elementStride / 4
|
||||
: elementCount;
|
||||
varInfo.sclass = spv::StorageClassWorkgroup;
|
||||
|
||||
m_gRegs[regId].type = isStructured
|
||||
? DxbcResourceType::Structured
|
||||
: DxbcResourceType::Raw;
|
||||
m_gRegs[regId].elementStride = elementStride;
|
||||
m_gRegs[regId].elementCount = elementCount;
|
||||
m_gRegs[regId].varId = emitNewVariable(varInfo);
|
||||
|
||||
m_module.setDebugName(m_gRegs[regId].varId,
|
||||
str::format("g", regId).c_str());
|
||||
}
|
||||
|
||||
|
||||
@ -1459,6 +1521,48 @@ namespace dxvk {
|
||||
}
|
||||
|
||||
|
||||
void DxbcCompiler::emitBarrier(const DxbcShaderInstruction& ins) {
|
||||
// sync takes no operands. Instead, the synchronization
|
||||
// scope is defined by the operand control bits.
|
||||
const DxbcSyncFlags flags = ins.controls.syncFlags;
|
||||
|
||||
uint32_t executionScope = 0;
|
||||
uint32_t memoryScope = 0;
|
||||
uint32_t memorySemantics = 0;
|
||||
|
||||
if (flags.test(DxbcSyncFlag::ThreadsInGroup))
|
||||
executionScope = spv::ScopeWorkgroup;
|
||||
|
||||
if (flags.test(DxbcSyncFlag::ThreadGroupSharedMemory)) {
|
||||
memoryScope = spv::ScopeWorkgroup;
|
||||
memorySemantics |= spv::MemorySemanticsWorkgroupMemoryMask;
|
||||
}
|
||||
|
||||
if (flags.test(DxbcSyncFlag::UavMemoryGroup)) {
|
||||
memoryScope = spv::ScopeWorkgroup;
|
||||
memorySemantics |= spv::MemorySemanticsUniformMemoryMask;
|
||||
}
|
||||
|
||||
if (flags.test(DxbcSyncFlag::UavMemoryGlobal)) {
|
||||
memoryScope = spv::ScopeDevice;
|
||||
memorySemantics |= spv::MemorySemanticsUniformMemoryMask;
|
||||
}
|
||||
|
||||
if (executionScope != 0) {
|
||||
m_module.opControlBarrier(
|
||||
m_module.constu32(executionScope),
|
||||
m_module.constu32(memoryScope),
|
||||
m_module.constu32(memorySemantics));
|
||||
} else if (memorySemantics != spv::MemorySemanticsMaskNone) {
|
||||
m_module.opMemoryBarrier(
|
||||
m_module.constu32(memoryScope),
|
||||
m_module.constu32(memorySemantics));
|
||||
} else {
|
||||
Logger::warn("DxbcCompiler: sync instruction has no effect");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void DxbcCompiler::emitBufferLoad(const DxbcShaderInstruction& ins) {
|
||||
// ld_raw takes three arguments:
|
||||
// (dst0) Destination register
|
||||
@ -2142,7 +2246,7 @@ namespace dxvk {
|
||||
|
||||
uint32_t dstIndex = 0;
|
||||
|
||||
for (uint32_t i = 0; i < value.type.ccount; i++) {
|
||||
for (uint32_t i = 0; i < 4; i++) {
|
||||
if (writeMask[i])
|
||||
indices[dstIndex++] = swizzle[i];
|
||||
}
|
||||
@ -2500,6 +2604,26 @@ namespace dxvk {
|
||||
case DxbcOperandType::ImmediateConstantBuffer:
|
||||
return emitGetImmConstBufPtr(operand);
|
||||
|
||||
case DxbcOperandType::InputThreadId:
|
||||
return DxbcRegisterPointer {
|
||||
{ DxbcScalarType::Uint32, 3 },
|
||||
m_cs.builtinGlobalInvocationId };
|
||||
|
||||
case DxbcOperandType::InputThreadGroupId:
|
||||
return DxbcRegisterPointer {
|
||||
{ DxbcScalarType::Uint32, 3 },
|
||||
m_cs.builtinWorkgroupId };
|
||||
|
||||
case DxbcOperandType::InputThreadIdInGroup:
|
||||
return DxbcRegisterPointer {
|
||||
{ DxbcScalarType::Uint32, 3 },
|
||||
m_cs.builtinLocalInvocationId };
|
||||
|
||||
case DxbcOperandType::InputThreadIndexInGroup:
|
||||
return DxbcRegisterPointer {
|
||||
{ DxbcScalarType::Uint32, 1 },
|
||||
m_cs.builtinLocalInvocationIndex };
|
||||
|
||||
default:
|
||||
throw DxvkError(str::format(
|
||||
"DxbcCompiler: Unhandled operand type: ",
|
||||
@ -2561,6 +2685,11 @@ namespace dxvk {
|
||||
scalarTypeId, bufferId, elementIndexAdjusted,
|
||||
SpirvImageOperands());
|
||||
|
||||
case DxbcOperandType::ThreadGroupSharedMemory:
|
||||
return m_module.opLoad(scalarTypeId,
|
||||
m_module.opAccessChain(bufferInfo.typeId,
|
||||
bufferInfo.varId, 1, &elementIndexAdjusted));
|
||||
|
||||
default:
|
||||
throw DxvkError("DxbcCompiler: Invalid operand type for strucured/raw load");
|
||||
}
|
||||
@ -2626,6 +2755,13 @@ namespace dxvk {
|
||||
SpirvImageOperands());
|
||||
break;
|
||||
|
||||
case DxbcOperandType::ThreadGroupSharedMemory:
|
||||
m_module.opStore(
|
||||
m_module.opAccessChain(bufferInfo.typeId,
|
||||
bufferInfo.varId, 1, &elementIndexAdjusted),
|
||||
srcComponentId);
|
||||
break;
|
||||
|
||||
default:
|
||||
throw DxvkError("DxbcCompiler: Invalid operand type for strucured/raw store");
|
||||
}
|
||||
@ -3090,46 +3226,6 @@ namespace dxvk {
|
||||
}
|
||||
|
||||
|
||||
void DxbcCompiler::emitCsInitBuiltins() {
|
||||
m_cs.builtinGlobalInvocationId = emitNewBuiltinVariable({
|
||||
{ DxbcScalarType::Uint32, 3, 0 },
|
||||
spv::StorageClassInput },
|
||||
spv::BuiltInGlobalInvocationId,
|
||||
"cs_global_invocation_id");
|
||||
|
||||
m_cs.builtinLocalInvocationId = emitNewBuiltinVariable({
|
||||
{ DxbcScalarType::Uint32, 3, 0 },
|
||||
spv::StorageClassInput },
|
||||
spv::BuiltInLocalInvocationId,
|
||||
"cs_local_invocation_id");
|
||||
|
||||
// FIXME Vulkan might not support this? not documented
|
||||
m_cs.builtinLocalInvocationIndex = emitNewBuiltinVariable({
|
||||
{ DxbcScalarType::Uint32, 1, 0 },
|
||||
spv::StorageClassInput },
|
||||
spv::BuiltInLocalInvocationIndex,
|
||||
"cs_local_invocation_index");
|
||||
|
||||
m_cs.builtinWorkgroupId = emitNewBuiltinVariable({
|
||||
{ DxbcScalarType::Uint32, 3, 0 },
|
||||
spv::StorageClassInput },
|
||||
spv::BuiltInWorkgroupId,
|
||||
"cs_workgroup_id");
|
||||
|
||||
m_cs.builtinWorkgroupSize = emitNewBuiltinVariable({
|
||||
{ DxbcScalarType::Uint32, 3, 0 },
|
||||
spv::StorageClassInput },
|
||||
spv::BuiltInWorkgroupSize,
|
||||
"cs_workgroup_size");
|
||||
|
||||
m_cs.builtinWorkgroupCount = emitNewBuiltinVariable({
|
||||
{ DxbcScalarType::Uint32, 3, 0 },
|
||||
spv::StorageClassInput },
|
||||
spv::BuiltInNumWorkgroups,
|
||||
"cs_workgroup_count");
|
||||
}
|
||||
|
||||
|
||||
void DxbcCompiler::emitVsInit() {
|
||||
m_module.enableCapability(spv::CapabilityClipDistance);
|
||||
m_module.enableCapability(spv::CapabilityCullDistance);
|
||||
@ -3247,10 +3343,6 @@ namespace dxvk {
|
||||
|
||||
|
||||
void DxbcCompiler::emitCsInit() {
|
||||
// There are no input or output
|
||||
// variables for compute shaders
|
||||
emitCsInitBuiltins();
|
||||
|
||||
// Main function of the compute shader
|
||||
m_cs.functionId = m_module.allocateId();
|
||||
m_module.setDebugName(m_cs.functionId, "cs_main");
|
||||
@ -3403,9 +3495,16 @@ namespace dxvk {
|
||||
return result;
|
||||
} break;
|
||||
|
||||
// TODO implement
|
||||
// case DxbcOperandType::ThreadGroupSharedMemory: {
|
||||
// } break;
|
||||
case DxbcOperandType::ThreadGroupSharedMemory: {
|
||||
DxbcBufferInfo result;
|
||||
result.type = m_gRegs.at(registerId).type;
|
||||
result.typeId = m_module.defPointerType(
|
||||
getScalarTypeId(DxbcScalarType::Uint32),
|
||||
spv::StorageClassWorkgroup);
|
||||
result.varId = m_gRegs.at(registerId).varId;
|
||||
result.stride = m_gRegs.at(registerId).elementStride;
|
||||
return result;
|
||||
} break;
|
||||
|
||||
default:
|
||||
throw DxvkError(str::format("DxbcCompiler: Invalid operand type for buffer: ", reg.type));
|
||||
|
@ -86,6 +86,14 @@ namespace dxvk {
|
||||
};
|
||||
|
||||
|
||||
struct DxbcGreg {
|
||||
DxbcResourceType type = DxbcResourceType::Raw;
|
||||
uint32_t elementStride = 0;
|
||||
uint32_t elementCount = 0;
|
||||
uint32_t varId = 0;
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* \brief Vertex shader-specific structure
|
||||
*/
|
||||
@ -133,8 +141,6 @@ namespace dxvk {
|
||||
uint32_t builtinLocalInvocationId = 0;
|
||||
uint32_t builtinLocalInvocationIndex = 0;
|
||||
uint32_t builtinWorkgroupId = 0;
|
||||
uint32_t builtinWorkgroupSize = 0;
|
||||
uint32_t builtinWorkgroupCount = 0;
|
||||
};
|
||||
|
||||
|
||||
@ -226,6 +232,10 @@ namespace dxvk {
|
||||
std::vector<uint32_t> m_rRegs;
|
||||
std::vector<DxbcXreg> m_xRegs;
|
||||
|
||||
/////////////////////////////////////////////
|
||||
// Thread group shared memory (g#) registers
|
||||
std::vector<DxbcGreg> m_gRegs;
|
||||
|
||||
///////////////////////////////////////////////////////////
|
||||
// v# registers as defined by the shader. The type of each
|
||||
// of these inputs is either float4 or an array of float4.
|
||||
@ -382,6 +392,9 @@ namespace dxvk {
|
||||
void emitAtomic(
|
||||
const DxbcShaderInstruction& ins);
|
||||
|
||||
void emitBarrier(
|
||||
const DxbcShaderInstruction& ins);
|
||||
|
||||
void emitBufferLoad(
|
||||
const DxbcShaderInstruction& ins);
|
||||
|
||||
@ -577,7 +590,6 @@ namespace dxvk {
|
||||
void emitVsInitBuiltins();
|
||||
void emitGsInitBuiltins(uint32_t vertexCount);
|
||||
void emitPsInitBuiltins();
|
||||
void emitCsInitBuiltins();
|
||||
|
||||
/////////////////////////////////
|
||||
// Shader initialization methods
|
||||
|
@ -811,7 +811,7 @@ namespace dxvk {
|
||||
{ DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 },
|
||||
} },
|
||||
/* Sync */
|
||||
{ },
|
||||
{ 0, DxbcInstClass::Barrier },
|
||||
/* DAdd */
|
||||
{ },
|
||||
/* DMax */
|
||||
|
@ -33,6 +33,7 @@ namespace dxvk {
|
||||
ControlFlow, ///< Control flow instructions
|
||||
GeometryEmit, ///< Special geometry shader instructions
|
||||
Atomic, ///< Atomic operations
|
||||
Barrier, ///< Execution or memory barrier
|
||||
BufferLoad, ///< Structured or raw buffer load
|
||||
BufferStore, ///< Structured or raw buffer store
|
||||
TextureQuery, ///< Texture query instruction
|
||||
|
@ -1869,6 +1869,26 @@ namespace dxvk {
|
||||
}
|
||||
|
||||
|
||||
void SpirvModule::opControlBarrier(
|
||||
uint32_t execution,
|
||||
uint32_t memory,
|
||||
uint32_t semantics) {
|
||||
m_code.putIns (spv::OpControlBarrier, 4);
|
||||
m_code.putWord(execution);
|
||||
m_code.putWord(memory);
|
||||
m_code.putWord(semantics);
|
||||
}
|
||||
|
||||
|
||||
void SpirvModule::opMemoryBarrier(
|
||||
uint32_t memory,
|
||||
uint32_t semantics) {
|
||||
m_code.putIns (spv::OpMemoryBarrier, 3);
|
||||
m_code.putWord(memory);
|
||||
m_code.putWord(semantics);
|
||||
}
|
||||
|
||||
|
||||
void SpirvModule::opLoopMerge(
|
||||
uint32_t mergeBlock,
|
||||
uint32_t continueTarget,
|
||||
|
@ -643,6 +643,15 @@ namespace dxvk {
|
||||
uint32_t reference,
|
||||
const SpirvImageOperands& operands);
|
||||
|
||||
void opControlBarrier(
|
||||
uint32_t execution,
|
||||
uint32_t memory,
|
||||
uint32_t semantics);
|
||||
|
||||
void opMemoryBarrier(
|
||||
uint32_t memory,
|
||||
uint32_t semantics);
|
||||
|
||||
void opLoopMerge(
|
||||
uint32_t mergeBlock,
|
||||
uint32_t continueTarget,
|
||||
|
@ -13,9 +13,21 @@ using namespace dxvk;
|
||||
const std::string g_computeShaderCode =
|
||||
"StructuredBuffer<uint> buf_in : register(t0);\n"
|
||||
"RWStructuredBuffer<uint> buf_out : register(u0);\n"
|
||||
"[numthreads(1,1,1)]\n"
|
||||
"void main() {\n"
|
||||
" buf_out[0] = buf_in[0] * buf_in[1];\n"
|
||||
"groupshared uint tmp[64];\n"
|
||||
"[numthreads(64,1,1)]\n"
|
||||
"void main(uint localId : SV_GroupIndex, uint3 globalId : SV_DispatchThreadID) {\n"
|
||||
" tmp[localId] = buf_in[2 * globalId.x + 0]\n"
|
||||
" + buf_in[2 * globalId.x + 1];\n"
|
||||
" GroupMemoryBarrierWithGroupSync();\n"
|
||||
" uint activeGroups = 32;\n"
|
||||
" while (activeGroups != 0) {\n"
|
||||
" if (localId < activeGroups)\n"
|
||||
" tmp[localId] += tmp[localId + activeGroups];\n"
|
||||
" GroupMemoryBarrierWithGroupSync();\n"
|
||||
" activeGroups >>= 1;\n"
|
||||
" }\n"
|
||||
" if (localId == 0)\n"
|
||||
" buf_out[0] = tmp[0];\n"
|
||||
"}\n";
|
||||
|
||||
int WINAPI WinMain(HINSTANCE hInstance,
|
||||
@ -63,7 +75,7 @@ int WINAPI WinMain(HINSTANCE hInstance,
|
||||
return 1;
|
||||
}
|
||||
|
||||
std::array<uint32_t, 64> srcData;
|
||||
std::array<uint32_t, 128> srcData;
|
||||
for (uint32_t i = 0; i < srcData.size(); i++)
|
||||
srcData[i] = i + 1;
|
||||
|
||||
@ -93,7 +105,7 @@ int WINAPI WinMain(HINSTANCE hInstance,
|
||||
dstBufferDesc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;
|
||||
dstBufferDesc.StructureByteStride = sizeof(uint32_t);
|
||||
|
||||
if (FAILED(device->CreateBuffer(&dstBufferDesc, nullptr, &dstBuffer))) {
|
||||
if (FAILED(device->CreateBuffer(&dstBufferDesc, &srcDataInfo, &dstBuffer))) {
|
||||
std::cerr << "Failed to create destination buffer" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user