mirror of
https://github.com/doitsujin/dxvk.git
synced 2024-12-14 18:23:52 +01:00
3592 lines
118 KiB
C++
3592 lines
118 KiB
C++
#include "dxso_compiler.h"
|
|
|
|
#include "dxso_analysis.h"
|
|
|
|
#include "../d3d9/d3d9_caps.h"
|
|
#include "../d3d9/d3d9_constant_set.h"
|
|
#include "../d3d9/d3d9_state.h"
|
|
#include "../d3d9/d3d9_spec_constants.h"
|
|
#include "../d3d9/d3d9_fixed_function.h"
|
|
#include "dxso_util.h"
|
|
|
|
#include "../dxvk/dxvk_spec_const.h"
|
|
|
|
#include <cfloat>
|
|
|
|
namespace dxvk {
|
|
|
|
// Creates a compiler for a single DXSO shader.
//
// The module and program info are copied into members, while the
// analysis results and the constant layout are only referenced by
// pointer, so they presumably have to outlive this object. The
// SPIR-V module is created for SPIR-V 1.3. After resetting all
// register bookkeeping, emitInit() is called to emit the
// shader-type specific prologue.
DxsoCompiler::DxsoCompiler(
  const std::string&        fileName,
  const DxsoModuleInfo&     moduleInfo,
  const DxsoProgramInfo&    programInfo,
  const DxsoAnalysisInfo&   analysis,
  const D3D9ConstantLayout& layout)
: m_moduleInfo ( moduleInfo )
, m_programInfo( programInfo )
, m_analysis   ( &analysis )
, m_layout     ( &layout )
, m_module     ( spvVersion(1, 3) ) {
  // Reserve the entry point ID up front. The execution
  // mode is set during initialization and refers to it.
  m_entryPointId = m_module.allocateId();

  // Use the file name as debug source so that the
  // shader can be recognized in tools like renderdoc.
  m_module.setDebugSource(
    spv::SourceLanguageUnknown, 0,
    m_module.addDebugString(fileName.c_str()),
    nullptr);

  // All shaders share the same memory model.
  m_module.setMemoryModel(
    spv::AddressingModelLogical,
    spv::MemoryModelGLSL450);

  // Nothing has been declared or used yet.
  m_usedSamplers = 0;
  m_usedRTs      = 0;

  // Temporary registers start out undeclared.
  for (auto& tempReg : m_rRegs)
    tempReg = DxsoRegisterPointer{ };

  // No constant has a shader-defined value yet (ID 0).
  for (auto& constId : m_cFloat)
    constId = 0;

  for (auto& constId : m_cInt)
    constId = 0;

  for (auto& constId : m_cBool)
    constId = 0;

  // Vertex shader registers and the shared fog register
  m_vs.addr   = DxsoRegisterPointer{ };
  m_vs.oPos   = DxsoRegisterPointer{ };
  m_fog       = DxsoRegisterPointer{ };
  m_vs.oPSize = DxsoRegisterPointer{ };

  // Pixel shader registers
  for (auto& colorOut : m_ps.oColor)
    colorOut = DxsoRegisterPointer{ };

  m_ps.oDepth = DxsoRegisterPointer{ };
  m_ps.vFace  = DxsoRegisterPointer{ };
  m_ps.vPos   = DxsoRegisterPointer{ };

  m_loopCounter = DxsoRegisterPointer{ };

  this->emitInit();
}
|
|
|
|
|
|
void DxsoCompiler::processInstruction(
|
|
const DxsoInstructionContext& ctx,
|
|
uint32_t currentCoissueIdx) {
|
|
const DxsoOpcode opcode = ctx.instruction.opcode;
|
|
|
|
for (const auto& coissue : m_analysis->coissues) {
|
|
if (coissue.instructionIdx == ctx.instructionIdx &&
|
|
coissue.instructionIdx != currentCoissueIdx)
|
|
return;
|
|
|
|
if (coissue.instructionIdx == ctx.instructionIdx + 1)
|
|
processInstruction(coissue, coissue.instructionIdx);
|
|
}
|
|
|
|
switch (opcode) {
|
|
case DxsoOpcode::Nop:
|
|
return;
|
|
|
|
case DxsoOpcode::Dcl:
|
|
return this->emitDcl(ctx);
|
|
|
|
case DxsoOpcode::Def:
|
|
case DxsoOpcode::DefI:
|
|
case DxsoOpcode::DefB:
|
|
return this->emitDef(ctx);
|
|
|
|
case DxsoOpcode::Mov:
|
|
case DxsoOpcode::Mova:
|
|
return this->emitMov(ctx);
|
|
|
|
case DxsoOpcode::Add:
|
|
case DxsoOpcode::Sub:
|
|
case DxsoOpcode::Mad:
|
|
case DxsoOpcode::Mul:
|
|
case DxsoOpcode::Rcp:
|
|
case DxsoOpcode::Rsq:
|
|
case DxsoOpcode::Dp3:
|
|
case DxsoOpcode::Dp4:
|
|
case DxsoOpcode::Slt:
|
|
case DxsoOpcode::Sge:
|
|
case DxsoOpcode::Min:
|
|
case DxsoOpcode::ExpP:
|
|
case DxsoOpcode::Exp:
|
|
case DxsoOpcode::Max:
|
|
case DxsoOpcode::Pow:
|
|
case DxsoOpcode::Crs:
|
|
case DxsoOpcode::Abs:
|
|
case DxsoOpcode::Nrm:
|
|
case DxsoOpcode::SinCos:
|
|
case DxsoOpcode::Lit:
|
|
case DxsoOpcode::Dst:
|
|
case DxsoOpcode::LogP:
|
|
case DxsoOpcode::Log:
|
|
case DxsoOpcode::Lrp:
|
|
case DxsoOpcode::Frc:
|
|
case DxsoOpcode::Cmp:
|
|
case DxsoOpcode::Cnd:
|
|
case DxsoOpcode::Dp2Add:
|
|
case DxsoOpcode::DsX:
|
|
case DxsoOpcode::DsY:
|
|
return this->emitVectorAlu(ctx);
|
|
|
|
case DxsoOpcode::SetP:
|
|
return this->emitPredicateOp(ctx);
|
|
|
|
case DxsoOpcode::M3x2:
|
|
case DxsoOpcode::M3x3:
|
|
case DxsoOpcode::M3x4:
|
|
case DxsoOpcode::M4x3:
|
|
case DxsoOpcode::M4x4:
|
|
return this->emitMatrixAlu(ctx);
|
|
|
|
case DxsoOpcode::Loop:
|
|
return this->emitControlFlowLoop(ctx);
|
|
case DxsoOpcode::EndLoop:
|
|
return this->emitControlFlowEndLoop(ctx);
|
|
|
|
case DxsoOpcode::Rep:
|
|
return this->emitControlFlowRep(ctx);
|
|
case DxsoOpcode::EndRep:
|
|
return this->emitControlFlowEndRep(ctx);
|
|
|
|
case DxsoOpcode::Break:
|
|
return this->emitControlFlowBreak(ctx);
|
|
case DxsoOpcode::BreakC:
|
|
return this->emitControlFlowBreakC(ctx);
|
|
|
|
case DxsoOpcode::If:
|
|
case DxsoOpcode::Ifc:
|
|
return this->emitControlFlowIf(ctx);
|
|
case DxsoOpcode::Else:
|
|
return this->emitControlFlowElse(ctx);
|
|
case DxsoOpcode::EndIf:
|
|
return this->emitControlFlowEndIf(ctx);
|
|
|
|
case DxsoOpcode::TexCoord:
|
|
return this->emitTexCoord(ctx);
|
|
|
|
case DxsoOpcode::Tex:
|
|
case DxsoOpcode::TexLdl:
|
|
case DxsoOpcode::TexLdd:
|
|
case DxsoOpcode::TexDp3Tex:
|
|
case DxsoOpcode::TexReg2Ar:
|
|
case DxsoOpcode::TexReg2Gb:
|
|
case DxsoOpcode::TexReg2Rgb:
|
|
case DxsoOpcode::TexBem:
|
|
case DxsoOpcode::TexM3x2Tex:
|
|
case DxsoOpcode::TexM3x3Tex:
|
|
case DxsoOpcode::TexM3x3Spec:
|
|
case DxsoOpcode::TexM3x3VSpec:
|
|
return this->emitTextureSample(ctx);
|
|
case DxsoOpcode::TexKill:
|
|
return this->emitTextureKill(ctx);
|
|
|
|
case DxsoOpcode::TexM3x3Pad:
|
|
case DxsoOpcode::TexM3x2Pad:
|
|
// We don't need to do anything here, these are just padding instructions
|
|
break;
|
|
|
|
case DxsoOpcode::End:
|
|
case DxsoOpcode::Comment:
|
|
break;
|
|
|
|
default:
|
|
Logger::warn(str::format("DxsoCompiler::processInstruction: unhandled opcode: ", opcode));
|
|
break;
|
|
}
|
|
}
|
|
|
|
void DxsoCompiler::finalize() {
|
|
if (m_programInfo.type() == DxsoProgramTypes::VertexShader)
|
|
this->emitVsFinalize();
|
|
else
|
|
this->emitPsFinalize();
|
|
|
|
// Declare the entry point, we now have all the
|
|
// information we need, including the interfaces
|
|
m_module.addEntryPoint(m_entryPointId,
|
|
m_programInfo.executionModel(), "main",
|
|
m_entryPointInterfaces.size(),
|
|
m_entryPointInterfaces.data());
|
|
m_module.setDebugName(m_entryPointId, "main");
|
|
}
|
|
|
|
|
|
// Builds all shader permutations from the current module.
//
// The 'None' permutation is always created. For pixel shaders, a
// second 'FlatShade' permutation is created after decorating the
// diffuse and specular color inputs (if declared) as flat.
DxsoPermutations DxsoCompiler::compile() {
  DxsoPermutations result = { };

  // Default permutation, compiled from the unmodified module
  result[D3D9ShaderPermutations::None] = compileShader();

  if (m_programInfo.type() != DxsoProgramType::PixelShader)
    return result;

  // The decorations below modify the module in place. If more
  // permutations are ever added, it might be worth copying the
  // module before changing it any further.
  if (m_ps.diffuseColorIn)
    m_module.decorate(m_ps.diffuseColorIn, spv::DecorationFlat);

  if (m_ps.specularColorIn)
    m_module.decorate(m_ps.specularColorIn, spv::DecorationFlat);

  result[D3D9ShaderPermutations::FlatShade] = compileShader();
  return result;
}
|
|
|
|
|
|
// Creates a shader object from the module in its current state,
// using the resource and interface slots gathered so far. Shader
// options and constant data are passed in default-initialized.
Rc<DxvkShader> DxsoCompiler::compileShader() {
  DxvkShaderOptions   options   = { };
  DxvkShaderConstData constants = { };

  return new DxvkShader(
    m_programInfo.shaderStage(),
    m_resourceSlots.size(),
    m_resourceSlots.data(),
    m_interfaceSlots,
    m_module.compile(),
    options,
    std::move(constants));
}
|
|
|
|
void DxsoCompiler::emitInit() {
|
|
// Set up common capabilities for all shaders
|
|
m_module.enableCapability(spv::CapabilityShader);
|
|
m_module.enableCapability(spv::CapabilityImageQuery);
|
|
|
|
this->emitDclConstantBuffer();
|
|
this->emitDclInputArray();
|
|
|
|
// Initialize the shader module with capabilities
|
|
// etc. Each shader type has its own peculiarities.
|
|
switch (m_programInfo.type()) {
|
|
case DxsoProgramTypes::VertexShader: return this->emitVsInit();
|
|
case DxsoProgramTypes::PixelShader: return this->emitPsInit();
|
|
default: break;
|
|
}
|
|
}
|
|
|
|
|
|
void DxsoCompiler::emitDclConstantBuffer() {
|
|
std::array<uint32_t, 3> members = {
|
|
// float f[256 or 224 or 8192]
|
|
m_module.defArrayTypeUnique(
|
|
getVectorTypeId({ DxsoScalarType::Float32, 4 }),
|
|
m_module.constu32(m_layout->floatCount)),
|
|
|
|
// int i[16 or 2048]
|
|
m_module.defArrayTypeUnique(
|
|
getVectorTypeId({ DxsoScalarType::Sint32, 4 }),
|
|
m_module.constu32(m_layout->intCount)),
|
|
|
|
// uint32_t boolBitmask
|
|
// or uvec4 boolBitmask[512]
|
|
// Defined later...
|
|
0
|
|
};
|
|
|
|
// Decorate array strides, this is required.
|
|
m_module.decorateArrayStride(members[0], 16);
|
|
m_module.decorateArrayStride(members[1], 16);
|
|
|
|
if (m_layout->bitmaskCount == 1) {
|
|
members[2] = getScalarTypeId(DxsoScalarType::Uint32);
|
|
}
|
|
else {
|
|
// Must be a multiple of 4 otherwise.
|
|
members[2] = m_module.defArrayTypeUnique(
|
|
getVectorTypeId({ DxsoScalarType::Uint32, 4 }),
|
|
m_module.constu32(m_layout->bitmaskCount / 4));
|
|
|
|
m_module.decorateArrayStride(members[2], 16);
|
|
}
|
|
|
|
const uint32_t structType =
|
|
m_module.defStructType(members.size(), members.data());
|
|
|
|
m_module.decorateBlock(structType);
|
|
|
|
m_module.memberDecorateOffset(structType, 0, m_layout->floatOffset());
|
|
m_module.memberDecorateOffset(structType, 1, m_layout->intOffset());
|
|
m_module.memberDecorateOffset(structType, 2, m_layout->bitmaskOffset());
|
|
|
|
m_module.setDebugName(structType, "cbuffer_t");
|
|
m_module.setDebugMemberName(structType, 0, "f");
|
|
m_module.setDebugMemberName(structType, 1, "i");
|
|
m_module.setDebugMemberName(structType, 2, "b");
|
|
|
|
m_cBuffer = m_module.newVar(
|
|
m_module.defPointerType(structType, spv::StorageClassUniform),
|
|
spv::StorageClassUniform);
|
|
|
|
m_module.setDebugName(m_cBuffer, "c");
|
|
|
|
const uint32_t bindingId = computeResourceSlotId(
|
|
m_programInfo.type(), DxsoBindingType::ConstantBuffer,
|
|
0);
|
|
|
|
m_module.decorateDescriptorSet(m_cBuffer, 0);
|
|
m_module.decorateBinding(m_cBuffer, bindingId);
|
|
|
|
DxvkResourceSlot resource;
|
|
resource.slot = bindingId;
|
|
resource.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
|
|
resource.view = VK_IMAGE_VIEW_TYPE_MAX_ENUM;
|
|
resource.access = VK_ACCESS_UNIFORM_READ_BIT;
|
|
m_resourceSlots.push_back(resource);
|
|
}
|
|
|
|
|
|
void DxsoCompiler::emitDclInputArray() {
|
|
DxsoArrayType info;
|
|
info.ctype = DxsoScalarType::Float32;
|
|
info.ccount = 4;
|
|
info.alength = DxsoMaxInterfaceRegs;
|
|
|
|
uint32_t arrayTypeId = getArrayTypeId(info);
|
|
|
|
// Define the actual variable. Note that this is private
|
|
// because we will copy input registers
|
|
// to the array during the setup phase.
|
|
const uint32_t ptrTypeId = m_module.defPointerType(
|
|
arrayTypeId, spv::StorageClassPrivate);
|
|
|
|
m_vArray = m_module.newVar(
|
|
ptrTypeId, spv::StorageClassPrivate);
|
|
m_module.setDebugName(m_vArray, "v");
|
|
}
|
|
|
|
void DxsoCompiler::emitDclOutputArray() {
|
|
DxsoArrayType info;
|
|
info.ctype = DxsoScalarType::Float32;
|
|
info.ccount = 4;
|
|
info.alength = m_programInfo.type() == DxsoProgramTypes::VertexShader
|
|
? DxsoMaxInterfaceRegs
|
|
: caps::MaxSimultaneousRenderTargets;
|
|
|
|
uint32_t arrayTypeId = getArrayTypeId(info);
|
|
|
|
// Define the actual variable. Note that this is private
|
|
// because we will copy input registers
|
|
// to the array during the setup phase.
|
|
const uint32_t ptrTypeId = m_module.defPointerType(
|
|
arrayTypeId, spv::StorageClassPrivate);
|
|
|
|
m_oArray = m_module.newVar(
|
|
ptrTypeId, spv::StorageClassPrivate);
|
|
m_module.setDebugName(m_oArray, "o");
|
|
}
|
|
|
|
|
|
// Vertex shader specific initialization: enables the required
// capabilities and extensions, declares the output array and
// opens the "vs_main" function that receives the shader code.
void DxsoCompiler::emitVsInit() {
  m_module.enableCapability(spv::CapabilityClipDistance);
  m_module.enableCapability(spv::CapabilityDrawParameters);

  m_module.enableExtension("SPV_KHR_shader_draw_parameters");

  // Pixel shaders don't get an output array since their
  // output registers are specialized and not indexable.
  this->emitDclOutputArray();

  // Function that will hold the translated vertex shader
  m_vs.functionId = m_module.allocateId();
  m_module.setDebugName(m_vs.functionId, "vs_main");

  this->setupRenderStateInfo();

  // void vs_main()
  const uint32_t returnType   = m_module.defVoidType();
  const uint32_t functionType = m_module.defFunctionType(
    m_module.defVoidType(), 0, nullptr);

  this->emitFunctionBegin(m_vs.functionId, returnType, functionType);
  this->emitFunctionLabel();
}
|
|
|
|
|
|
void DxsoCompiler::emitPsSharedConstants() {
|
|
m_ps.sharedState = GetSharedConstants(m_module);
|
|
|
|
const uint32_t bindingId = computeResourceSlotId(
|
|
m_programInfo.type(), DxsoBindingType::ConstantBuffer,
|
|
PSShared);
|
|
|
|
m_module.decorateDescriptorSet(m_ps.sharedState, 0);
|
|
m_module.decorateBinding(m_ps.sharedState, bindingId);
|
|
|
|
DxvkResourceSlot resource;
|
|
resource.slot = bindingId;
|
|
resource.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
|
|
resource.view = VK_IMAGE_VIEW_TYPE_MAX_ENUM;
|
|
resource.access = VK_ACCESS_UNIFORM_READ_BIT;
|
|
m_resourceSlots.push_back(resource);
|
|
}
|
|
|
|
|
|
void DxsoCompiler::emitPsInit() {
|
|
m_module.enableCapability(spv::CapabilityDerivativeControl);
|
|
|
|
m_module.setExecutionMode(m_entryPointId,
|
|
spv::ExecutionModeOriginUpperLeft);
|
|
|
|
|
|
// Main function of the pixel shader
|
|
m_ps.functionId = m_module.allocateId();
|
|
m_module.setDebugName(m_ps.functionId, "ps_main");
|
|
|
|
if (m_programInfo.majorVersion() < 2) {
|
|
m_ps.samplerTypeSpec = m_module.specConst32(m_module.defIntType(32, 0), 0);
|
|
m_module.decorateSpecId(m_ps.samplerTypeSpec, getSpecId(D3D9SpecConstantId::SamplerType));
|
|
m_module.setDebugName(m_ps.samplerTypeSpec, "s_sampler_types");
|
|
|
|
m_ps.projectionSpec = m_module.specConst32(m_module.defIntType(32, 0), 0);
|
|
m_module.decorateSpecId(m_ps.projectionSpec, getSpecId(D3D9SpecConstantId::ProjectionType));
|
|
m_module.setDebugName(m_ps.projectionSpec, "s_projections");
|
|
}
|
|
|
|
this->setupRenderStateInfo();
|
|
this->emitPsSharedConstants();
|
|
|
|
this->emitFunctionBegin(
|
|
m_ps.functionId,
|
|
m_module.defVoidType(),
|
|
m_module.defFunctionType(
|
|
m_module.defVoidType(), 0, nullptr));
|
|
this->emitFunctionLabel();
|
|
|
|
// We may have to defer kill operations to the end of
|
|
// the shader in order to keep derivatives correct.
|
|
if (m_analysis->usesKill && m_moduleInfo.options.useDemoteToHelperInvocation) {
|
|
// This extension basically implements D3D-style discard
|
|
m_module.enableExtension("SPV_EXT_demote_to_helper_invocation");
|
|
m_module.enableCapability(spv::CapabilityDemoteToHelperInvocationEXT);
|
|
}
|
|
else if (m_analysis->usesKill && m_analysis->usesDerivatives) {
|
|
m_ps.killState = m_module.newVarInit(
|
|
m_module.defPointerType(m_module.defBoolType(), spv::StorageClassPrivate),
|
|
spv::StorageClassPrivate, m_module.constBool(false));
|
|
|
|
m_module.setDebugName(m_ps.killState, "ps_kill");
|
|
|
|
if (m_moduleInfo.options.useSubgroupOpsForEarlyDiscard) {
|
|
m_module.enableCapability(spv::CapabilityGroupNonUniform);
|
|
m_module.enableCapability(spv::CapabilityGroupNonUniformBallot);
|
|
|
|
DxsoRegisterInfo laneId;
|
|
laneId.type = { DxsoScalarType::Uint32, 1, 0 };
|
|
laneId.sclass = spv::StorageClassInput;
|
|
|
|
m_ps.builtinLaneId = emitNewBuiltinVariable(
|
|
laneId, spv::BuiltInSubgroupLocalInvocationId,
|
|
"fLaneId", 0);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
void DxsoCompiler::emitFunctionBegin(
|
|
uint32_t entryPoint,
|
|
uint32_t returnType,
|
|
uint32_t funcType) {
|
|
this->emitFunctionEnd();
|
|
|
|
m_module.functionBegin(
|
|
returnType, entryPoint, funcType,
|
|
spv::FunctionControlMaskNone);
|
|
|
|
m_insideFunction = true;
|
|
}
|
|
|
|
|
|
void DxsoCompiler::emitFunctionEnd() {
|
|
if (m_insideFunction) {
|
|
m_module.opReturn();
|
|
m_module.functionEnd();
|
|
}
|
|
|
|
m_insideFunction = false;
|
|
}
|
|
|
|
|
|
// Emits a label with a newly allocated ID
// and returns the ID of that label.
uint32_t DxsoCompiler::emitFunctionLabel() {
  const uint32_t newLabel = m_module.allocateId();
  m_module.opLabel(newLabel);
  return newLabel;
}
|
|
|
|
|
|
// Opens the entry point function, which takes no arguments and
// returns nothing, and stores its first label in m_mainFuncLabel.
void DxsoCompiler::emitMainFunctionBegin() {
  const uint32_t returnType   = m_module.defVoidType();
  const uint32_t functionType = m_module.defFunctionType(
    m_module.defVoidType(), 0, nullptr);

  this->emitFunctionBegin(m_entryPointId, returnType, functionType);

  m_mainFuncLabel = this->emitFunctionLabel();
}
|
|
|
|
|
|
uint32_t DxsoCompiler::emitNewVariable(const DxsoRegisterInfo& info) {
|
|
const uint32_t ptrTypeId = this->getPointerTypeId(info);
|
|
return m_module.newVar(ptrTypeId, info.sclass);
|
|
}
|
|
|
|
|
|
uint32_t DxsoCompiler::emitNewVariableDefault(
|
|
const DxsoRegisterInfo& info,
|
|
uint32_t value) {
|
|
const uint32_t ptrTypeId = this->getPointerTypeId(info);
|
|
if (value == 0)
|
|
return m_module.newVar(ptrTypeId, info.sclass);
|
|
else
|
|
return m_module.newVarInit(ptrTypeId, info.sclass, value);
|
|
}
|
|
|
|
|
|
uint32_t DxsoCompiler::emitNewBuiltinVariable(
|
|
const DxsoRegisterInfo& info,
|
|
spv::BuiltIn builtIn,
|
|
const char* name,
|
|
uint32_t value) {
|
|
const uint32_t varId = emitNewVariableDefault(info, value);
|
|
|
|
m_module.setDebugName(varId, name);
|
|
m_module.decorateBuiltIn(varId, builtIn);
|
|
|
|
if (m_programInfo.type() == DxsoProgramTypes::PixelShader
|
|
&& info.type.ctype != DxsoScalarType::Float32
|
|
&& info.type.ctype != DxsoScalarType::Bool
|
|
&& info.sclass == spv::StorageClassInput)
|
|
m_module.decorate(varId, spv::DecorationFlat);
|
|
|
|
m_entryPointInterfaces.push_back(varId);
|
|
return varId;
|
|
}
|
|
|
|
// Looks for the most recently added control flow block whose type
// is one of the given types. Returns a pointer to that block, or
// nullptr if there is no such block.
DxsoCfgBlock* DxsoCompiler::cfgFindBlock(
  const std::initializer_list<DxsoCfgBlockType>& types) {
  // Checks a block's type against the list of wanted types
  auto isWantedType = [&types](DxsoCfgBlockType blockType) {
    for (auto wanted : types) {
      if (blockType == wanted)
        return true;
    }

    return false;
  };

  // Walk the block list back to front
  auto block = m_controlFlowBlocks.rbegin();

  while (block != m_controlFlowBlocks.rend()) {
    if (isWantedType(block->type))
      return &(*block);

    block++;
  }

  return nullptr;
}
|
|
|
|
// Maps a semantic to the built-in that it corresponds to.
//
// Only outputs are mapped: Position 0 becomes the position
// built-in, PointSize 0 the point size built-in. Inputs and
// all other semantics yield spv::BuiltInMax.
spv::BuiltIn semanticToBuiltIn(bool input, DxsoSemantic semantic) {
  if (!input) {
    if (semantic == DxsoSemantic{ DxsoUsage::Position, 0 })
      return spv::BuiltInPosition;

    if (semantic == DxsoSemantic{ DxsoUsage::PointSize, 0 })
      return spv::BuiltInPointSize;
  }

  return spv::BuiltInMax;
}
|
|
|
|
void DxsoCompiler::emitDclInterface(
|
|
bool input,
|
|
uint32_t regNumber,
|
|
DxsoSemantic semantic,
|
|
DxsoRegMask mask,
|
|
bool centroid) {
|
|
auto& sgn = input
|
|
? m_isgn : m_osgn;
|
|
|
|
const bool pixel = m_programInfo.type() == DxsoProgramTypes::PixelShader;
|
|
const bool vertex = !pixel;
|
|
|
|
uint32_t slot = 0;
|
|
|
|
uint32_t& slots = input
|
|
? m_interfaceSlots.inputSlots
|
|
: m_interfaceSlots.outputSlots;
|
|
|
|
uint16_t& explicits = input
|
|
? m_explicitInputs
|
|
: m_explicitOutputs;
|
|
|
|
// Some things we consider builtins could be packed in an output reg.
|
|
bool builtin = semanticToBuiltIn(input, semantic) != spv::BuiltInMax;
|
|
|
|
uint32_t i = sgn.elemCount++;
|
|
|
|
if (input && vertex) {
|
|
// Any slot will do! Let's chose the next one
|
|
slot = i;
|
|
}
|
|
else if ( (!input && vertex)
|
|
|| (input && pixel ) ) {
|
|
// Don't register the slot if it belongs to a builtin
|
|
if (!builtin)
|
|
slot = RegisterLinkerSlot(semantic);
|
|
}
|
|
else { //if (!input && pixel)
|
|
// We want to make the output slot the same as the
|
|
// output register for pixel shaders so they go to
|
|
// the right render target.
|
|
slot = regNumber;
|
|
}
|
|
|
|
// Don't want to mark down any of these builtins.
|
|
if (!builtin)
|
|
slots |= 1u << slot;
|
|
explicits |= 1u << regNumber;
|
|
|
|
auto& elem = sgn.elems[i];
|
|
elem.slot = slot;
|
|
elem.regNumber = regNumber;
|
|
elem.semantic = semantic;
|
|
elem.mask = mask;
|
|
elem.centroid = centroid;
|
|
}
|
|
|
|
void DxsoCompiler::emitDclSampler(
|
|
uint32_t idx,
|
|
DxsoTextureType type) {
|
|
m_usedSamplers |= (1u << idx);
|
|
|
|
auto DclSampler = [this](
|
|
uint32_t idx,
|
|
DxsoSamplerType type,
|
|
bool depth,
|
|
bool implicit) {
|
|
// Setup our combines sampler.
|
|
DxsoSamplerInfo& sampler = !depth
|
|
? m_samplers[idx].color[type]
|
|
: m_samplers[idx].depth[type];
|
|
|
|
spv::Dim dimensionality;
|
|
VkImageViewType viewType;
|
|
|
|
const char* suffix = "_2d";
|
|
|
|
switch (type) {
|
|
default:
|
|
case SamplerTypeTexture2D:
|
|
sampler.dimensions = 2;
|
|
dimensionality = spv::Dim2D;
|
|
viewType = VK_IMAGE_VIEW_TYPE_2D;
|
|
break;
|
|
|
|
case SamplerTypeTextureCube:
|
|
suffix = "_cube";
|
|
sampler.dimensions = 3;
|
|
dimensionality = spv::DimCube;
|
|
viewType = VK_IMAGE_VIEW_TYPE_CUBE;
|
|
break;
|
|
|
|
case SamplerTypeTexture3D:
|
|
suffix = "_3d";
|
|
sampler.dimensions = 3;
|
|
dimensionality = spv::Dim3D;
|
|
viewType = VK_IMAGE_VIEW_TYPE_3D;
|
|
break;
|
|
}
|
|
|
|
sampler.typeId = m_module.defImageType(
|
|
m_module.defFloatType(32),
|
|
dimensionality, depth ? 1 : 0, 0, 0, 1,
|
|
spv::ImageFormatUnknown);
|
|
|
|
sampler.typeId = m_module.defSampledImageType(sampler.typeId);
|
|
|
|
sampler.varId = m_module.newVar(
|
|
m_module.defPointerType(
|
|
sampler.typeId, spv::StorageClassUniformConstant),
|
|
spv::StorageClassUniformConstant);
|
|
|
|
std::string name = str::format("s", idx, suffix, depth ? "_shadow" : "");
|
|
m_module.setDebugName(sampler.varId, name.c_str());
|
|
|
|
const uint32_t bindingId = computeResourceSlotId(m_programInfo.type(),
|
|
!depth ? DxsoBindingType::ColorImage : DxsoBindingType::DepthImage,
|
|
idx);
|
|
|
|
m_module.decorateDescriptorSet(sampler.varId, 0);
|
|
m_module.decorateBinding (sampler.varId, bindingId);
|
|
|
|
// Store descriptor info for the shader interface
|
|
DxvkResourceSlot resource;
|
|
resource.slot = bindingId;
|
|
resource.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
|
|
resource.view = implicit ? VK_IMAGE_VIEW_TYPE_MAX_ENUM : viewType;
|
|
resource.access = VK_ACCESS_SHADER_READ_BIT;
|
|
m_resourceSlots.push_back(resource);
|
|
};
|
|
|
|
if (m_programInfo.majorVersion() >= 2) {
|
|
DxsoSamplerType samplerType =
|
|
SamplerTypeFromTextureType(type);
|
|
|
|
DclSampler(idx, samplerType, false, false);
|
|
|
|
if (samplerType != SamplerTypeTexture3D) {
|
|
// We could also be depth compared!
|
|
DclSampler(idx, samplerType, true, false);
|
|
}
|
|
}
|
|
else {
|
|
// Could be any of these!
|
|
// We will check with the spec constant at sample time.
|
|
for (uint32_t i = 0; i < SamplerTypeCount; i++) {
|
|
auto samplerType = static_cast<DxsoSamplerType>(i);
|
|
|
|
DclSampler(idx, samplerType, false, true);
|
|
|
|
if (samplerType != SamplerTypeTexture3D)
|
|
DclSampler(idx, samplerType, true, true);
|
|
}
|
|
}
|
|
|
|
|
|
// Declare a specialization constant which will
|
|
// store whether or not the depth view is bound.
|
|
const uint32_t depthBinding = computeResourceSlotId(m_programInfo.type(),
|
|
DxsoBindingType::DepthImage, idx);
|
|
|
|
DxsoSampler& sampler = m_samplers[idx];
|
|
|
|
sampler.depthSpecConst = m_module.specConstBool(true);
|
|
sampler.type = type;
|
|
m_module.decorateSpecId(sampler.depthSpecConst, depthBinding);
|
|
m_module.setDebugName(sampler.depthSpecConst,
|
|
str::format("s", idx, "_useShadow").c_str());
|
|
}
|
|
|
|
|
|
uint32_t DxsoCompiler::emitArrayIndex(
|
|
uint32_t idx,
|
|
const DxsoBaseRegister* relative) {
|
|
uint32_t result = m_module.consti32(idx);
|
|
|
|
if (relative != nullptr) {
|
|
DxsoRegisterValue offset = emitRegisterLoad(*relative, DxsoRegMask(true, false, false, false), nullptr);
|
|
|
|
result = m_module.opIAdd(
|
|
getVectorTypeId(offset.type),
|
|
result, offset.id);
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
|
|
// Returns a pointer to an element of the input array.
//
// texture:  Whether reg is a texture register. These are
//           placed behind the two color registers.
// reg:      Register whose number selects the element
// relative: Optional register for relative addressing
DxsoRegisterPointer DxsoCompiler::emitInputPtr(
        bool              texture,
  const DxsoBaseRegister& reg,
  const DxsoBaseRegister* relative) {
  // Skip the two color regs when addressing a texture reg.
  const uint32_t elementIdx = reg.id.num + (texture ? 2u : 0u);

  DxsoRegisterPointer result;
  result.type = DxsoVectorType{ DxsoScalarType::Float32, 4 };

  uint32_t indexId = this->emitArrayIndex(elementIdx, relative);

  const uint32_t vectorTypeId = getVectorTypeId(result.type);

  result.id = m_module.opAccessChain(
    m_module.defPointerType(vectorTypeId, spv::StorageClassPrivate),
    m_vArray, 1, &indexId);

  return result;
}
|
|
|
|
// Declares a variable for a register and returns a pointer to it.
//
// name:         Debug name of the variable
// ctype:        Component type
// ccount:       Number of components
// defaultVal:   Initializer ID, or 0 for no initializer
// storageClass: Storage class of the variable
// builtIn:      Built-in to decorate the variable with. Passing
//               spv::BuiltInMax declares a plain variable.
DxsoRegisterPointer DxsoCompiler::emitRegisterPtr(
  const char*             name,
        DxsoScalarType    ctype,
        uint32_t          ccount,
        uint32_t          defaultVal,
        spv::StorageClass storageClass,
        spv::BuiltIn      builtIn) {
  DxsoRegisterInfo varInfo;
  varInfo.type.ctype   = ctype;
  varInfo.type.ccount  = ccount;
  varInfo.type.alength = 1;
  varInfo.sclass       = storageClass;

  DxsoRegisterPointer pointer;
  pointer.type = DxsoVectorType{ ctype, ccount };

  if (builtIn != spv::BuiltInMax) {
    // This also sets the debug name of the variable
    pointer.id = this->emitNewBuiltinVariable(
      varInfo, builtIn, name, defaultVal);
  } else {
    pointer.id = this->emitNewVariableDefault(varInfo, defaultVal);
    m_module.setDebugName(pointer.id, name);
  }

  return pointer;
}
|
|
|
|
|
|
// Loads the value of a constant register.
//
// reg:      Constant register to load (float, int or bool)
// relative: Optional register for relative addressing
//
// If an ID is stored for the register in m_cFloat, m_cInt or
// m_cBool, that ID is returned directly, except for relatively
// addressed float constants. Everything else is read from the
// constant buffer, which has the following members:
//
//   0: float vectors
//   1: integer vectors
//   2: bool bitmask
//
// Along the way, this tracks the highest constant index in use.
DxsoRegisterValue DxsoCompiler::emitLoadConstant(
  const DxsoBaseRegister& reg,
  const DxsoBaseRegister* relative) {
  DxsoRegisterValue result = { };

  const uint32_t regNum = reg.id.num;

  switch (reg.id.type) {
    case DxsoRegisterType::Const: {
      result.type = { DxsoScalarType::Float32, 4 };

      if (relative) {
        // Any float constant might get accessed. Also request
        // constant copies if strict copies are enabled, or if
        // an ID is stored for the base register.
        m_meta.maxConstIndexF = m_layout->floatCount;
        m_meta.needsConstantCopies |= m_moduleInfo.options.strictConstantCopies
                                   || m_cFloat.at(regNum) != 0;
      } else {
        result.id = m_cFloat.at(regNum);
        m_meta.maxConstIndexF = std::min(
          std::max(m_meta.maxConstIndexF, regNum + 1),
          m_layout->floatCount);
      }
    } break;

    case DxsoRegisterType::ConstInt: {
      result.type = { DxsoScalarType::Sint32, 4 };
      result.id   = m_cInt.at(regNum);
      m_meta.maxConstIndexI = std::min(
        std::max(m_meta.maxConstIndexI, regNum + 1),
        m_layout->intCount);
    } break;

    case DxsoRegisterType::ConstBool: {
      result.type = { DxsoScalarType::Bool, 1 };
      result.id   = m_cBool.at(regNum);
      m_meta.maxConstIndexB = std::min(
        std::max(m_meta.maxConstIndexB, regNum + 1),
        m_layout->boolCount);
    } break;

    default:
      break;
  }

  // A stored ID was found, no need to touch the buffer
  if (result.id)
    return result;

  uint32_t relativeIdx = this->emitArrayIndex(regNum, relative);

  if (reg.id.type == DxsoRegisterType::ConstBool) {
    // Bool constants have no relative indexing, so we can do the
    // bitfield magic for SWVP at compile time. Each uint holds 32
    // bools, and each uvec4 of the bitmask array holds 128 bools.
    const bool singleBitmask = m_layout->bitmaskCount == 1;

    uint32_t uintType  = getScalarTypeId(DxsoScalarType::Uint32);
    uint32_t uvec4Type = getVectorTypeId({ DxsoScalarType::Uint32, 4 });

    std::array<uint32_t, 2> indices = {
      m_module.constu32(2),
      m_module.constu32(regNum / 128) };

    // A single bitmask is a plain uint, so the array index is unused
    uint32_t indexCount = singleBitmask ? 1 : 2;
    uint32_t accessType = singleBitmask ? uintType : uvec4Type;

    uint32_t ptrId = m_module.opAccessChain(
      m_module.defPointerType(accessType, spv::StorageClassUniform),
      m_cBuffer, indexCount, indices.data());

    uint32_t bitIdx   = m_module.consti32(regNum % 32);
    uint32_t bitfield = m_module.opLoad(accessType, ptrId);

    if (!singleBitmask) {
      // Pick the component of the uvec4 that contains our bit
      uint32_t component = (regNum % 128) / 32;
      bitfield = m_module.opCompositeExtract(uintType, bitfield, 1, &component);
    }

    uint32_t bit = m_module.opBitFieldUExtract(
      uintType, bitfield, bitIdx, m_module.consti32(1));

    result.id = m_module.opINotEqual(
      getVectorTypeId(result.type),
      bit, m_module.constu32(0));
  } else {
    // Float constants are member 0, anything else is read from member 1
    uint32_t structIdx = m_module.constu32(
      reg.id.type == DxsoRegisterType::Const ? 0u : 1u);

    std::array<uint32_t, 2> indices = { structIdx, relativeIdx };

    uint32_t typeId = getVectorTypeId(result.type);
    uint32_t ptrId  = m_module.opAccessChain(
      m_module.defPointerType(typeId, spv::StorageClassUniform),
      m_cBuffer, indices.size(), indices.data());

    result.id = m_module.opLoad(typeId, ptrId);

    if (relative) {
      // Relative accesses beyond the float constant count yield
      // zero. NOTE(review): the bound and the zero vector are both
      // float-specific, so this presumably relies on relative
      // addressing only being used with float constants.
      uint32_t constCount = m_module.constu32(m_layout->floatCount);

      uint32_t inBounds = m_module.opULessThan(
        m_module.defBoolType(), relativeIdx, constCount);

      // Expand condition to bvec4 since the result has four components
      std::array<uint32_t, 4> condIds = { inBounds, inBounds, inBounds, inBounds };

      uint32_t inBoundsVec = m_module.opCompositeConstruct(
        m_module.defVectorType(m_module.defBoolType(), 4),
        condIds.size(), condIds.data());

      result.id = m_module.opSelect(typeId, inBoundsVec, result.id,
        m_module.constvec4f32(0.0f, 0.0f, 0.0f, 0.0f));
    }
  }

  return result;
}
|
|
|
|
|
|
// Returns a pointer to an element of the output array.
//
// texcrdOut: Whether reg is a texture coordinate output. These
//            are placed behind the two color registers.
// reg:       Register whose number selects the element
// relative:  Optional register for relative addressing
DxsoRegisterPointer DxsoCompiler::emitOutputPtr(
        bool              texcrdOut,
  const DxsoBaseRegister& reg,
  const DxsoBaseRegister* relative) {
  // Skip the two color regs when addressing a texcoord output.
  const uint32_t elementIdx = reg.id.num + (texcrdOut ? 2u : 0u);

  DxsoRegisterPointer result;
  result.type = DxsoVectorType{ DxsoScalarType::Float32, 4 };

  uint32_t indexId = this->emitArrayIndex(elementIdx, relative);

  const uint32_t vectorTypeId = getVectorTypeId(result.type);

  result.id = m_module.opAccessChain(
    m_module.defPointerType(vectorTypeId, spv::StorageClassPrivate),
    m_oArray, 1, &indexId);

  return result;
}
|
|
|
|
|
|
DxsoRegisterPointer DxsoCompiler::emitGetOperandPtr(
|
|
const DxsoBaseRegister& reg,
|
|
const DxsoBaseRegister* relative) {
|
|
switch (reg.id.type) {
|
|
case DxsoRegisterType::Temp: {
|
|
DxsoRegisterPointer& ptr = m_rRegs.at(reg.id.num);
|
|
if (ptr.id == 0) {
|
|
std::string name = str::format("r", reg.id.num);
|
|
ptr = this->emitRegisterPtr(
|
|
name.c_str(), DxsoScalarType::Float32, 4,
|
|
m_module.constvec4f32(0.0f, 0.0f, 0.0f, 0.0f));
|
|
}
|
|
return ptr;
|
|
}
|
|
|
|
case DxsoRegisterType::Input: {
|
|
if (!(m_explicitInputs & 1u << reg.id.num)) {
|
|
this->emitDclInterface(
|
|
true, reg.id.num,
|
|
DxsoSemantic{ DxsoUsage::Color, reg.id.num },
|
|
IdentityWriteMask, false);
|
|
}
|
|
|
|
return this->emitInputPtr(false, reg, relative);
|
|
}
|
|
|
|
case DxsoRegisterType::PixelTexcoord:
|
|
case DxsoRegisterType::Texture: {
|
|
if (m_programInfo.type() == DxsoProgramTypes::PixelShader) {
|
|
// Texture register
|
|
|
|
// SM2, or SM 1.4
|
|
if (reg.id.type == DxsoRegisterType::PixelTexcoord
|
|
|| m_programInfo.majorVersion() >= 2
|
|
|| (m_programInfo.majorVersion() == 1
|
|
&& m_programInfo.minorVersion() == 4)) {
|
|
uint32_t adjustedNumber = reg.id.num + 2;
|
|
if (!(m_explicitInputs & 1u << adjustedNumber)) {
|
|
this->emitDclInterface(
|
|
true, adjustedNumber,
|
|
DxsoSemantic{ DxsoUsage::Texcoord, reg.id.num },
|
|
IdentityWriteMask, false);
|
|
}
|
|
|
|
return this->emitInputPtr(true, reg, relative);
|
|
}
|
|
else {
|
|
// User must use tex/texcoord to put data in this private register.
|
|
// We use the an oob id which fxc never generates for the texcoord data.
|
|
DxsoRegisterPointer& ptr = m_tRegs.at(reg.id.num);
|
|
if (ptr.id == 0) {
|
|
std::string name = str::format("t", reg.id.num);
|
|
ptr = this->emitRegisterPtr(
|
|
name.c_str(), DxsoScalarType::Float32, 4,
|
|
m_module.constvec4f32(0.0f, 0.0f, 0.0f, 0.0f));
|
|
}
|
|
return ptr;
|
|
}
|
|
}
|
|
else {
|
|
// Address register
|
|
if (m_vs.addr.id == 0) {
|
|
m_vs.addr = this->emitRegisterPtr(
|
|
"a0", DxsoScalarType::Sint32, 4,
|
|
m_module.constvec4i32(0, 0, 0, 0));
|
|
}
|
|
return m_vs.addr;
|
|
}
|
|
}
|
|
|
|
case DxsoRegisterType::RasterizerOut:
|
|
switch (reg.id.num) {
|
|
case RasterOutPosition:
|
|
if (m_vs.oPos.id == 0) {
|
|
m_vs.oPos = this->emitRegisterPtr(
|
|
"oPos", DxsoScalarType::Float32, 4,
|
|
m_module.constvec4f32(0.0f, 0.0f, 0.0f, 0.0f),
|
|
spv::StorageClassOutput, spv::BuiltInPosition);
|
|
}
|
|
return m_vs.oPos;
|
|
|
|
case RasterOutFog:
|
|
if (m_fog.id == 0) {
|
|
bool input = m_programInfo.type() == DxsoProgramType::PixelShader;
|
|
DxsoSemantic semantic = DxsoSemantic{ DxsoUsage::Fog, 0 };
|
|
|
|
uint32_t slot = RegisterLinkerSlot(semantic);
|
|
|
|
uint32_t& slots = input
|
|
? m_interfaceSlots.inputSlots
|
|
: m_interfaceSlots.outputSlots;
|
|
|
|
slots |= 1u << slot;
|
|
|
|
m_fog = this->emitRegisterPtr(
|
|
input ? "vFog" : "oFog",
|
|
DxsoScalarType::Float32, 1,
|
|
input ? 0 : m_module.constf32(1.0f),
|
|
input ? spv::StorageClassInput : spv::StorageClassOutput);
|
|
|
|
m_entryPointInterfaces.push_back(m_fog.id);
|
|
|
|
m_module.decorateLocation(m_fog.id, slot);
|
|
}
|
|
return m_fog;
|
|
|
|
case RasterOutPointSize:
|
|
if (m_vs.oPSize.id == 0) {
|
|
m_vs.oPSize = this->emitRegisterPtr(
|
|
"oPSize", DxsoScalarType::Float32, 1,
|
|
m_module.constf32(0.0f),
|
|
spv::StorageClassOutput, spv::BuiltInPointSize);
|
|
}
|
|
return m_vs.oPSize;
|
|
}
|
|
|
|
case DxsoRegisterType::ColorOut: {
|
|
uint32_t idx = std::min(reg.id.num, 4u);
|
|
|
|
if (m_ps.oColor[idx].id == 0) {
|
|
std::string name = str::format("oC", idx);
|
|
m_ps.oColor[idx] = this->emitRegisterPtr(
|
|
name.c_str(), DxsoScalarType::Float32, 4,
|
|
m_module.constvec4f32(0.0f, 0.0f, 0.0f, 0.0f),
|
|
spv::StorageClassOutput);
|
|
|
|
m_interfaceSlots.outputSlots |= 1u << idx;
|
|
m_module.decorateLocation(m_ps.oColor[idx].id, idx);
|
|
m_module.decorateIndex(m_ps.oColor[idx].id, 0);
|
|
|
|
m_entryPointInterfaces.push_back(m_ps.oColor[idx].id);
|
|
m_usedRTs |= (1u << idx);
|
|
}
|
|
return m_ps.oColor[idx];
|
|
}
|
|
|
|
case DxsoRegisterType::AttributeOut: {
|
|
auto ptr = this->emitOutputPtr(false, reg, nullptr);
|
|
|
|
if (!(m_explicitOutputs & 1u << reg.id.num)) {
|
|
this->emitDclInterface(
|
|
false, reg.id.num,
|
|
DxsoSemantic{ DxsoUsage::Color, reg.id.num },
|
|
IdentityWriteMask, false); // TODO: Do we want to make this centroid?
|
|
|
|
m_module.opStore(ptr.id, m_module.constfReplicant(0, ptr.type.ccount));
|
|
}
|
|
|
|
return ptr;
|
|
}
|
|
|
|
case DxsoRegisterType::Output: {
|
|
bool texcrdOut = m_programInfo.type() == DxsoProgramTypes::VertexShader
|
|
&& m_programInfo.majorVersion() != 3;
|
|
|
|
auto ptr = this->emitOutputPtr(texcrdOut, reg, !texcrdOut ? relative : nullptr);
|
|
|
|
if (texcrdOut) {
|
|
uint32_t adjustedNumber = reg.id.num + 2;
|
|
if (!(m_explicitOutputs & 1u << adjustedNumber)) {
|
|
this->emitDclInterface(
|
|
false, adjustedNumber,
|
|
DxsoSemantic{ DxsoUsage::Texcoord, reg.id.num },
|
|
IdentityWriteMask, false);
|
|
|
|
m_module.opStore(ptr.id, m_module.constfReplicant(0, ptr.type.ccount));
|
|
}
|
|
}
|
|
|
|
return ptr;
|
|
}
|
|
|
|
case DxsoRegisterType::DepthOut:
|
|
if (m_ps.oDepth.id == 0) {
|
|
m_module.setExecutionMode(m_entryPointId,
|
|
spv::ExecutionModeDepthReplacing);
|
|
|
|
m_ps.oDepth = this->emitRegisterPtr(
|
|
"oDepth", DxsoScalarType::Float32, 1,
|
|
m_module.constf32(0.0f),
|
|
spv::StorageClassOutput, spv::BuiltInFragDepth);
|
|
}
|
|
return m_ps.oDepth;
|
|
|
|
case DxsoRegisterType::Loop:
|
|
if (m_loopCounter.id == 0) {
|
|
m_loopCounter = this->emitRegisterPtr(
|
|
"aL", DxsoScalarType::Sint32, 1,
|
|
m_module.consti32(0));
|
|
}
|
|
return m_loopCounter;
|
|
|
|
case DxsoRegisterType::MiscType:
|
|
if (reg.id.num == MiscTypePosition) {
|
|
if (m_ps.vPos.id == 0) {
|
|
DxsoRegisterPointer fragCoord = this->emitRegisterPtr(
|
|
"ps_frag_coord", DxsoScalarType::Float32, 4, 0,
|
|
spv::StorageClassInput, spv::BuiltInFragCoord);
|
|
|
|
DxsoRegisterValue val = this->emitValueLoad(fragCoord);
|
|
val.id = m_module.opFSub(
|
|
getVectorTypeId(val.type), val.id,
|
|
m_module.constvec4f32(0.5f, 0.5f, 0.0f, 0.0f));
|
|
|
|
m_ps.vPos = this->emitRegisterPtr(
|
|
"vPos", DxsoScalarType::Float32, 4, 0);
|
|
|
|
m_module.opStore(m_ps.vPos.id, val.id);
|
|
}
|
|
return m_ps.vPos;
|
|
}
|
|
else { // MiscTypeFace
|
|
if (m_ps.vFace.id == 0) {
|
|
DxsoRegisterPointer faceBool = this->emitRegisterPtr(
|
|
"ps_is_front_face", DxsoScalarType::Bool, 1, 0,
|
|
spv::StorageClassInput, spv::BuiltInFrontFacing);
|
|
|
|
DxsoRegisterValue frontFace = emitValueLoad(faceBool);
|
|
DxsoRegisterValue selectOp = emitRegisterExtend(frontFace, 4);
|
|
|
|
m_ps.vFace = this->emitRegisterPtr(
|
|
"vFace", DxsoScalarType::Float32, 4, 0);
|
|
|
|
m_module.opStore(
|
|
m_ps.vFace.id,
|
|
m_module.opSelect(getVectorTypeId(m_ps.vFace.type), selectOp.id,
|
|
m_module.constvec4f32( 1.0f, 1.0f, 1.0f, 1.0f),
|
|
m_module.constvec4f32(-1.0f, -1.0f, -1.0f, -1.0f)));
|
|
}
|
|
return m_ps.vFace;
|
|
}
|
|
|
|
case DxsoRegisterType::Predicate: {
|
|
DxsoRegisterPointer& ptr = m_pRegs.at(reg.id.num);
|
|
if (ptr.id == 0) {
|
|
std::string name = str::format("p", reg.id.num);
|
|
ptr = this->emitRegisterPtr(
|
|
name.c_str(), DxsoScalarType::Bool, 4,
|
|
m_module.constvec4b32(false, false, false, false));
|
|
}
|
|
return ptr;
|
|
}
|
|
|
|
default: {
|
|
//Logger::warn(str::format("emitGetOperandPtr: unhandled reg type: ", reg.id.type));
|
|
|
|
DxsoRegisterPointer nullPointer;
|
|
nullPointer.id = 0;
|
|
return nullPointer;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
// Emits the comparison selected by `cmp` on the operands `a` and `b`
// and returns the id of the result. `type` is used as the result type
// of the comparison instruction. Never and Always ignore the operands
// and yield a replicated boolean constant with `type.ccount` components;
// any comparison value not listed here is treated like Never.
uint32_t DxsoCompiler::emitBoolComparison(DxsoVectorType type, DxsoComparison cmp, uint32_t a, uint32_t b) {
  const uint32_t resultTypeId = getVectorTypeId(type);

  switch (cmp) {
    case DxsoComparison::GreaterThan:
      return m_module.opFOrdGreaterThan(resultTypeId, a, b);

    case DxsoComparison::Equal:
      return m_module.opFOrdEqual(resultTypeId, a, b);

    case DxsoComparison::GreaterEqual:
      return m_module.opFOrdGreaterThanEqual(resultTypeId, a, b);

    case DxsoComparison::LessThan:
      return m_module.opFOrdLessThan(resultTypeId, a, b);

    case DxsoComparison::NotEqual:
      return m_module.opFOrdNotEqual(resultTypeId, a, b);

    case DxsoComparison::LessEqual:
      return m_module.opFOrdLessThanEqual(resultTypeId, a, b);

    case DxsoComparison::Always:
      return m_module.constbReplicant(true, type.ccount);

    case DxsoComparison::Never:
    default:
      return m_module.constbReplicant(false, type.ccount);
  }
}
|
|
|
|
|
|
// Loads the value behind `ptr`. The returned value carries the
// same vector type as the pointer and the id of the emitted load.
DxsoRegisterValue DxsoCompiler::emitValueLoad(
        DxsoRegisterPointer     ptr) {
  DxsoRegisterValue loaded;
  loaded.type = ptr.type;

  const uint32_t loadedTypeId = getVectorTypeId(loaded.type);
  loaded.id = m_module.opLoad(loadedTypeId, ptr.id);

  return loaded;
}
|
|
|
|
|
|
// Selects between `src` and `dst` using `pred` as the condition:
// where the predicate is true the result takes `src`, otherwise it
// keeps `dst`. The returned value has the type of `dst`.
//
// The select condition has to have as many components as the values
// being selected, so the predicate is resized to the destination
// width first. The resize mask must be derived from the *destination*
// component count: building it from the predicate's own count (as was
// done previously) turns the swizzle into a no-op and leaves the
// mismatch in place.
//
// A scalar predicate is replicated; a wider predicate is truncated to
// its leading components.
// NOTE(review): a non-scalar predicate that is narrower than `dst`
// cannot be widened meaningfully here - confirm callers never pass one.
DxsoRegisterValue DxsoCompiler::applyPredicate(DxsoRegisterValue pred, DxsoRegisterValue dst, DxsoRegisterValue src) {
  if (dst.type.ccount != pred.type.ccount) {
    DxsoRegMask mask = DxsoRegMask(
      dst.type.ccount > 0,
      dst.type.ccount > 1,
      dst.type.ccount > 2,
      dst.type.ccount > 3);

    pred = emitRegisterSwizzle(pred, IdentitySwizzle, mask);
  }

  dst.id = m_module.opSelect(
    getVectorTypeId(dst.type),
    pred.id,
    src.id, dst.id);

  return dst;
}
|
|
|
|
|
|
// Stores `value` into the register behind `ptr`, honoring the write
// mask and an optional predicate.
//
//  ptr       - destination register pointer
//  value     - value to write; holds one component per set bit of
//              `writeMask`, or a single component that is replicated
//  writeMask - destination components to overwrite
//  predicate - if its id is non-zero, components for which it is false
//              keep their current contents
void DxsoCompiler::emitValueStore(
        DxsoRegisterPointer     ptr,
        DxsoRegisterValue       value,
        DxsoRegMask             writeMask,
        DxsoRegisterValue       predicate) {
  // If the source value consists of only one component,
  // it is stored in all components of the destination.
  if (value.type.ccount == 1)
    value = emitRegisterExtend(value, writeMask.popCount());

  if (ptr.type.ccount == writeMask.popCount()) {
    if (predicate.id)
      value = applyPredicate(predicate, emitValueLoad(ptr), value);

    // Simple case: We write to the entire register
    m_module.opStore(ptr.id, value.id);
  } else {
    // We only write to part of the destination
    // register, so we need to load and modify it
    DxsoRegisterValue current = emitValueLoad(ptr);
    DxsoRegisterValue merged  = emitRegisterInsert(current, value, writeMask);

    // The predicated result has to be what ends up in the register.
    // Previously it was assigned to `value` and then dropped, so the
    // predicate had no effect on partial writes.
    if (predicate.id)
      merged = applyPredicate(predicate, current, merged);

    m_module.opStore(ptr.id, merged.id);
  }
}
|
|
|
|
|
|
// Clamps every component of `srcValue` to the range [lb, ub].
// The bounds are replicated to the component count of the value;
// the returned value keeps the type of the input.
DxsoRegisterValue DxsoCompiler::emitClampBoundReplicant(
        DxsoRegisterValue       srcValue,
        float                   lb,
        float                   ub) {
  DxsoRegisterValue clamped = srcValue;

  clamped.id = m_module.opFClamp(getVectorTypeId(srcValue.type), srcValue.id,
    m_module.constfReplicant(lb, srcValue.type.ccount),
    m_module.constfReplicant(ub, srcValue.type.ccount));

  return clamped;
}
|
|
|
|
|
|
// Saturates a value, i.e. clamps every component to [0, 1].
DxsoRegisterValue DxsoCompiler::emitSaturate(
        DxsoRegisterValue       srcValue) {
  const float lowerBound = 0.0f;
  const float upperBound = 1.0f;

  return emitClampBoundReplicant(srcValue, lowerBound, upperBound);
}
|
|
|
|
|
|
// Emits a dot product of `a` and `b`. The result is a single
// component of the same scalar type as `a`.
DxsoRegisterValue DxsoCompiler::emitDot(
        DxsoRegisterValue       a,
        DxsoRegisterValue       b) {
  DxsoVectorType scalarType = a.type;
  scalarType.ccount = 1;

  DxsoRegisterValue product;
  product.type = scalarType;
  product.id   = m_module.opDot(getVectorTypeId(product.type), a.id, b.id);

  return product;
}
|
|
|
|
|
|
// Merges `srcValue` into `dstValue`. For every set bit in `srcMask`
// the corresponding destination component is replaced; the source
// components are consumed consecutively, in mask order. The result
// has the type of `dstValue`.
DxsoRegisterValue DxsoCompiler::emitRegisterInsert(
        DxsoRegisterValue       dstValue,
        DxsoRegisterValue       srcValue,
        DxsoRegMask             srcMask) {
  DxsoRegisterValue merged;
  merged.type = dstValue.type;

  const uint32_t mergedTypeId = getVectorTypeId(merged.type);

  // An empty insertion mask leaves the destination untouched
  if (srcMask.popCount() == 0) {
    merged.id = dstValue.id;
    return merged;
  }

  // Scalar destination: the first mask bit picks one of the two values
  if (dstValue.type.ccount == 1) {
    merged.id = srcMask[0] ? srcValue.id : dstValue.id;
    return merged;
  }

  // Scalar source, vector destination: OpVectorShuffle needs two
  // vectors, so the component is written with OpCompositeInsert.
  if (srcValue.type.ccount == 1) {
    const uint32_t targetComponent = srcMask.firstSet();

    merged.id = m_module.opCompositeInsert(mergedTypeId,
      srcValue.id, dstValue.id, 1, &targetComponent);
    return merged;
  }

  // Two vectors: build a shuffle that takes masked components from
  // the source (indexed after the destination's components) and all
  // other components from the destination.
  const uint32_t dstCount = dstValue.type.ccount;

  std::array<uint32_t, 4> shuffle;
  uint32_t nextSrcComponent = dstCount;

  for (uint32_t c = 0; c < dstCount; c++) {
    if (srcMask[c])
      shuffle.at(c) = nextSrcComponent++;
    else
      shuffle.at(c) = c;
  }

  merged.id = m_module.opVectorShuffle(
    mergedTypeId, dstValue.id, srcValue.id,
    dstCount, shuffle.data());

  return merged;
}
|
|
|
|
|
|
// Loads a register without applying swizzle or modifiers. Float,
// integer and boolean constant registers go through emitLoadConstant;
// every other register type is loaded through its operand pointer.
DxsoRegisterValue DxsoCompiler::emitRegisterLoadRaw(
  const DxsoBaseRegister&       reg,
  const DxsoBaseRegister*       relative) {
  const auto regType = reg.id.type;

  const bool isConstantRegister =
       regType == DxsoRegisterType::Const
    || regType == DxsoRegisterType::ConstInt
    || regType == DxsoRegisterType::ConstBool;

  if (isConstantRegister)
    return emitLoadConstant(reg, relative);

  return emitValueLoad(emitGetOperandPtr(reg, relative));
}
|
|
|
|
|
|
// Replicates `value` into a vector with `size` components of the
// same scalar type. A requested size of one returns the input as is.
DxsoRegisterValue DxsoCompiler::emitRegisterExtend(
        DxsoRegisterValue       value,
        uint32_t                size) {
  if (size == 1)
    return value;

  // Every constituent of the new vector is the input value
  std::array<uint32_t, 4> constituents;
  constituents.fill(value.id);

  DxsoRegisterValue extended;
  extended.type.ctype  = value.type.ctype;
  extended.type.ccount = size;

  const uint32_t extendedTypeId = getVectorTypeId(extended.type);

  extended.id = m_module.opCompositeConstruct(
    extendedTypeId, size, constituents.data());

  return extended;
}
|
|
|
|
|
|
// Applies `swizzle` to `value` and keeps only the components enabled
// in `writeMask`, packed consecutively in mask order. A scalar input
// is replicated to the number of enabled components instead.
DxsoRegisterValue DxsoCompiler::emitRegisterSwizzle(
        DxsoRegisterValue       value,
        DxsoRegSwizzle          swizzle,
        DxsoRegMask             writeMask) {
  if (value.type.ccount == 1)
    return emitRegisterExtend(value, writeMask.popCount());

  // Gather the source component index for every enabled component
  std::array<uint32_t, 4> picked = { 0, 0, 0, 0 };
  uint32_t pickedCount = 0;

  for (uint32_t c = 0; c < 4; c++) {
    if (!writeMask[c])
      continue;

    picked[pickedCount] = swizzle[c];
    pickedCount += 1;
  }

  // If swizzle and mask together select every component in its
  // original position, no instruction has to be emitted at all.
  bool isNoOp = (pickedCount == value.type.ccount);

  for (uint32_t c = 0; isNoOp && c < pickedCount; c++)
    isNoOp = (picked[c] == c);

  if (isNoOp)
    return value;

  DxsoRegisterValue swizzled;
  swizzled.type.ctype  = value.type.ctype;
  swizzled.type.ccount = pickedCount;

  const uint32_t swizzledTypeId = getVectorTypeId(swizzled.type);

  // A single component is extracted; anything wider is shuffled
  if (pickedCount == 1) {
    swizzled.id = m_module.opCompositeExtract(
      swizzledTypeId, value.id, 1, picked.data());
  } else {
    swizzled.id = m_module.opVectorShuffle(
      swizzledTypeId, value.id, value.id,
      pickedCount, picked.data());
  }

  return swizzled;
}
|
|
|
|
|
|
// Applies the source modifiers that have to run before the swizzle.
// Only Dz and Dw are handled here: they divide the whole value by its
// own z (index 2) or w (index 3) component. Any other modifier
// returns the value unchanged.
DxsoRegisterValue DxsoCompiler::emitSrcOperandPreSwizzleModifiers(
        DxsoRegisterValue       value,
        DxsoRegModifier         modifier) {
  const bool divideByZ = modifier == DxsoRegModifier::Dz;
  const bool divideByW = modifier == DxsoRegModifier::Dw;

  if (!divideByZ && !divideByW)
    return value;

  // r / r.z  or  r / r.w
  const uint32_t divisorIndex = divideByZ ? 2 : 3;

  std::array<uint32_t, 4> divisorIndices;
  divisorIndices.fill(divisorIndex);

  // Replicate the divisor component across the whole vector
  uint32_t divisor = m_module.opVectorShuffle(
    getVectorTypeId(value.type), value.id, value.id,
    value.type.ccount, divisorIndices.data());

  value.id = m_module.opFDiv(
    getVectorTypeId(value.type), value.id, divisor);

  return value;
}
|
|
|
|
|
|
// Applies the source modifiers that run after the swizzle. The
// modifier first selects at most one base operation (bias, sign
// expansion, complement, doubling, absolute value or logical not);
// the *Neg variants and plain Neg then negate the result.
DxsoRegisterValue DxsoCompiler::emitSrcOperandPostSwizzleModifiers(
        DxsoRegisterValue       value,
        DxsoRegModifier         modifier) {
  const uint32_t componentCount = value.type.ccount;

  // Stage one: the base operation
  switch (modifier) {
    // r - 0.5
    case DxsoRegModifier::Bias:
    case DxsoRegModifier::BiasNeg: {
      uint32_t half = m_module.constfReplicant(0.5f, componentCount);

      value.id = m_module.opFSub(
        getVectorTypeId(value.type), value.id, half);
    } break;

    // fma(r, 2.0f, -1.0f)
    case DxsoRegModifier::Sign:
    case DxsoRegModifier::SignNeg: {
      uint32_t two      = m_module.constfReplicant( 2.0f, componentCount);
      uint32_t minusOne = m_module.constfReplicant(-1.0f, componentCount);

      value.id = m_module.opFFma(
        getVectorTypeId(value.type), value.id, two, minusOne);
    } break;

    // 1 - r
    case DxsoRegModifier::Comp: {
      uint32_t one = m_module.constfReplicant(1.0f, componentCount);

      value.id = m_module.opFSub(
        getVectorTypeId(value.type), one, value.id);
    } break;

    // r * 2
    case DxsoRegModifier::X2:
    case DxsoRegModifier::X2Neg: {
      uint32_t two = m_module.constfReplicant(2.0f, componentCount);

      value.id = m_module.opFMul(
        getVectorTypeId(value.type), value.id, two);
    } break;

    // abs( r )
    case DxsoRegModifier::Abs:
    case DxsoRegModifier::AbsNeg:
      value.id = m_module.opFAbs(
        getVectorTypeId(value.type), value.id);
      break;

    // !r
    case DxsoRegModifier::Not:
      value.id = m_module.opLogicalNot(
        getVectorTypeId(value.type), value.id);
      break;

    default:
      break;
  }

  // Stage two: negation. BiasNeg and SignNeg are treated as a
  // plain negation of the stage one result, X2Neg yields -r * 2
  // and AbsNeg yields -abs(r).
  switch (modifier) {
    case DxsoRegModifier::Neg:
    case DxsoRegModifier::BiasNeg:
    case DxsoRegModifier::SignNeg:
    case DxsoRegModifier::X2Neg:
    case DxsoRegModifier::AbsNeg:
      value.id = m_module.opFNegate(
        getVectorTypeId(value.type), value.id);
      break;

    default:
      break;
  }

  return value;
}
|
|
|
|
// Loads a source operand and applies, in this order: the PS 1.x
// constant clamp, the pre-swizzle modifiers, the swizzle restricted
// to `writeMask`, and the post-swizzle modifiers.
DxsoRegisterValue DxsoCompiler::emitRegisterLoad(
  const DxsoBaseRegister&       reg,
        DxsoRegMask             writeMask,
  const DxsoBaseRegister*       relative) {
  DxsoRegisterValue operand = emitRegisterLoadRaw(reg, relative);

  // Pixel shader model 1.x clamps float constants to [-1, 1]
  const bool isPs1FloatConstant =
       m_programInfo.type() == DxsoProgramType::PixelShader
    && m_programInfo.majorVersion() == 1
    && reg.id.type == DxsoRegisterType::Const;

  if (isPs1FloatConstant)
    operand = emitClampBoundReplicant(operand, -1.0f, 1.0f);

  // Modifiers that need the unswizzled value come first
  DxsoRegisterValue preModified = emitSrcOperandPreSwizzleModifiers(operand, reg.modifier);

  // Then select the components the instruction actually reads
  DxsoRegisterValue swizzled = emitRegisterSwizzle(preModified, reg.swizzle, writeMask);

  // The remaining modifiers operate on the swizzled value
  return emitSrcOperandPostSwizzleModifiers(swizzled, reg.modifier);
}
|
|
|
|
void DxsoCompiler::emitDcl(const DxsoInstructionContext& ctx) {
|
|
auto id = ctx.dst.id;
|
|
|
|
if (id.type == DxsoRegisterType::Sampler) {
|
|
this->emitDclSampler(
|
|
ctx.dst.id.num,
|
|
ctx.dcl.textureType);
|
|
}
|
|
else if (id.type == DxsoRegisterType::Input
|
|
|| id.type == DxsoRegisterType::Texture
|
|
|| id.type == DxsoRegisterType::Output) {
|
|
DxsoSemantic semantic = ctx.dcl.semantic;
|
|
|
|
uint32_t vIndex = id.num;
|
|
|
|
if (m_programInfo.type() == DxsoProgramTypes::PixelShader) {
|
|
// Semantic in PS < 3 is based upon id.
|
|
if (m_programInfo.majorVersion() < 3) {
|
|
// Account for the two color registers.
|
|
if (id.type == DxsoRegisterType::Texture)
|
|
vIndex += 2;
|
|
|
|
semantic = DxsoSemantic{
|
|
id.type == DxsoRegisterType::Texture ? DxsoUsage::Texcoord : DxsoUsage::Color,
|
|
id.num };
|
|
}
|
|
}
|
|
|
|
this->emitDclInterface(
|
|
id.type != DxsoRegisterType::Output,
|
|
vIndex,
|
|
semantic,
|
|
ctx.dst.mask,
|
|
ctx.dst.centroid);
|
|
}
|
|
else {
|
|
//Logger::warn(str::format("DxsoCompiler::emitDcl: unhandled register type ", id.type));
|
|
}
|
|
}
|
|
|
|
void DxsoCompiler::emitDef(const DxsoInstructionContext& ctx) {
|
|
switch (ctx.instruction.opcode) {
|
|
case DxsoOpcode::Def: emitDefF(ctx); break;
|
|
case DxsoOpcode::DefI: emitDefI(ctx); break;
|
|
case DxsoOpcode::DefB: emitDefB(ctx); break;
|
|
default:
|
|
throw DxvkError("DxsoCompiler::emitDef: Invalid definition opcode");
|
|
break;
|
|
}
|
|
}
|
|
|
|
void DxsoCompiler::emitDefF(const DxsoInstructionContext& ctx) {
|
|
const float* data = ctx.def.float32;
|
|
|
|
uint32_t constId = m_module.constvec4f32(data[0], data[1], data[2], data[3]);
|
|
m_cFloat.at(ctx.dst.id.num) = constId;
|
|
|
|
std::string name = str::format("cF", ctx.dst.id.num, "_def");
|
|
m_module.setDebugName(constId, name.c_str());
|
|
|
|
DxsoDefinedConstant constant;
|
|
constant.uboIdx = ctx.dst.id.num;
|
|
for (uint32_t i = 0; i < 4; i++)
|
|
constant.float32[i] = data[i];
|
|
m_constants.push_back(constant);
|
|
}
|
|
|
|
void DxsoCompiler::emitDefI(const DxsoInstructionContext& ctx) {
|
|
const int32_t* data = ctx.def.int32;
|
|
|
|
uint32_t constId = m_module.constvec4i32(data[0], data[1], data[2], data[3]);
|
|
m_cInt.at(ctx.dst.id.num) = constId;
|
|
|
|
std::string name = str::format("cI", ctx.dst.id.num, "_def");
|
|
m_module.setDebugName(constId, name.c_str());
|
|
}
|
|
|
|
void DxsoCompiler::emitDefB(const DxsoInstructionContext& ctx) {
|
|
const int32_t* data = ctx.def.int32;
|
|
|
|
uint32_t constId = m_module.constBool(data[0] != 0);
|
|
m_cBool.at(ctx.dst.id.num) = constId;
|
|
|
|
std::string name = str::format("cB", ctx.dst.id.num, "_def");
|
|
m_module.setDebugName(constId, name.c_str());
|
|
}
|
|
|
|
|
|
// Returns true for the registers treated as single-component by the
// callers in this file: the depth output and the point size and fog
// rasterizer outputs.
bool DxsoCompiler::isScalarRegister(DxsoRegisterId id) {
  if (id == DxsoRegisterId{DxsoRegisterType::DepthOut, 0})
    return true;

  if (id == DxsoRegisterId{DxsoRegisterType::RasterizerOut, RasterOutPointSize})
    return true;

  if (id == DxsoRegisterId{DxsoRegisterType::RasterizerOut, RasterOutFog})
    return true;

  return false;
}
|
|
|
|
|
|
void DxsoCompiler::emitMov(const DxsoInstructionContext& ctx) {
|
|
DxsoRegisterPointer dst = emitGetOperandPtr(ctx.dst);
|
|
|
|
DxsoRegMask mask = ctx.dst.mask;
|
|
|
|
if (isScalarRegister(ctx.dst.id))
|
|
mask = DxsoRegMask(true, false, false, false);
|
|
|
|
DxsoRegisterValue src0 = emitRegisterLoad(ctx.src[0], mask);
|
|
|
|
DxsoRegisterValue result;
|
|
result.type.ctype = dst.type.ctype;
|
|
result.type.ccount = mask.popCount();
|
|
|
|
const uint32_t typeId = getVectorTypeId(result.type);
|
|
|
|
if (dst.type.ctype != src0.type.ctype) {
|
|
// We have Mova for this... but it turns out Mov has the same behaviour in d3d9!
|
|
|
|
// Convert float -> int32_t
|
|
// and vice versa
|
|
if (dst.type.ctype == DxsoScalarType::Sint32) {
|
|
// We need to floor for VS 1.1 and below, the documentation is a dirty stinking liar.
|
|
if (m_programInfo.majorVersion() < 2 && m_programInfo.minorVersion() < 2)
|
|
result.id = m_module.opFloor(getVectorTypeId(src0.type), src0.id);
|
|
else
|
|
result.id = m_module.opRound(getVectorTypeId(src0.type), src0.id);
|
|
|
|
result.id = m_module.opConvertFtoS(typeId, result.id);
|
|
}
|
|
else // Float32
|
|
result.id = m_module.opConvertStoF(typeId, src0.id);
|
|
}
|
|
else // No special stuff needed!
|
|
result.id = src0.id;
|
|
|
|
this->emitDstStore(dst, result, mask, ctx.dst.saturate, emitPredicateLoad(ctx), ctx.dst.shift, ctx.dst.id);
|
|
}
|
|
|
|
|
|
void DxsoCompiler::emitVectorAlu(const DxsoInstructionContext& ctx) {
|
|
const auto& src = ctx.src;
|
|
|
|
DxsoRegMask mask = ctx.dst.mask;
|
|
|
|
DxsoRegisterPointer dst = emitGetOperandPtr(ctx.dst);
|
|
|
|
if (isScalarRegister(ctx.dst.id))
|
|
mask = DxsoRegMask(true, false, false, false);
|
|
|
|
DxsoRegisterValue result;
|
|
result.type.ctype = dst.type.ctype;
|
|
result.type.ccount = mask.popCount();
|
|
|
|
DxsoVectorType scalarType = result.type;
|
|
scalarType.ccount = 1;
|
|
|
|
const uint32_t typeId = getVectorTypeId(result.type);
|
|
const uint32_t scalarTypeId = getVectorTypeId(scalarType);
|
|
|
|
const DxsoOpcode opcode = ctx.instruction.opcode;
|
|
switch (opcode) {
|
|
case DxsoOpcode::Add:
|
|
result.id = m_module.opFAdd(typeId,
|
|
emitRegisterLoad(src[0], mask).id,
|
|
emitRegisterLoad(src[1], mask).id);
|
|
break;
|
|
case DxsoOpcode::Sub:
|
|
result.id = m_module.opFSub(typeId,
|
|
emitRegisterLoad(src[0], mask).id,
|
|
emitRegisterLoad(src[1], mask).id);
|
|
break;
|
|
case DxsoOpcode::Mad:
|
|
result.id = m_module.opFFma(typeId,
|
|
emitRegisterLoad(src[0], mask).id,
|
|
emitRegisterLoad(src[1], mask).id,
|
|
emitRegisterLoad(src[2], mask).id);
|
|
break;
|
|
case DxsoOpcode::Mul:
|
|
result.id = m_module.opFMul(typeId,
|
|
emitRegisterLoad(src[0], mask).id,
|
|
emitRegisterLoad(src[1], mask).id);
|
|
break;
|
|
case DxsoOpcode::Rcp:
|
|
result.id = m_module.opFDiv(typeId,
|
|
m_module.constfReplicant(1.0f, result.type.ccount),
|
|
emitRegisterLoad(src[0], mask).id);
|
|
|
|
if (m_moduleInfo.options.d3d9FloatEmulation) {
|
|
result.id = m_module.opNMin(typeId, result.id,
|
|
m_module.constfReplicant(FLT_MAX, result.type.ccount));
|
|
}
|
|
break;
|
|
case DxsoOpcode::Rsq:
|
|
result.id = m_module.opFAbs(typeId,
|
|
emitRegisterLoad(src[0], mask).id);
|
|
|
|
result.id = m_module.opInverseSqrt(typeId,
|
|
result.id);
|
|
|
|
if (m_moduleInfo.options.d3d9FloatEmulation) {
|
|
result.id = m_module.opNMin(typeId, result.id,
|
|
m_module.constfReplicant(FLT_MAX, result.type.ccount));
|
|
}
|
|
break;
|
|
case DxsoOpcode::Dp3: {
|
|
DxsoRegMask srcMask(true, true, true, false);
|
|
result = emitDot(
|
|
emitRegisterLoad(src[0], srcMask),
|
|
emitRegisterLoad(src[1], srcMask));
|
|
break;
|
|
}
|
|
case DxsoOpcode::Dp4:
|
|
result = emitDot(
|
|
emitRegisterLoad(src[0], IdentityWriteMask),
|
|
emitRegisterLoad(src[1], IdentityWriteMask));
|
|
break;
|
|
case DxsoOpcode::Slt:
|
|
case DxsoOpcode::Sge: {
|
|
const uint32_t boolTypeId =
|
|
getVectorTypeId({ DxsoScalarType::Bool, result.type.ccount });
|
|
|
|
uint32_t cmpResult = opcode == DxsoOpcode::Slt
|
|
? m_module.opFOrdLessThan (boolTypeId, emitRegisterLoad(src[0], mask).id, emitRegisterLoad(src[1], mask).id)
|
|
: m_module.opFOrdGreaterThanEqual(boolTypeId, emitRegisterLoad(src[0], mask).id, emitRegisterLoad(src[1], mask).id);
|
|
|
|
result.id = m_module.opSelect(typeId, cmpResult,
|
|
m_module.constfReplicant(1.0f, result.type.ccount),
|
|
m_module.constfReplicant(0.0f, result.type.ccount));
|
|
break;
|
|
}
|
|
case DxsoOpcode::Min:
|
|
result.id = m_module.opFMin(typeId,
|
|
emitRegisterLoad(src[0], mask).id,
|
|
emitRegisterLoad(src[1], mask).id);
|
|
break;
|
|
case DxsoOpcode::Max:
|
|
result.id = m_module.opFMax(typeId,
|
|
emitRegisterLoad(src[0], mask).id,
|
|
emitRegisterLoad(src[1], mask).id);
|
|
break;
|
|
case DxsoOpcode::ExpP:
|
|
if (m_programInfo.majorVersion() < 2) {
|
|
DxsoRegMask srcMask(true, false, false, false);
|
|
uint32_t src0 = emitRegisterLoad(src[0], srcMask).id;
|
|
|
|
uint32_t index = 0;
|
|
|
|
std::array<uint32_t, 4> resultIndices;
|
|
|
|
if (mask[0]) resultIndices[index++] = m_module.opExp2(scalarTypeId, m_module.opFloor(scalarTypeId, src0));
|
|
if (mask[1]) resultIndices[index++] = m_module.opFSub(scalarTypeId, src0, m_module.opFloor(scalarTypeId, src0));
|
|
if (mask[2]) resultIndices[index++] = m_module.opExp2(scalarTypeId, src0);
|
|
if (mask[3]) resultIndices[index++] = m_module.constf32(1.0f);
|
|
|
|
if (result.type.ccount == 1)
|
|
result.id = resultIndices[0];
|
|
else
|
|
result.id = m_module.opCompositeConstruct(typeId, result.type.ccount, resultIndices.data());
|
|
|
|
break;
|
|
}
|
|
case DxsoOpcode::Exp:
|
|
result.id = m_module.opExp2(typeId,
|
|
emitRegisterLoad(src[0], mask).id);
|
|
break;
|
|
case DxsoOpcode::Pow: {
|
|
uint32_t base = emitRegisterLoad(src[0], mask).id;
|
|
base = m_module.opFAbs(typeId, base);
|
|
|
|
uint32_t exponent = emitRegisterLoad(src[1], mask).id;
|
|
|
|
result.id = m_module.opPow(typeId, base, exponent);
|
|
|
|
if (m_moduleInfo.options.strictPow && m_moduleInfo.options.d3d9FloatEmulation) {
|
|
DxsoRegisterValue cmp;
|
|
cmp.type = { DxsoScalarType::Bool, result.type.ccount };
|
|
cmp.id = m_module.opFOrdEqual(getVectorTypeId(cmp.type),
|
|
exponent, m_module.constfReplicant(0.0f, cmp.type.ccount));
|
|
|
|
result.id = m_module.opSelect(typeId, cmp.id,
|
|
m_module.constfReplicant(1.0f, cmp.type.ccount), result.id);
|
|
}
|
|
break;
|
|
}
|
|
case DxsoOpcode::Crs: {
|
|
DxsoRegMask vec3Mask(true, true, true, false);
|
|
|
|
DxsoRegisterValue crossValue;
|
|
crossValue.type = { DxsoScalarType::Float32, 3 };
|
|
crossValue.id = m_module.opCross(getVectorTypeId(crossValue.type),
|
|
emitRegisterLoad(src[0], vec3Mask).id,
|
|
emitRegisterLoad(src[1], vec3Mask).id);
|
|
|
|
std::array<uint32_t, 3> indices = { 0, 0, 0 };
|
|
|
|
uint32_t index = 0;
|
|
for (uint32_t i = 0; i < indices.size(); i++) {
|
|
if (mask[i])
|
|
indices[index++] = m_module.opCompositeExtract(m_module.defFloatType(32), crossValue.id, 1, &i);
|
|
}
|
|
|
|
result.id = m_module.opCompositeConstruct(getVectorTypeId(result.type), result.type.ccount, indices.data());
|
|
|
|
break;
|
|
}
|
|
case DxsoOpcode::Abs:
|
|
result.id = m_module.opFAbs(typeId,
|
|
emitRegisterLoad(src[0], mask).id);
|
|
break;
|
|
case DxsoOpcode::Nrm: {
|
|
// Nrm is 3D...
|
|
DxsoRegMask srcMask(true, true, true, false);
|
|
auto vec3 = emitRegisterLoad(src[0], srcMask);
|
|
|
|
DxsoRegisterValue dot = emitDot(vec3, vec3);
|
|
dot.id = m_module.opInverseSqrt (scalarTypeId, dot.id);
|
|
if (m_moduleInfo.options.d3d9FloatEmulation) {
|
|
dot.id = m_module.opNMin (scalarTypeId, dot.id,
|
|
m_module.constf32(FLT_MAX));
|
|
}
|
|
|
|
// r * rsq(r . r);
|
|
result.id = m_module.opVectorTimesScalar(
|
|
typeId,
|
|
emitRegisterLoad(src[0], mask).id,
|
|
dot.id);
|
|
break;
|
|
}
|
|
case DxsoOpcode::SinCos: {
|
|
DxsoRegMask srcMask(true, false, false, false);
|
|
uint32_t src0 = emitRegisterLoad(src[0], srcMask).id;
|
|
|
|
std::array<uint32_t, 4> sincosVectorIndices = { 0, 0, 0, 0 };
|
|
|
|
uint32_t index = 0;
|
|
if (mask[0])
|
|
sincosVectorIndices[index++] = m_module.opCos(scalarTypeId, src0);
|
|
|
|
if (mask[1])
|
|
sincosVectorIndices[index++] = m_module.opSin(scalarTypeId, src0);
|
|
|
|
for (; index < result.type.ccount; index++) {
|
|
if (sincosVectorIndices[index] == 0)
|
|
sincosVectorIndices[index] = m_module.constf32(0.0f);
|
|
}
|
|
|
|
if (result.type.ccount == 1)
|
|
result.id = sincosVectorIndices[0];
|
|
else
|
|
result.id = m_module.opCompositeConstruct(typeId, result.type.ccount, sincosVectorIndices.data());
|
|
|
|
break;
|
|
}
|
|
case DxsoOpcode::Lit: {
|
|
DxsoRegMask srcMask(true, true, true, true);
|
|
uint32_t srcOp = emitRegisterLoad(src[0], srcMask).id;
|
|
|
|
const uint32_t x = 0;
|
|
const uint32_t y = 1;
|
|
const uint32_t w = 3;
|
|
|
|
uint32_t srcX = m_module.opCompositeExtract(scalarTypeId, srcOp, 1, &x);
|
|
uint32_t srcY = m_module.opCompositeExtract(scalarTypeId, srcOp, 1, &y);
|
|
uint32_t srcW = m_module.opCompositeExtract(scalarTypeId, srcOp, 1, &w);
|
|
|
|
uint32_t power = m_module.opFClamp(
|
|
scalarTypeId, srcW,
|
|
m_module.constf32(-127.9961f), m_module.constf32(127.9961f));
|
|
|
|
std::array<uint32_t, 4> resultIndices;
|
|
|
|
uint32_t index = 0;
|
|
|
|
if (mask[0]) resultIndices[index++] = m_module.constf32(1.0f);
|
|
if (mask[1]) resultIndices[index++] = m_module.opFMax(scalarTypeId, srcX, m_module.constf32(0));
|
|
if (mask[2]) resultIndices[index++] = m_module.opPow (scalarTypeId, srcY, power);
|
|
if (mask[3]) resultIndices[index++] = m_module.constf32(1.0f);
|
|
|
|
const uint32_t boolType = m_module.defBoolType();
|
|
uint32_t zTestX = m_module.opFOrdGreaterThanEqual(boolType, srcX, m_module.constf32(0));
|
|
uint32_t zTestY = m_module.opFOrdGreaterThanEqual(boolType, srcY, m_module.constf32(0));
|
|
uint32_t zTest = m_module.opLogicalAnd(boolType, zTestX, zTestY);
|
|
|
|
if (result.type.ccount > 2)
|
|
resultIndices[2] = m_module.opSelect(
|
|
scalarTypeId,
|
|
zTest,
|
|
resultIndices[2],
|
|
m_module.constf32(0.0f));
|
|
|
|
if (result.type.ccount == 1)
|
|
result.id = resultIndices[0];
|
|
else
|
|
result.id = m_module.opCompositeConstruct(typeId, result.type.ccount, resultIndices.data());
|
|
break;
|
|
}
|
|
case DxsoOpcode::Dst: {
|
|
//dest.x = 1;
|
|
//dest.y = src0.y * src1.y;
|
|
//dest.z = src0.z;
|
|
//dest.w = src1.w;
|
|
|
|
DxsoRegMask srcMask(true, true, true, true);
|
|
|
|
uint32_t src0 = emitRegisterLoad(src[0], srcMask).id;
|
|
uint32_t src1 = emitRegisterLoad(src[1], srcMask).id;
|
|
|
|
const uint32_t y = 1;
|
|
const uint32_t z = 2;
|
|
const uint32_t w = 3;
|
|
|
|
uint32_t src0Y = m_module.opCompositeExtract(scalarTypeId, src0, 1, &y);
|
|
uint32_t src1Y = m_module.opCompositeExtract(scalarTypeId, src1, 1, &y);
|
|
|
|
uint32_t src0Z = m_module.opCompositeExtract(scalarTypeId, src0, 1, &z);
|
|
uint32_t src1W = m_module.opCompositeExtract(scalarTypeId, src1, 1, &w);
|
|
|
|
std::array<uint32_t, 4> resultIndices;
|
|
resultIndices[0] = m_module.constf32(1.0f);
|
|
resultIndices[1] = m_module.opFMul(scalarTypeId, src0Y, src1Y);
|
|
resultIndices[2] = src0Z;
|
|
resultIndices[3] = src1W;
|
|
|
|
if (result.type.ccount == 1)
|
|
result.id = resultIndices[0];
|
|
else
|
|
result.id = m_module.opCompositeConstruct(typeId, result.type.ccount, resultIndices.data());
|
|
break;
|
|
}
|
|
case DxsoOpcode::LogP:
|
|
case DxsoOpcode::Log:
|
|
result.id = m_module.opFAbs(typeId, emitRegisterLoad(src[0], mask).id);
|
|
result.id = m_module.opLog2(typeId, result.id);
|
|
if (m_moduleInfo.options.d3d9FloatEmulation) {
|
|
result.id = m_module.opNMax(typeId, result.id,
|
|
m_module.constfReplicant(-FLT_MAX, result.type.ccount));
|
|
}
|
|
break;
|
|
case DxsoOpcode::Lrp:
|
|
result.id = m_module.opFMix(typeId,
|
|
emitRegisterLoad(src[2], mask).id,
|
|
emitRegisterLoad(src[1], mask).id,
|
|
emitRegisterLoad(src[0], mask).id);
|
|
break;
|
|
case DxsoOpcode::Frc:
|
|
result.id = m_module.opFract(typeId,
|
|
emitRegisterLoad(src[0], mask).id);
|
|
break;
|
|
case DxsoOpcode::Cmp: {
|
|
const uint32_t boolTypeId =
|
|
getVectorTypeId({ DxsoScalarType::Bool, result.type.ccount });
|
|
|
|
uint32_t cmp = m_module.opFOrdGreaterThanEqual(
|
|
boolTypeId,
|
|
emitRegisterLoad(src[0], mask).id,
|
|
m_module.constfReplicant(0.0f, result.type.ccount));
|
|
|
|
result.id = m_module.opSelect(
|
|
typeId, cmp,
|
|
emitRegisterLoad(src[1], mask).id,
|
|
emitRegisterLoad(src[2], mask).id);
|
|
break;
|
|
}
|
|
case DxsoOpcode::Cnd: {
|
|
const uint32_t boolTypeId =
|
|
getVectorTypeId({ DxsoScalarType::Bool, result.type.ccount });
|
|
|
|
uint32_t cmp = m_module.opFOrdGreaterThan(
|
|
boolTypeId,
|
|
emitRegisterLoad(src[0], mask).id,
|
|
m_module.constfReplicant(0.5f, result.type.ccount));
|
|
|
|
result.id = m_module.opSelect(
|
|
typeId, cmp,
|
|
emitRegisterLoad(src[1], mask).id,
|
|
emitRegisterLoad(src[2], mask).id);
|
|
break;
|
|
}
|
|
case DxsoOpcode::Dp2Add: {
|
|
DxsoRegMask dotSrcMask(true, true, false, false);
|
|
DxsoRegMask addSrcMask(true, false, false, false);
|
|
|
|
DxsoRegisterValue dot = emitDot(
|
|
emitRegisterLoad(src[0], dotSrcMask),
|
|
emitRegisterLoad(src[1], dotSrcMask));
|
|
|
|
dot.id = m_module.opFAdd(scalarTypeId,
|
|
dot.id, emitRegisterLoad(src[2], addSrcMask).id);
|
|
|
|
result.id = dot.id;
|
|
result.type = scalarType;
|
|
break;
|
|
}
|
|
case DxsoOpcode::DsX:
|
|
result.id = m_module.opDpdx(
|
|
typeId, emitRegisterLoad(src[0], mask).id);
|
|
break;
|
|
case DxsoOpcode::DsY:
|
|
result.id = m_module.opDpdy(
|
|
typeId, emitRegisterLoad(src[0], mask).id);
|
|
break;
|
|
default:
|
|
Logger::warn(str::format("DxsoCompiler::emitVectorAlu: unimplemented op ", opcode));
|
|
return;
|
|
}
|
|
|
|
this->emitDstStore(dst, result, mask, ctx.dst.saturate, emitPredicateLoad(ctx), ctx.dst.shift, ctx.dst.id);
|
|
}
|
|
|
|
|
|
void DxsoCompiler::emitPredicateOp(const DxsoInstructionContext& ctx) {
|
|
const auto& src = ctx.src;
|
|
|
|
DxsoRegMask mask = ctx.dst.mask;
|
|
|
|
DxsoRegisterPointer dst = emitGetOperandPtr(ctx.dst);
|
|
|
|
DxsoRegisterValue result;
|
|
result.type.ctype = dst.type.ctype;
|
|
result.type.ccount = mask.popCount();
|
|
|
|
result.id = emitBoolComparison(
|
|
result.type, ctx.instruction.specificData.comparison,
|
|
emitRegisterLoad(src[0], mask).id, emitRegisterLoad(src[1], mask).id);
|
|
|
|
this->emitValueStore(dst, result, mask, emitPredicateLoad(ctx));
|
|
}
|
|
|
|
|
|
void DxsoCompiler::emitMatrixAlu(const DxsoInstructionContext& ctx) {
|
|
const auto& src = ctx.src;
|
|
|
|
DxsoRegMask mask = ctx.dst.mask;
|
|
|
|
DxsoRegisterPointer dst = emitGetOperandPtr(ctx.dst);
|
|
|
|
DxsoRegisterValue result;
|
|
result.type.ctype = dst.type.ctype;
|
|
result.type.ccount = mask.popCount();
|
|
|
|
DxsoVectorType scalarType = result.type;
|
|
scalarType.ccount = 1;
|
|
|
|
const uint32_t typeId = getVectorTypeId(result.type);
|
|
const uint32_t scalarTypeId = getVectorTypeId(scalarType);
|
|
|
|
const DxsoOpcode opcode = ctx.instruction.opcode;
|
|
|
|
uint32_t dotCount;
|
|
uint32_t iterCount;
|
|
|
|
switch (opcode) {
|
|
case DxsoOpcode::M3x2:
|
|
dotCount = 3;
|
|
iterCount = 2;
|
|
break;
|
|
case DxsoOpcode::M3x3:
|
|
dotCount = 3;
|
|
iterCount = 3;
|
|
break;
|
|
case DxsoOpcode::M3x4:
|
|
dotCount = 3;
|
|
iterCount = 4;
|
|
break;
|
|
case DxsoOpcode::M4x3:
|
|
dotCount = 4;
|
|
iterCount = 3;
|
|
break;
|
|
case DxsoOpcode::M4x4:
|
|
dotCount = 4;
|
|
iterCount = 4;
|
|
break;
|
|
default:
|
|
Logger::warn(str::format("DxsoCompiler::emitMatrixAlu: unimplemented op ", opcode));
|
|
return;
|
|
}
|
|
|
|
DxsoRegMask srcMask(true, true, true, dotCount == 4);
|
|
std::array<uint32_t, 4> indices;
|
|
|
|
DxsoRegister src0 = src[0];
|
|
DxsoRegister src1 = src[1];
|
|
|
|
for (uint32_t i = 0; i < iterCount; i++) {
|
|
indices[i] = m_module.opDot(scalarTypeId,
|
|
emitRegisterLoad(src0, srcMask).id,
|
|
emitRegisterLoad(src1, srcMask).id);
|
|
|
|
src1.id.num++;
|
|
}
|
|
|
|
result.id = m_module.opCompositeConstruct(
|
|
typeId, iterCount, indices.data());
|
|
|
|
this->emitDstStore(dst, result, mask, ctx.dst.saturate, emitPredicateLoad(ctx), ctx.dst.shift, ctx.dst.id);
|
|
}
|
|
|
|
|
|
void DxsoCompiler::emitControlFlowGenericLoop(
|
|
bool count,
|
|
uint32_t initialVar,
|
|
uint32_t strideVar,
|
|
uint32_t iterationCountVar) {
|
|
const uint32_t itType = m_module.defIntType(32, 1);
|
|
|
|
DxsoCfgBlock block;
|
|
block.type = DxsoCfgBlockType::Loop;
|
|
block.b_loop.labelHeader = m_module.allocateId();
|
|
block.b_loop.labelBegin = m_module.allocateId();
|
|
block.b_loop.labelContinue = m_module.allocateId();
|
|
block.b_loop.labelBreak = m_module.allocateId();
|
|
block.b_loop.iteratorPtr = m_module.newVar(
|
|
m_module.defPointerType(itType, spv::StorageClassPrivate), spv::StorageClassPrivate);
|
|
block.b_loop.strideVar = strideVar;
|
|
block.b_loop.countBackup = 0;
|
|
|
|
if (count) {
|
|
DxsoBaseRegister loop;
|
|
loop.id = { DxsoRegisterType::Loop, 0 };
|
|
|
|
DxsoRegisterPointer loopPtr = emitGetOperandPtr(loop, nullptr);
|
|
uint32_t loopVal = m_module.opLoad(
|
|
getVectorTypeId(loopPtr.type), loopPtr.id);
|
|
|
|
block.b_loop.countBackup = loopVal;
|
|
|
|
m_module.opStore(loopPtr.id, initialVar);
|
|
}
|
|
|
|
m_module.setDebugName(block.b_loop.iteratorPtr, "iter");
|
|
|
|
m_module.opStore(block.b_loop.iteratorPtr, iterationCountVar);
|
|
|
|
m_module.opBranch(block.b_loop.labelHeader);
|
|
m_module.opLabel (block.b_loop.labelHeader);
|
|
|
|
m_module.opLoopMerge(
|
|
block.b_loop.labelBreak,
|
|
block.b_loop.labelContinue,
|
|
spv::LoopControlMaskNone);
|
|
|
|
m_module.opBranch(block.b_loop.labelBegin);
|
|
m_module.opLabel (block.b_loop.labelBegin);
|
|
|
|
uint32_t iterator = m_module.opLoad(itType, block.b_loop.iteratorPtr);
|
|
uint32_t complete = m_module.opIEqual(m_module.defBoolType(), iterator, m_module.consti32(0));
|
|
|
|
const uint32_t breakBlock = m_module.allocateId();
|
|
const uint32_t mergeBlock = m_module.allocateId();
|
|
|
|
m_module.opSelectionMerge(mergeBlock,
|
|
spv::SelectionControlMaskNone);
|
|
|
|
m_module.opBranchConditional(
|
|
complete, breakBlock, mergeBlock);
|
|
|
|
m_module.opLabel(breakBlock);
|
|
|
|
m_module.opBranch(block.b_loop.labelBreak);
|
|
|
|
m_module.opLabel(mergeBlock);
|
|
|
|
iterator = m_module.opISub(itType, iterator, m_module.consti32(1));
|
|
m_module.opStore(block.b_loop.iteratorPtr, iterator);
|
|
|
|
m_controlFlowBlocks.push_back(block);
|
|
}
|
|
|
|
void DxsoCompiler::emitControlFlowGenericLoopEnd() {
|
|
if (m_controlFlowBlocks.size() == 0
|
|
|| m_controlFlowBlocks.back().type != DxsoCfgBlockType::Loop)
|
|
throw DxvkError("DxsoCompiler: 'EndRep' without 'Rep' or 'Loop' found");
|
|
|
|
// Remove the block from the stack, it's closed
|
|
const DxsoCfgBlock block = m_controlFlowBlocks.back();
|
|
m_controlFlowBlocks.pop_back();
|
|
|
|
if (block.b_loop.strideVar) {
|
|
DxsoBaseRegister loop;
|
|
loop.id = { DxsoRegisterType::Loop, 0 };
|
|
|
|
DxsoRegisterPointer loopPtr = emitGetOperandPtr(loop, nullptr);
|
|
uint32_t val = m_module.opLoad(
|
|
getVectorTypeId(loopPtr.type), loopPtr.id);
|
|
|
|
val = m_module.opIAdd(
|
|
getVectorTypeId(loopPtr.type),
|
|
val, block.b_loop.strideVar);
|
|
|
|
m_module.opStore(loopPtr.id, val);
|
|
}
|
|
|
|
// Declare the continue block
|
|
m_module.opBranch(block.b_loop.labelContinue);
|
|
m_module.opLabel(block.b_loop.labelContinue);
|
|
|
|
// Declare the merge block
|
|
m_module.opBranch(block.b_loop.labelHeader);
|
|
m_module.opLabel(block.b_loop.labelBreak);
|
|
|
|
if (block.b_loop.countBackup) {
|
|
DxsoBaseRegister loop;
|
|
loop.id = { DxsoRegisterType::Loop, 0 };
|
|
|
|
DxsoRegisterPointer loopPtr = emitGetOperandPtr(loop, nullptr);
|
|
|
|
m_module.opStore(loopPtr.id, block.b_loop.countBackup);
|
|
}
|
|
}
|
|
|
|
void DxsoCompiler::emitControlFlowRep(const DxsoInstructionContext& ctx) {
|
|
DxsoRegMask srcMask(true, false, false, false);
|
|
this->emitControlFlowGenericLoop(
|
|
false, 0, 0,
|
|
emitRegisterLoad(ctx.src[0], srcMask).id);
|
|
}
|
|
|
|
void DxsoCompiler::emitControlFlowEndRep(const DxsoInstructionContext& ctx) {
|
|
emitControlFlowGenericLoopEnd();
|
|
}
|
|
|
|
void DxsoCompiler::emitControlFlowLoop(const DxsoInstructionContext& ctx) {
|
|
const uint32_t itType = m_module.defIntType(32, 1);
|
|
|
|
DxsoRegMask srcMask(true, true, true, false);
|
|
uint32_t integerRegister = emitRegisterLoad(ctx.src[1], srcMask).id;
|
|
uint32_t x = 0;
|
|
uint32_t y = 1;
|
|
uint32_t z = 2;
|
|
|
|
uint32_t iterCount = m_module.opCompositeExtract(itType, integerRegister, 1, &x);
|
|
uint32_t initialValue = m_module.opCompositeExtract(itType, integerRegister, 1, &y);
|
|
uint32_t strideSize = m_module.opCompositeExtract(itType, integerRegister, 1, &z);
|
|
|
|
this->emitControlFlowGenericLoop(
|
|
true,
|
|
initialValue,
|
|
strideSize,
|
|
iterCount);
|
|
}
|
|
|
|
void DxsoCompiler::emitControlFlowEndLoop(const DxsoInstructionContext& ctx) {
|
|
this->emitControlFlowGenericLoopEnd();
|
|
}
|
|
|
|
void DxsoCompiler::emitControlFlowBreak(const DxsoInstructionContext& ctx) {
|
|
DxsoCfgBlock* cfgBlock =
|
|
cfgFindBlock({ DxsoCfgBlockType::Loop });
|
|
|
|
if (cfgBlock == nullptr)
|
|
throw DxvkError("DxbcCompiler: 'Break' outside 'Rep' or 'Loop' found");
|
|
|
|
m_module.opBranch(cfgBlock->b_loop.labelBreak);
|
|
|
|
// Subsequent instructions assume that there is an open block
|
|
const uint32_t labelId = m_module.allocateId();
|
|
m_module.opLabel(labelId);
|
|
}
|
|
|
|
void DxsoCompiler::emitControlFlowBreakC(const DxsoInstructionContext& ctx) {
|
|
DxsoCfgBlock* cfgBlock =
|
|
cfgFindBlock({ DxsoCfgBlockType::Loop });
|
|
|
|
if (cfgBlock == nullptr)
|
|
throw DxvkError("DxbcCompiler: 'BreakC' outside 'Rep' or 'Loop' found");
|
|
|
|
DxsoRegMask srcMask(true, false, false, false);
|
|
auto a = emitRegisterLoad(ctx.src[0], srcMask);
|
|
auto b = emitRegisterLoad(ctx.src[1], srcMask);
|
|
|
|
uint32_t result = this->emitBoolComparison(
|
|
{ DxsoScalarType::Bool, a.type.ccount },
|
|
ctx.instruction.specificData.comparison,
|
|
a.id, b.id);
|
|
|
|
// We basically have to wrap this into an 'if' block
|
|
const uint32_t breakBlock = m_module.allocateId();
|
|
const uint32_t mergeBlock = m_module.allocateId();
|
|
|
|
m_module.opSelectionMerge(mergeBlock,
|
|
spv::SelectionControlMaskNone);
|
|
|
|
m_module.opBranchConditional(
|
|
result, breakBlock, mergeBlock);
|
|
|
|
m_module.opLabel(breakBlock);
|
|
|
|
m_module.opBranch(cfgBlock->b_loop.labelBreak);
|
|
|
|
m_module.opLabel(mergeBlock);
|
|
}
|
|
|
|
void DxsoCompiler::emitControlFlowIf(const DxsoInstructionContext& ctx) {
|
|
const auto opcode = ctx.instruction.opcode;
|
|
|
|
uint32_t result;
|
|
|
|
DxsoRegMask srcMask(true, false, false, false);
|
|
if (opcode == DxsoOpcode::Ifc) {
|
|
auto a = emitRegisterLoad(ctx.src[0], srcMask);
|
|
auto b = emitRegisterLoad(ctx.src[1], srcMask);
|
|
|
|
result = this->emitBoolComparison(
|
|
{ DxsoScalarType::Bool, a.type.ccount },
|
|
ctx.instruction.specificData.comparison,
|
|
a.id, b.id);
|
|
} else
|
|
result = emitRegisterLoad(ctx.src[0], srcMask).id;
|
|
|
|
// Declare the 'if' block. We do not know if there
|
|
// will be an 'else' block or not, so we'll assume
|
|
// that there is one and leave it empty otherwise.
|
|
DxsoCfgBlock block;
|
|
block.type = DxsoCfgBlockType::If;
|
|
block.b_if.ztestId = result;
|
|
block.b_if.labelIf = m_module.allocateId();
|
|
block.b_if.labelElse = 0;
|
|
block.b_if.labelEnd = m_module.allocateId();
|
|
block.b_if.headerPtr = m_module.getInsertionPtr();
|
|
m_controlFlowBlocks.push_back(block);
|
|
|
|
// We'll insert the branch instruction when closing
|
|
// the block, since we don't know whether or not an
|
|
// else block is needed right now.
|
|
m_module.opLabel(block.b_if.labelIf);
|
|
}
|
|
|
|
void DxsoCompiler::emitControlFlowElse(const DxsoInstructionContext& ctx) {
|
|
if (m_controlFlowBlocks.size() == 0
|
|
|| m_controlFlowBlocks.back().type != DxsoCfgBlockType::If
|
|
|| m_controlFlowBlocks.back().b_if.labelElse != 0)
|
|
throw DxvkError("DxsoCompiler: 'Else' without 'If' found");
|
|
|
|
// Set the 'Else' flag so that we do
|
|
// not insert a dummy block on 'EndIf'
|
|
DxsoCfgBlock& block = m_controlFlowBlocks.back();
|
|
block.b_if.labelElse = m_module.allocateId();
|
|
|
|
// Close the 'If' block by branching to
|
|
// the merge block we declared earlier
|
|
m_module.opBranch(block.b_if.labelEnd);
|
|
m_module.opLabel (block.b_if.labelElse);
|
|
}
|
|
|
|
void DxsoCompiler::emitControlFlowEndIf(const DxsoInstructionContext& ctx) {
|
|
if (m_controlFlowBlocks.size() == 0
|
|
|| m_controlFlowBlocks.back().type != DxsoCfgBlockType::If)
|
|
throw DxvkError("DxsoCompiler: 'EndIf' without 'If' found");
|
|
|
|
// Remove the block from the stack, it's closed
|
|
DxsoCfgBlock block = m_controlFlowBlocks.back();
|
|
m_controlFlowBlocks.pop_back();
|
|
|
|
// Write out the 'if' header
|
|
m_module.beginInsertion(block.b_if.headerPtr);
|
|
|
|
m_module.opSelectionMerge(
|
|
block.b_if.labelEnd,
|
|
spv::SelectionControlMaskNone);
|
|
|
|
m_module.opBranchConditional(
|
|
block.b_if.ztestId,
|
|
block.b_if.labelIf,
|
|
block.b_if.labelElse != 0
|
|
? block.b_if.labelElse
|
|
: block.b_if.labelEnd);
|
|
|
|
m_module.endInsertion();
|
|
|
|
// End the active 'if' or 'else' block
|
|
m_module.opBranch(block.b_if.labelEnd);
|
|
m_module.opLabel (block.b_if.labelEnd);
|
|
}
|
|
|
|
|
|
void DxsoCompiler::emitTexCoord(const DxsoInstructionContext& ctx) {
|
|
DxsoRegisterValue result;
|
|
|
|
if (m_programInfo.majorVersion() == 1 && m_programInfo.minorVersion() == 4) {
|
|
// TexCrd Op (PS 1.4)
|
|
DxsoRegister texcoord;
|
|
texcoord.id.type = DxsoRegisterType::PixelTexcoord;
|
|
texcoord.id.num = ctx.src[0].id.num;
|
|
|
|
result = emitRegisterLoadRaw(texcoord, nullptr);
|
|
} else {
|
|
// TexCoord Op (PS 1.0 - PS 1.3)
|
|
DxsoRegister texcoord;
|
|
texcoord.id.type = DxsoRegisterType::PixelTexcoord;
|
|
texcoord.id.num = ctx.dst.id.num;
|
|
|
|
result = emitRegisterLoadRaw(texcoord, nullptr);
|
|
// Saturate
|
|
result = emitSaturate(result);
|
|
// w = 1.0f
|
|
uint32_t wIndex = 3;
|
|
result.id = m_module.opCompositeInsert(getVectorTypeId(result.type),
|
|
m_module.constf32(1.0f),
|
|
result.id,
|
|
1, &wIndex);
|
|
}
|
|
|
|
DxsoRegisterPointer dst = emitGetOperandPtr(ctx.dst);
|
|
|
|
this->emitDstStore(dst, result, ctx.dst.mask, ctx.dst.saturate, emitPredicateLoad(ctx), ctx.dst.shift, ctx.dst.id);
|
|
}
|
|
|
|
void DxsoCompiler::emitTextureSample(const DxsoInstructionContext& ctx) {
|
|
DxsoRegisterPointer dst = emitGetOperandPtr(ctx.dst);
|
|
|
|
const DxsoOpcode opcode = ctx.instruction.opcode;
|
|
|
|
DxsoRegisterValue texcoordVar;
|
|
uint32_t samplerIdx;
|
|
|
|
DxsoRegMask vec3Mask(true, true, true, false);
|
|
DxsoRegMask srcMask (true, true, true, true);
|
|
|
|
if (opcode == DxsoOpcode::TexM3x2Tex || opcode == DxsoOpcode::TexM3x3Tex || opcode == DxsoOpcode::TexM3x3Spec || opcode == DxsoOpcode::TexM3x3VSpec) {
|
|
const uint32_t count = opcode == DxsoOpcode::TexM3x2Tex ? 2 : 3;
|
|
|
|
auto n = emitRegisterLoad(ctx.src[0], vec3Mask);
|
|
|
|
std::array<uint32_t, 4> indices = { 0, 0, m_module.constf32(0.0f), m_module.constf32(0.0f) };
|
|
for (uint32_t i = 0; i < count; i++) {
|
|
auto reg = ctx.dst;
|
|
reg.id.num -= (count - 1) - i;
|
|
auto m = emitRegisterLoadTexcoord(reg, vec3Mask);
|
|
|
|
indices[i] = m_module.opDot(getScalarTypeId(DxsoScalarType::Float32), m.id, n.id);
|
|
}
|
|
|
|
if (opcode == DxsoOpcode::TexM3x3Spec || opcode == DxsoOpcode::TexM3x3VSpec) {
|
|
uint32_t vec3Type = getVectorTypeId({ DxsoScalarType::Float32, 3 });
|
|
uint32_t normal = m_module.opCompositeConstruct(vec3Type, 3, indices.data());
|
|
|
|
uint32_t eyeRay;
|
|
// VSpec -> Create eye ray from .w of last 3 tex coords (m, m-1, m-2)
|
|
// Spec -> Get eye ray from src[1]
|
|
if (opcode == DxsoOpcode::TexM3x3VSpec) {
|
|
DxsoRegMask wMask(false, false, false, true);
|
|
|
|
std::array<uint32_t, 3> eyeRayIndices;
|
|
for (uint32_t i = 0; i < 3; i++) {
|
|
auto reg = ctx.dst;
|
|
reg.id.num -= (count - 1) - i;
|
|
eyeRayIndices[i] = emitRegisterLoadTexcoord(reg, wMask).id;
|
|
}
|
|
|
|
eyeRay = m_module.opCompositeConstruct(vec3Type, eyeRayIndices.size(), eyeRayIndices.data());
|
|
}
|
|
else
|
|
eyeRay = emitRegisterLoad(ctx.src[1], vec3Mask).id;
|
|
|
|
eyeRay = m_module.opNormalize(vec3Type, eyeRay);
|
|
normal = m_module.opNormalize(vec3Type, normal);
|
|
uint32_t reflection = m_module.opReflect(vec3Type, eyeRay, normal);
|
|
|
|
for (uint32_t i = 0; i < 3; i++)
|
|
indices[i] = m_module.opCompositeExtract(m_module.defFloatType(32), reflection, 1, &i);
|
|
}
|
|
|
|
texcoordVar.type = { DxsoScalarType::Float32, 4 };
|
|
texcoordVar.id = m_module.opCompositeConstruct(getVectorTypeId(texcoordVar.type), indices.size(), indices.data());
|
|
|
|
samplerIdx = ctx.dst.id.num;
|
|
}
|
|
else if (opcode == DxsoOpcode::TexBem) {
|
|
auto m = emitRegisterLoadTexcoord(ctx.dst, srcMask);
|
|
auto n = emitRegisterLoad(ctx.src[0], srcMask);
|
|
|
|
texcoordVar = m;
|
|
|
|
// u' = tc(m).x + [bm00(m) * t(n).x + bm10(m) * t(n).y]
|
|
// v' = tc(m).y + [bm01(m) * t(n).x + bm11(m) * t(n).y]
|
|
|
|
// But we flipped the bm indices so we can use dot here...
|
|
|
|
// u' = tc(m).x + dot(bm0, tn)
|
|
// v' = tc(m).y + dot(bm1, tn)
|
|
|
|
for (uint32_t i = 0; i < 2; i++) {
|
|
uint32_t fl_t = getScalarTypeId(DxsoScalarType::Float32);
|
|
uint32_t vec2_t = getVectorTypeId({ DxsoScalarType::Float32, 2 });
|
|
std::array<uint32_t, 4> indices = { 0, 1, 2, 3 };
|
|
|
|
uint32_t tc_m_n = m_module.opCompositeExtract(fl_t, m.id, 1, &i);
|
|
|
|
uint32_t offset = m_module.constu32(D3D9SharedPSStages_Count * ctx.dst.id.num + D3D9SharedPSStages_BumpEnvMat0 + i);
|
|
uint32_t bm = m_module.opAccessChain(m_module.defPointerType(vec2_t, spv::StorageClassUniform),
|
|
m_ps.sharedState, 1, &offset);
|
|
bm = m_module.opLoad(vec2_t, bm);
|
|
|
|
uint32_t t = m_module.opVectorShuffle(vec2_t, n.id, n.id, 2, indices.data());
|
|
|
|
uint32_t dot = m_module.opDot(fl_t, bm, t);
|
|
|
|
uint32_t result = m_module.opFAdd(fl_t, tc_m_n, dot);
|
|
texcoordVar.id = m_module.opCompositeInsert(getVectorTypeId(texcoordVar.type), result, texcoordVar.id, 1, &i);
|
|
}
|
|
|
|
samplerIdx = ctx.dst.id.num;
|
|
}
|
|
else if (opcode == DxsoOpcode::TexReg2Ar) {
|
|
texcoordVar = emitRegisterLoad(ctx.src[0], srcMask);
|
|
texcoordVar = emitRegisterSwizzle(texcoordVar, DxsoRegSwizzle(3, 0, 0, 0), srcMask);
|
|
|
|
samplerIdx = ctx.dst.id.num;
|
|
}
|
|
else if (opcode == DxsoOpcode::TexReg2Gb) {
|
|
texcoordVar = emitRegisterLoad(ctx.src[0], srcMask);
|
|
texcoordVar = emitRegisterSwizzle(texcoordVar, DxsoRegSwizzle(1, 2, 2, 2), srcMask);
|
|
|
|
samplerIdx = ctx.dst.id.num;
|
|
}
|
|
else if (opcode == DxsoOpcode::TexReg2Rgb) {
|
|
texcoordVar = emitRegisterLoad(ctx.src[0], srcMask);
|
|
texcoordVar = emitRegisterSwizzle(texcoordVar, DxsoRegSwizzle(0, 1, 2, 2), srcMask);
|
|
|
|
samplerIdx = ctx.dst.id.num;
|
|
}
|
|
else if (opcode == DxsoOpcode::TexDp3Tex) {
|
|
auto m = emitRegisterLoadTexcoord(ctx.dst, vec3Mask);
|
|
auto n = emitRegisterLoad(ctx.src[0], vec3Mask);
|
|
|
|
auto dot = emitDot(m, n);
|
|
|
|
std::array<uint32_t, 4> indices = { dot.id, m_module.constf32(0.0f), m_module.constf32(0.0f), m_module.constf32(0.0f) };
|
|
|
|
texcoordVar.type = { DxsoScalarType::Float32, 4 };
|
|
texcoordVar.id = m_module.opCompositeConstruct(getVectorTypeId(texcoordVar.type),
|
|
indices.size(), indices.data());
|
|
|
|
samplerIdx = ctx.dst.id.num;
|
|
}
|
|
else {
|
|
if (m_programInfo.majorVersion() >= 2) { // SM 2.0+
|
|
texcoordVar = emitRegisterLoad(ctx.src[0], srcMask);
|
|
samplerIdx = ctx.src[1].id.num;
|
|
} else if (
|
|
m_programInfo.majorVersion() == 1
|
|
&& m_programInfo.minorVersion() == 4) { // SM 1.4
|
|
texcoordVar = emitRegisterLoad(ctx.src[0], srcMask);
|
|
samplerIdx = ctx.dst.id.num;
|
|
}
|
|
else { // SM 1.0-1.3
|
|
texcoordVar = emitRegisterLoadTexcoord(ctx.dst, srcMask);
|
|
samplerIdx = ctx.dst.id.num;
|
|
}
|
|
}
|
|
|
|
// SM < 1.x does not have dcl sampler type.
|
|
if (m_programInfo.majorVersion() < 2 && m_samplers[samplerIdx].color[SamplerTypeTexture2D].varId == 0)
|
|
emitDclSampler(samplerIdx, DxsoTextureType::Texture2D);
|
|
|
|
DxsoSampler sampler = m_samplers.at(samplerIdx);
|
|
|
|
auto SampleImage = [this, opcode, dst, ctx, samplerIdx](DxsoRegisterValue texcoordVar, DxsoSamplerInfo& sampler, bool depth, DxsoSamplerType samplerType) {
|
|
DxsoRegisterValue result;
|
|
result.type.ctype = dst.type.ctype;
|
|
result.type.ccount = depth ? 1 : 4;
|
|
|
|
const uint32_t typeId = getVectorTypeId(result.type);
|
|
|
|
const uint32_t imageVarId = m_module.opLoad(sampler.typeId, sampler.varId);
|
|
|
|
SpirvImageOperands imageOperands;
|
|
if (m_programInfo.type() == DxsoProgramTypes::VertexShader) {
|
|
imageOperands.sLod = m_module.constf32(0.0f);
|
|
imageOperands.flags |= spv::ImageOperandsLodMask;
|
|
}
|
|
|
|
if (opcode == DxsoOpcode::TexLdl) {
|
|
uint32_t w = 3;
|
|
imageOperands.sLod = m_module.opCompositeExtract(
|
|
m_module.defFloatType(32), texcoordVar.id, 1, &w);
|
|
imageOperands.flags |= spv::ImageOperandsLodMask;
|
|
}
|
|
|
|
if (opcode == DxsoOpcode::TexLdd) {
|
|
DxsoRegMask gradMask(true, true, false, false);
|
|
imageOperands.flags |= spv::ImageOperandsGradMask;
|
|
imageOperands.sGradX = emitRegisterLoad(ctx.src[2], gradMask).id;
|
|
imageOperands.sGradY = emitRegisterLoad(ctx.src[3], gradMask).id;
|
|
}
|
|
|
|
uint32_t projDivider = 0;
|
|
|
|
auto GetProjectionValue = [&]() {
|
|
uint32_t w = 3;
|
|
return m_module.opCompositeExtract(
|
|
m_module.defFloatType(32), texcoordVar.id, 1, &w);
|
|
};
|
|
|
|
if (opcode == DxsoOpcode::Tex
|
|
&& m_programInfo.majorVersion() >= 2) {
|
|
if (ctx.instruction.specificData.texld == DxsoTexLdMode::Project) {
|
|
projDivider = GetProjectionValue();
|
|
}
|
|
else if (ctx.instruction.specificData.texld == DxsoTexLdMode::Bias) {
|
|
uint32_t w = 3;
|
|
imageOperands.sLodBias = m_module.opCompositeExtract(
|
|
m_module.defFloatType(32), texcoordVar.id, 1, &w);
|
|
imageOperands.flags |= spv::ImageOperandsBiasMask;
|
|
}
|
|
}
|
|
|
|
bool switchProjResult = m_programInfo.majorVersion() < 2 && samplerType != SamplerTypeTextureCube;
|
|
if (switchProjResult)
|
|
projDivider = GetProjectionValue();
|
|
|
|
uint32_t reference = 0;
|
|
|
|
if (depth) {
|
|
uint32_t component = sampler.dimensions;
|
|
reference = m_module.opCompositeExtract(
|
|
m_module.defFloatType(32), texcoordVar.id, 1, &component);
|
|
}
|
|
|
|
if (projDivider != 0) {
|
|
for (uint32_t i = sampler.dimensions; i < 4; i++) {
|
|
texcoordVar.id = m_module.opCompositeInsert(getVectorTypeId(texcoordVar.type),
|
|
projDivider, texcoordVar.id, 1, &i);
|
|
}
|
|
}
|
|
|
|
result.id = this->emitSample(
|
|
projDivider != 0,
|
|
typeId,
|
|
imageVarId,
|
|
texcoordVar.id,
|
|
reference,
|
|
imageOperands);
|
|
|
|
if (switchProjResult) {
|
|
uint32_t bool_t = m_module.defBoolType();
|
|
|
|
uint32_t nonProjResult = this->emitSample(
|
|
0,
|
|
typeId,
|
|
imageVarId,
|
|
texcoordVar.id,
|
|
reference,
|
|
imageOperands);
|
|
|
|
uint32_t shouldProj = m_module.opBitFieldUExtract(
|
|
m_module.defIntType(32, 0), m_ps.projectionSpec,
|
|
m_module.consti32(samplerIdx), m_module.consti32(1));
|
|
|
|
shouldProj = m_module.opIEqual(m_module.defBoolType(), shouldProj, m_module.constu32(1));
|
|
|
|
// Depth -> .x
|
|
// Colour -> .xyzw
|
|
// Need to replicate the bool for the opSelect.
|
|
if (!depth) {
|
|
uint32_t bvec4_t = m_module.defVectorType(bool_t, 4);
|
|
std::array<uint32_t, 4> indices = { shouldProj, shouldProj, shouldProj, shouldProj };
|
|
shouldProj = m_module.opCompositeConstruct(bvec4_t, indices.size(), indices.data());
|
|
}
|
|
|
|
result.id = m_module.opSelect(typeId, shouldProj, result.id, nonProjResult);
|
|
}
|
|
|
|
// Apply operand swizzle to the operand value
|
|
result = emitRegisterSwizzle(result, IdentitySwizzle, ctx.dst.mask);
|
|
|
|
this->emitDstStore(dst, result, ctx.dst.mask, ctx.dst.saturate, emitPredicateLoad(ctx), ctx.dst.shift, ctx.dst.id);
|
|
};
|
|
|
|
auto SampleType = [&](DxsoSamplerType samplerType) {
|
|
// Only do the check for depth comp. samplers
|
|
// if we aren't a 3D texture
|
|
if (samplerType != SamplerTypeTexture3D) {
|
|
uint32_t colorLabel = m_module.allocateId();
|
|
uint32_t depthLabel = m_module.allocateId();
|
|
uint32_t endLabel = m_module.allocateId();
|
|
|
|
m_module.opSelectionMerge(endLabel, spv::SelectionControlMaskNone);
|
|
m_module.opBranchConditional(sampler.depthSpecConst, depthLabel, colorLabel);
|
|
|
|
m_module.opLabel(colorLabel);
|
|
SampleImage(texcoordVar, sampler.color[samplerType], false, samplerType);
|
|
m_module.opBranch(endLabel);
|
|
|
|
m_module.opLabel(depthLabel);
|
|
SampleImage(texcoordVar, sampler.depth[samplerType], true, samplerType);
|
|
m_module.opBranch(endLabel);
|
|
|
|
m_module.opLabel(endLabel);
|
|
}
|
|
else
|
|
SampleImage(texcoordVar, sampler.color[samplerType], false, samplerType);
|
|
};
|
|
|
|
if (m_programInfo.majorVersion() >= 2) {
|
|
DxsoSamplerType samplerType =
|
|
SamplerTypeFromTextureType(sampler.type);
|
|
|
|
SampleType(samplerType);
|
|
}
|
|
else {
|
|
std::array<SpirvSwitchCaseLabel, 3> typeCaseLabels = {{
|
|
{ uint32_t(SamplerTypeTexture2D), m_module.allocateId() },
|
|
{ uint32_t(SamplerTypeTexture3D), m_module.allocateId() },
|
|
{ uint32_t(SamplerTypeTextureCube), m_module.allocateId() },
|
|
}};
|
|
|
|
uint32_t switchEndLabel = m_module.allocateId();
|
|
|
|
uint32_t typeId = m_module.defIntType(32, 0);
|
|
|
|
uint32_t offset = m_module.consti32(samplerIdx * 2);
|
|
uint32_t bitCnt = m_module.consti32(2);
|
|
uint32_t type = m_module.opBitFieldUExtract(typeId, m_ps.samplerTypeSpec, offset, bitCnt);
|
|
|
|
m_module.opSelectionMerge(switchEndLabel, spv::SelectionControlMaskNone);
|
|
m_module.opSwitch(type,
|
|
typeCaseLabels[uint32_t(SamplerTypeTexture2D)].labelId,
|
|
typeCaseLabels.size(),
|
|
typeCaseLabels.data());
|
|
|
|
for (const auto& label : typeCaseLabels) {
|
|
m_module.opLabel(label.labelId);
|
|
|
|
SampleType(DxsoSamplerType(label.literal));
|
|
|
|
m_module.opBranch(switchEndLabel);
|
|
}
|
|
|
|
m_module.opLabel(switchEndLabel);
|
|
}
|
|
}
|
|
|
|
void DxsoCompiler::emitTextureKill(const DxsoInstructionContext& ctx) {
|
|
DxsoRegisterValue texReg;
|
|
|
|
if (m_programInfo.majorVersion() >= 2 ||
|
|
(m_programInfo.majorVersion() == 1
|
|
&& m_programInfo.minorVersion() == 4)) // SM 2.0+ or 1.4
|
|
texReg = emitRegisterLoadRaw(ctx.dst, ctx.dst.hasRelative ? &ctx.dst.relative : nullptr);
|
|
else { // SM 1.0-1.3
|
|
DxsoRegister texcoord;
|
|
texcoord.id = { DxsoRegisterType::PixelTexcoord, ctx.dst.id.num };
|
|
|
|
texReg = emitRegisterLoadRaw(texcoord, nullptr);
|
|
}
|
|
|
|
std::array<uint32_t, 4> indices = { 0, 1, 2, 3 };
|
|
|
|
// On SM1 it only works on the first
|
|
if (m_programInfo.majorVersion() < 2) {
|
|
texReg.type.ccount = 3;
|
|
|
|
texReg.id = m_module.opVectorShuffle(
|
|
getVectorTypeId(texReg.type),
|
|
texReg.id, texReg.id,
|
|
texReg.type.ccount, indices.data());
|
|
}
|
|
|
|
const uint32_t boolVecTypeId =
|
|
getVectorTypeId({ DxsoScalarType::Bool, texReg.type.ccount });
|
|
|
|
uint32_t result = m_module.opFOrdLessThan(
|
|
boolVecTypeId, texReg.id,
|
|
m_module.constfReplicant(0.0f, texReg.type.ccount));
|
|
|
|
result = m_module.opAny(m_module.defBoolType(), result);
|
|
|
|
if (m_ps.killState == 0) {
|
|
uint32_t labelIf = m_module.allocateId();
|
|
uint32_t labelEnd = m_module.allocateId();
|
|
|
|
m_module.opSelectionMerge(labelEnd, spv::SelectionControlMaskNone);
|
|
m_module.opBranchConditional(result, labelIf, labelEnd);
|
|
|
|
m_module.opLabel(labelIf);
|
|
|
|
if (m_moduleInfo.options.useDemoteToHelperInvocation) {
|
|
m_module.opDemoteToHelperInvocation();
|
|
m_module.opBranch(labelEnd);
|
|
} else {
|
|
// OpKill terminates the block
|
|
m_module.opKill();
|
|
}
|
|
|
|
m_module.opLabel(labelEnd);
|
|
}
|
|
else {
|
|
uint32_t typeId = m_module.defBoolType();
|
|
|
|
uint32_t killState = m_module.opLoad (typeId, m_ps.killState);
|
|
killState = m_module.opLogicalOr(typeId, killState, result);
|
|
m_module.opStore(m_ps.killState, killState);
|
|
|
|
if (m_moduleInfo.options.useSubgroupOpsForEarlyDiscard) {
|
|
uint32_t ballot = m_module.opGroupNonUniformBallot(
|
|
getVectorTypeId({ DxsoScalarType::Uint32, 4 }),
|
|
m_module.constu32(spv::ScopeSubgroup),
|
|
killState);
|
|
|
|
uint32_t laneId = m_module.opLoad(
|
|
getScalarTypeId(DxsoScalarType::Uint32),
|
|
m_ps.builtinLaneId);
|
|
|
|
uint32_t laneIdPart = m_module.opShiftRightLogical(
|
|
getScalarTypeId(DxsoScalarType::Uint32),
|
|
laneId, m_module.constu32(5));
|
|
|
|
uint32_t laneMask = m_module.opVectorExtractDynamic(
|
|
getScalarTypeId(DxsoScalarType::Uint32),
|
|
ballot, laneIdPart);
|
|
|
|
uint32_t laneIdQuad = m_module.opBitwiseAnd(
|
|
getScalarTypeId(DxsoScalarType::Uint32),
|
|
laneId, m_module.constu32(0x1c));
|
|
|
|
laneMask = m_module.opShiftRightLogical(
|
|
getScalarTypeId(DxsoScalarType::Uint32),
|
|
laneMask, laneIdQuad);
|
|
|
|
laneMask = m_module.opBitwiseAnd(
|
|
getScalarTypeId(DxsoScalarType::Uint32),
|
|
laneMask, m_module.constu32(0xf));
|
|
|
|
uint32_t killSubgroup = m_module.opIEqual(
|
|
m_module.defBoolType(),
|
|
laneMask, m_module.constu32(0xf));
|
|
|
|
uint32_t labelIf = m_module.allocateId();
|
|
uint32_t labelEnd = m_module.allocateId();
|
|
|
|
m_module.opSelectionMerge(labelEnd, spv::SelectionControlMaskNone);
|
|
m_module.opBranchConditional(killSubgroup, labelIf, labelEnd);
|
|
|
|
// OpKill terminates the block
|
|
m_module.opLabel(labelIf);
|
|
m_module.opKill();
|
|
|
|
m_module.opLabel(labelEnd);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
uint32_t DxsoCompiler::emitSample(
|
|
bool projected,
|
|
uint32_t resultType,
|
|
uint32_t sampledImage,
|
|
uint32_t coordinates,
|
|
uint32_t reference,
|
|
const SpirvImageOperands& operands) {
|
|
const bool depthCompare = reference != 0;
|
|
const bool explicitLod =
|
|
(operands.flags & spv::ImageOperandsLodMask)
|
|
|| (operands.flags & spv::ImageOperandsGradMask);
|
|
|
|
if (projected) {
|
|
if (depthCompare) {
|
|
if (explicitLod)
|
|
return m_module.opImageSampleProjDrefExplicitLod(resultType, sampledImage, coordinates, reference, operands);
|
|
else
|
|
return m_module.opImageSampleProjDrefImplicitLod(resultType, sampledImage, coordinates, reference, operands);
|
|
}
|
|
else {
|
|
if (explicitLod)
|
|
return m_module.opImageSampleProjExplicitLod(resultType, sampledImage, coordinates, operands);
|
|
else
|
|
return m_module.opImageSampleProjImplicitLod(resultType, sampledImage, coordinates, operands);
|
|
}
|
|
}
|
|
else {
|
|
if (depthCompare) {
|
|
if (explicitLod)
|
|
return m_module.opImageSampleDrefExplicitLod(resultType, sampledImage, coordinates, reference, operands);
|
|
else
|
|
return m_module.opImageSampleDrefImplicitLod(resultType, sampledImage, coordinates, reference, operands);
|
|
}
|
|
else {
|
|
if (explicitLod)
|
|
return m_module.opImageSampleExplicitLod(resultType, sampledImage, coordinates, operands);
|
|
else
|
|
return m_module.opImageSampleImplicitLod(resultType, sampledImage, coordinates, operands);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
void DxsoCompiler::emitInputSetup() {
|
|
uint32_t pointCoord = 0;
|
|
D3D9PointSizeInfoPS pointInfo;
|
|
|
|
if (m_programInfo.type() == DxsoProgramType::PixelShader) {
|
|
pointCoord = GetPointCoord(m_module, m_entryPointInterfaces);
|
|
pointInfo = GetPointSizeInfoPS(m_module, m_rsBlock);
|
|
}
|
|
|
|
for (uint32_t i = 0; i < m_isgn.elemCount; i++) {
|
|
const auto& elem = m_isgn.elems[i];
|
|
const uint32_t slot = elem.slot;
|
|
|
|
DxsoRegisterInfo info;
|
|
info.type.ctype = DxsoScalarType::Float32;
|
|
info.type.ccount = 4;
|
|
info.type.alength = 1;
|
|
info.sclass = spv::StorageClassInput;
|
|
|
|
DxsoRegisterPointer inputPtr;
|
|
inputPtr.id = emitNewVariable(info);
|
|
inputPtr.type.ctype = DxsoScalarType::Float32;
|
|
inputPtr.type.ccount = info.type.ccount;
|
|
|
|
m_module.decorateLocation(inputPtr.id, slot);
|
|
|
|
std::string name =
|
|
str::format("in_", elem.semantic.usage, elem.semantic.usageIndex);
|
|
m_module.setDebugName(inputPtr.id, name.c_str());
|
|
|
|
if (elem.centroid)
|
|
m_module.decorate(inputPtr.id, spv::DecorationCentroid);
|
|
|
|
m_entryPointInterfaces.push_back(inputPtr.id);
|
|
|
|
uint32_t typeId = this->getVectorTypeId({ DxsoScalarType::Float32, 4 });
|
|
uint32_t ptrTypeId = m_module.defPointerType(typeId, spv::StorageClassPrivate);
|
|
|
|
uint32_t regNumVar = m_module.constu32(elem.regNumber);
|
|
|
|
DxsoRegisterPointer indexPtr;
|
|
indexPtr.id = m_module.opAccessChain(ptrTypeId, m_vArray, 1, ®NumVar);
|
|
indexPtr.type = inputPtr.type;
|
|
indexPtr.type.ccount = 4;
|
|
|
|
DxsoRegisterValue indexVal = this->emitValueLoad(inputPtr);
|
|
|
|
DxsoRegisterValue workingReg;
|
|
workingReg.type = indexVal.type;
|
|
|
|
workingReg.id = m_module.constvec4f32(0.0f, 0.0f, 0.0f, 0.0f);
|
|
|
|
DxsoRegMask mask = elem.mask;
|
|
if (mask.popCount() == 0)
|
|
mask = DxsoRegMask(true, true, true, true);
|
|
|
|
std::array<uint32_t, 4> indices = { 0, 1, 2, 3 };
|
|
uint32_t count = 0;
|
|
for (uint32_t i = 0; i < 4; i++) {
|
|
if (mask[i]) {
|
|
indices[i] = i + 4;
|
|
count++;
|
|
}
|
|
}
|
|
|
|
workingReg.id = m_module.opVectorShuffle(getVectorTypeId(workingReg.type),
|
|
workingReg.id, indexVal.id, 4, indices.data());
|
|
|
|
// We need to replace TEXCOORD inputs with gl_PointCoord
|
|
// if D3DRS_POINTSPRITEENABLE is set.
|
|
if (m_programInfo.type() == DxsoProgramType::PixelShader && elem.semantic.usage == DxsoUsage::Texcoord)
|
|
workingReg.id = m_module.opSelect(getVectorTypeId(workingReg.type), pointInfo.isSprite, pointCoord, workingReg.id);
|
|
|
|
if (m_programInfo.type() == DxsoProgramType::PixelShader && elem.semantic.usage == DxsoUsage::Color) {
|
|
if (elem.semantic.usageIndex == 0)
|
|
m_ps.diffuseColorIn = inputPtr.id;
|
|
else if (elem.semantic.usageIndex == 1)
|
|
m_ps.specularColorIn = inputPtr.id;
|
|
}
|
|
|
|
m_module.opStore(indexPtr.id, workingReg.id);
|
|
}
|
|
}
|
|
|
|
|
|
void DxsoCompiler::emitLinkerOutputSetup() {
|
|
bool outputtedColor0 = false;
|
|
bool outputtedColor1 = false;
|
|
|
|
for (uint32_t i = 0; i < m_osgn.elemCount; i++) {
|
|
const auto& elem = m_osgn.elems[i];
|
|
const uint32_t slot = elem.slot;
|
|
|
|
if (elem.semantic.usage == DxsoUsage::Color) {
|
|
if (elem.semantic.usageIndex == 0)
|
|
outputtedColor0 = true;
|
|
else
|
|
outputtedColor1 = true;
|
|
}
|
|
|
|
DxsoRegisterInfo info;
|
|
info.type.ctype = DxsoScalarType::Float32;
|
|
info.type.ccount = 4;
|
|
info.type.alength = 1;
|
|
info.sclass = spv::StorageClassOutput;
|
|
|
|
spv::BuiltIn builtIn =
|
|
semanticToBuiltIn(false, elem.semantic);
|
|
|
|
DxsoRegisterPointer outputPtr;
|
|
outputPtr.type.ctype = DxsoScalarType::Float32;
|
|
outputPtr.type.ccount = 4;
|
|
|
|
DxsoRegMask mask = elem.mask;
|
|
|
|
bool scalar = false;
|
|
|
|
if (builtIn == spv::BuiltInMax) {
|
|
outputPtr.id = emitNewVariableDefault(info,
|
|
m_module.constvec4f32(0.0f, 0.0f, 0.0f, 0.0f));
|
|
m_module.decorateLocation(outputPtr.id, slot);
|
|
|
|
std::string name =
|
|
str::format("out_", elem.semantic.usage, elem.semantic.usageIndex);
|
|
m_module.setDebugName(outputPtr.id, name.c_str());
|
|
}
|
|
else {
|
|
const char* name = "unknown_builtin";
|
|
if (builtIn == spv::BuiltInPosition)
|
|
name = "oPos";
|
|
else if (builtIn == spv::BuiltInPointSize) {
|
|
outputPtr.type.ccount = 1;
|
|
info.type.ccount = 1;
|
|
name = "oPSize";
|
|
bool maskValues[4];
|
|
for (uint32_t i = 0; i < 4; i++)
|
|
maskValues[i] = i == elem.mask.firstSet();
|
|
mask = DxsoRegMask(maskValues[0], maskValues[1], maskValues[2], maskValues[3]);
|
|
}
|
|
|
|
outputPtr.id = emitNewVariableDefault(info,
|
|
m_module.constfReplicant(0.0f, info.type.ccount));
|
|
|
|
m_module.setDebugName(outputPtr.id, name);
|
|
m_module.decorateBuiltIn(outputPtr.id, builtIn);
|
|
|
|
if (builtIn == spv::BuiltInPosition)
|
|
m_vs.oPos = outputPtr;
|
|
else if (builtIn == spv::BuiltInPointSize) {
|
|
scalar = true;
|
|
m_vs.oPSize = outputPtr;
|
|
}
|
|
}
|
|
|
|
m_entryPointInterfaces.push_back(outputPtr.id);
|
|
|
|
uint32_t typeId = this->getVectorTypeId({ DxsoScalarType::Float32, 4 });
|
|
uint32_t ptrTypeId = m_module.defPointerType(typeId, spv::StorageClassPrivate);
|
|
|
|
uint32_t regNumVar = m_module.constu32(elem.regNumber);
|
|
|
|
DxsoRegisterPointer indexPtr;
|
|
indexPtr.id = m_module.opAccessChain(ptrTypeId, m_oArray, 1, ®NumVar);
|
|
indexPtr.type = outputPtr.type;
|
|
indexPtr.type.ccount = 4;
|
|
|
|
DxsoRegisterValue indexVal = this->emitValueLoad(indexPtr);
|
|
|
|
DxsoRegisterValue workingReg;
|
|
workingReg.type.ctype = indexVal.type.ctype;
|
|
workingReg.type.ccount = scalar ? 1 : 4;
|
|
|
|
workingReg.id = scalar
|
|
? m_module.constf32(0.0f)
|
|
: m_module.constvec4f32(0.0f, 0.0f, 0.0f, 0.0f);
|
|
|
|
std::array<uint32_t, 4> indices = { 0, 1, 2, 3 };
|
|
|
|
if (scalar) {
|
|
workingReg.id = m_module.opCompositeExtract(getVectorTypeId(workingReg.type),
|
|
indexVal.id, 1, indices.data());
|
|
} else {
|
|
if (mask.popCount() == 0)
|
|
mask = DxsoRegMask(true, true, true, true);
|
|
|
|
uint32_t count = 0;
|
|
for (uint32_t i = 0; i < 4; i++) {
|
|
if (mask[i])
|
|
indices[count++] = i + 4;
|
|
}
|
|
|
|
|
|
workingReg.id = m_module.opVectorShuffle(getVectorTypeId(workingReg.type),
|
|
workingReg.id, indexVal.id, 4, indices.data());
|
|
}
|
|
|
|
// Ie. 0 or 1 for diffuse and specular color
|
|
// and for Shader Model 1 or 2
|
|
// (because those have dedicated color registers
|
|
// where this rule applies)
|
|
if (elem.semantic.usage == DxsoUsage::Color &&
|
|
elem.semantic.usageIndex < 2 &&
|
|
m_programInfo.majorVersion() < 3)
|
|
workingReg = emitSaturate(workingReg);
|
|
|
|
m_module.opStore(outputPtr.id, workingReg.id);
|
|
}
|
|
|
|
auto OutputDefault = [&](DxsoSemantic semantic) {
|
|
DxsoRegisterInfo info;
|
|
info.type.ctype = DxsoScalarType::Float32;
|
|
info.type.ccount = 4;
|
|
info.type.alength = 1;
|
|
info.sclass = spv::StorageClassOutput;
|
|
|
|
uint32_t slot = RegisterLinkerSlot(semantic);
|
|
|
|
uint32_t value = semantic == DxsoSemantic{ DxsoUsage::Color, 0 }
|
|
? m_module.constvec4f32(1.0f, 1.0f, 1.0f, 1.0f)
|
|
: m_module.constvec4f32(0.0f, 0.0f, 0.0f, 0.0f);
|
|
|
|
|
|
uint32_t outputPtr = emitNewVariableDefault(info, value);
|
|
|
|
m_module.decorateLocation(outputPtr, slot);
|
|
|
|
std::string name =
|
|
str::format("out_", semantic.usage, semantic.usageIndex, "_default");
|
|
|
|
m_module.setDebugName(outputPtr, name.c_str());
|
|
|
|
m_interfaceSlots.outputSlots |= 1u << slot;
|
|
m_entryPointInterfaces.push_back(outputPtr);
|
|
};
|
|
|
|
if (!outputtedColor0)
|
|
OutputDefault(DxsoSemantic{ DxsoUsage::Color, 0 });
|
|
|
|
if (!outputtedColor1)
|
|
OutputDefault(DxsoSemantic{ DxsoUsage::Color, 1 });
|
|
|
|
auto pointInfo = GetPointSizeInfoVS(m_module, m_vs.oPos.id, 0, 0, m_rsBlock);
|
|
|
|
if (m_vs.oPSize.id == 0) {
|
|
m_vs.oPSize = this->emitRegisterPtr(
|
|
"oPSize", DxsoScalarType::Float32, 1, 0,
|
|
spv::StorageClassOutput, spv::BuiltInPointSize);
|
|
|
|
uint32_t pointSize = m_module.opFClamp(m_module.defFloatType(32), pointInfo.defaultValue, pointInfo.min, pointInfo.max);
|
|
|
|
m_module.opStore(m_vs.oPSize.id, pointSize);
|
|
}
|
|
else {
|
|
uint32_t float_t = m_module.defFloatType(32);
|
|
uint32_t pointSize = m_module.opFClamp(m_module.defFloatType(32), m_module.opLoad(float_t, m_vs.oPSize.id), pointInfo.min, pointInfo.max);
|
|
m_module.opStore(m_vs.oPSize.id, pointSize);
|
|
}
|
|
}
|
|
|
|
|
|
void DxsoCompiler::emitVsClipping() {
|
|
uint32_t clipPlaneCountId = m_module.constu32(caps::MaxClipPlanes);
|
|
|
|
uint32_t floatType = m_module.defFloatType(32);
|
|
uint32_t vec4Type = m_module.defVectorType(floatType, 4);
|
|
|
|
// Declare uniform buffer containing clip planes
|
|
uint32_t clipPlaneArray = m_module.defArrayTypeUnique(vec4Type, clipPlaneCountId);
|
|
uint32_t clipPlaneStruct = m_module.defStructTypeUnique(1, &clipPlaneArray);
|
|
uint32_t clipPlaneBlock = m_module.newVar(
|
|
m_module.defPointerType(clipPlaneStruct, spv::StorageClassUniform),
|
|
spv::StorageClassUniform);
|
|
|
|
m_module.decorateArrayStride (clipPlaneArray, 16);
|
|
|
|
m_module.setDebugName (clipPlaneStruct, "clip_info_t");
|
|
m_module.setDebugMemberName (clipPlaneStruct, 0, "clip_planes");
|
|
m_module.decorate (clipPlaneStruct, spv::DecorationBlock);
|
|
m_module.memberDecorateOffset (clipPlaneStruct, 0, 0);
|
|
|
|
uint32_t bindingId = computeResourceSlotId(
|
|
m_programInfo.type(), DxsoBindingType::ConstantBuffer,
|
|
DxsoConstantBuffers::VSClipPlanes);
|
|
|
|
m_module.setDebugName (clipPlaneBlock, "clip_info");
|
|
m_module.decorateDescriptorSet(clipPlaneBlock, 0);
|
|
m_module.decorateBinding (clipPlaneBlock, bindingId);
|
|
|
|
DxvkResourceSlot resource;
|
|
resource.slot = bindingId;
|
|
resource.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
|
|
resource.view = VK_IMAGE_VIEW_TYPE_MAX_ENUM;
|
|
resource.access = VK_ACCESS_UNIFORM_READ_BIT;
|
|
m_resourceSlots.push_back(resource);
|
|
|
|
// Declare output array for clip distances
|
|
uint32_t clipDistArray = m_module.newVar(
|
|
m_module.defPointerType(
|
|
m_module.defArrayType(floatType, clipPlaneCountId),
|
|
spv::StorageClassOutput),
|
|
spv::StorageClassOutput);
|
|
|
|
m_module.decorateBuiltIn(clipDistArray, spv::BuiltInClipDistance);
|
|
m_entryPointInterfaces.push_back(clipDistArray);
|
|
|
|
if (m_moduleInfo.options.invariantPosition)
|
|
m_module.decorate(m_vs.oPos.id, spv::DecorationInvariant);
|
|
|
|
const uint32_t positionPtr = m_vs.oPos.id;
|
|
|
|
// We generated a bad shader, let's not make it even worse.
|
|
if (positionPtr == 0) {
|
|
Logger::warn("Shader without Position output. Something is likely wrong here.");
|
|
return;
|
|
}
|
|
|
|
// Compute clip distances
|
|
uint32_t positionId = m_module.opLoad(vec4Type, positionPtr);
|
|
|
|
for (uint32_t i = 0; i < caps::MaxClipPlanes; i++) {
|
|
std::array<uint32_t, 2> blockMembers = {{
|
|
m_module.constu32(0),
|
|
m_module.constu32(i),
|
|
}};
|
|
|
|
uint32_t planeId = m_module.opLoad(vec4Type,
|
|
m_module.opAccessChain(
|
|
m_module.defPointerType(vec4Type, spv::StorageClassUniform),
|
|
clipPlaneBlock, blockMembers.size(), blockMembers.data()));
|
|
|
|
uint32_t distId = m_module.opDot(floatType, positionId, planeId);
|
|
|
|
m_module.opStore(
|
|
m_module.opAccessChain(
|
|
m_module.defPointerType(floatType, spv::StorageClassOutput),
|
|
clipDistArray, 1, &blockMembers[1]),
|
|
distId);
|
|
}
|
|
}
|
|
|
|
|
|
void DxsoCompiler::setupRenderStateInfo() {
|
|
m_rsBlock = SetupRenderStateBlock(m_module);
|
|
|
|
// Only need alpha ref for PS 3.
|
|
// No FF fog component.
|
|
if (m_programInfo.type() == DxsoProgramType::PixelShader) {
|
|
if (m_programInfo.majorVersion() == 3) {
|
|
m_interfaceSlots.pushConstOffset = offsetof(D3D9RenderStateInfo, alphaRef);
|
|
m_interfaceSlots.pushConstSize = sizeof(float);
|
|
}
|
|
else {
|
|
m_interfaceSlots.pushConstOffset = 0;
|
|
m_interfaceSlots.pushConstSize = offsetof(D3D9RenderStateInfo, pointSize);
|
|
}
|
|
}
|
|
else {
|
|
m_interfaceSlots.pushConstOffset = offsetof(D3D9RenderStateInfo, pointSize);
|
|
// Point scale never triggers on programmable
|
|
m_interfaceSlots.pushConstSize = sizeof(float) * 3;
|
|
}
|
|
}
|
|
|
|
|
|
void DxsoCompiler::emitFog() {
|
|
DxsoRegister color0;
|
|
color0.id = DxsoRegisterId{ DxsoRegisterType::ColorOut, 0 };
|
|
auto oColor0Ptr = this->emitGetOperandPtr(color0);
|
|
|
|
DxsoRegister vFog;
|
|
vFog.id = DxsoRegisterId{ DxsoRegisterType::RasterizerOut, RasterOutFog };
|
|
auto vFogPtr = this->emitGetOperandPtr(vFog);
|
|
|
|
DxsoRegister vPos;
|
|
vPos.id = DxsoRegisterId{ DxsoRegisterType::MiscType, DxsoMiscTypeIndices::MiscTypePosition };
|
|
auto vPosPtr = this->emitGetOperandPtr(vPos);
|
|
|
|
D3D9FogContext fogCtx;
|
|
fogCtx.IsPixel = true;
|
|
fogCtx.RangeFog = false;
|
|
fogCtx.RenderState = m_rsBlock;
|
|
fogCtx.vPos = m_module.opLoad(getVectorTypeId(vPosPtr.type), vPosPtr.id);
|
|
fogCtx.vFog = m_module.opLoad(getVectorTypeId(vFogPtr.type), vFogPtr.id);
|
|
fogCtx.oColor = m_module.opLoad(getVectorTypeId(oColor0Ptr.type), oColor0Ptr.id);
|
|
|
|
m_module.opStore(oColor0Ptr.id, DoFixedFunctionFog(m_module, fogCtx));
|
|
}
|
|
|
|
|
|
void DxsoCompiler::emitPsProcessing() {
|
|
uint32_t boolType = m_module.defBoolType();
|
|
uint32_t floatType = m_module.defFloatType(32);
|
|
uint32_t floatPtr = m_module.defPointerType(floatType, spv::StorageClassPushConstant);
|
|
|
|
// Declare spec constants for render states
|
|
uint32_t alphaTestId = m_module.specConstBool(false);
|
|
uint32_t alphaFuncId = m_module.specConst32(m_module.defIntType(32, 0), uint32_t(VK_COMPARE_OP_ALWAYS));
|
|
|
|
m_module.setDebugName (alphaTestId, "alpha_test");
|
|
m_module.decorateSpecId (alphaTestId, getSpecId(D3D9SpecConstantId::AlphaTestEnable));
|
|
|
|
m_module.setDebugName (alphaFuncId, "alpha_func");
|
|
m_module.decorateSpecId (alphaFuncId, getSpecId(D3D9SpecConstantId::AlphaCompareOp));
|
|
|
|
// Implement alpha test and fog
|
|
DxsoRegister color0;
|
|
color0.id = DxsoRegisterId{ DxsoRegisterType::ColorOut, 0 };
|
|
auto oC0 = this->emitGetOperandPtr(color0);
|
|
|
|
if (oC0.id) {
|
|
if (m_programInfo.majorVersion() < 3)
|
|
emitFog();
|
|
|
|
// Labels for the alpha test
|
|
std::array<SpirvSwitchCaseLabel, 8> atestCaseLabels = {{
|
|
{ uint32_t(VK_COMPARE_OP_NEVER), m_module.allocateId() },
|
|
{ uint32_t(VK_COMPARE_OP_LESS), m_module.allocateId() },
|
|
{ uint32_t(VK_COMPARE_OP_EQUAL), m_module.allocateId() },
|
|
{ uint32_t(VK_COMPARE_OP_LESS_OR_EQUAL), m_module.allocateId() },
|
|
{ uint32_t(VK_COMPARE_OP_GREATER), m_module.allocateId() },
|
|
{ uint32_t(VK_COMPARE_OP_NOT_EQUAL), m_module.allocateId() },
|
|
{ uint32_t(VK_COMPARE_OP_GREATER_OR_EQUAL), m_module.allocateId() },
|
|
{ uint32_t(VK_COMPARE_OP_ALWAYS), m_module.allocateId() },
|
|
}};
|
|
|
|
uint32_t atestBeginLabel = m_module.allocateId();
|
|
uint32_t atestTestLabel = m_module.allocateId();
|
|
uint32_t atestDiscardLabel = m_module.allocateId();
|
|
uint32_t atestKeepLabel = m_module.allocateId();
|
|
uint32_t atestSkipLabel = m_module.allocateId();
|
|
|
|
// if (alpha_test) { ... }
|
|
m_module.opSelectionMerge(atestSkipLabel, spv::SelectionControlMaskNone);
|
|
m_module.opBranchConditional(alphaTestId, atestBeginLabel, atestSkipLabel);
|
|
m_module.opLabel(atestBeginLabel);
|
|
|
|
// Load alpha component
|
|
uint32_t alphaComponentId = 3;
|
|
uint32_t alphaId = m_module.opCompositeExtract(floatType,
|
|
m_module.opLoad(m_module.defVectorType(floatType, 4), oC0.id),
|
|
1, &alphaComponentId);
|
|
|
|
// Load alpha reference
|
|
uint32_t alphaRefMember = m_module.constu32(uint32_t(D3D9RenderStateItem::AlphaRef));
|
|
uint32_t alphaRefId = m_module.opLoad(floatType,
|
|
m_module.opAccessChain(floatPtr, m_rsBlock, 1, &alphaRefMember));
|
|
|
|
// switch (alpha_func) { ... }
|
|
m_module.opSelectionMerge(atestTestLabel, spv::SelectionControlMaskNone);
|
|
m_module.opSwitch(alphaFuncId,
|
|
atestCaseLabels[uint32_t(VK_COMPARE_OP_ALWAYS)].labelId,
|
|
atestCaseLabels.size(),
|
|
atestCaseLabels.data());
|
|
|
|
std::array<SpirvPhiLabel, 8> atestVariables;
|
|
|
|
for (uint32_t i = 0; i < atestCaseLabels.size(); i++) {
|
|
m_module.opLabel(atestCaseLabels[i].labelId);
|
|
|
|
atestVariables[i].labelId = atestCaseLabels[i].labelId;
|
|
atestVariables[i].varId = [&] {
|
|
switch (VkCompareOp(atestCaseLabels[i].literal)) {
|
|
case VK_COMPARE_OP_NEVER: return m_module.constBool(false);
|
|
case VK_COMPARE_OP_LESS: return m_module.opFOrdLessThan (boolType, alphaId, alphaRefId);
|
|
case VK_COMPARE_OP_EQUAL: return m_module.opFOrdEqual (boolType, alphaId, alphaRefId);
|
|
case VK_COMPARE_OP_LESS_OR_EQUAL: return m_module.opFOrdLessThanEqual (boolType, alphaId, alphaRefId);
|
|
case VK_COMPARE_OP_GREATER: return m_module.opFOrdGreaterThan (boolType, alphaId, alphaRefId);
|
|
case VK_COMPARE_OP_NOT_EQUAL: return m_module.opFOrdNotEqual (boolType, alphaId, alphaRefId);
|
|
case VK_COMPARE_OP_GREATER_OR_EQUAL: return m_module.opFOrdGreaterThanEqual(boolType, alphaId, alphaRefId);
|
|
default:
|
|
case VK_COMPARE_OP_ALWAYS: return m_module.constBool(true);
|
|
}
|
|
}();
|
|
|
|
m_module.opBranch(atestTestLabel);
|
|
}
|
|
|
|
// end switch
|
|
m_module.opLabel(atestTestLabel);
|
|
|
|
uint32_t atestResult = m_module.opPhi(boolType,
|
|
atestVariables.size(),
|
|
atestVariables.data());
|
|
uint32_t atestDiscard = m_module.opLogicalNot(boolType, atestResult);
|
|
|
|
atestResult = m_module.opLogicalNot(boolType, atestResult);
|
|
|
|
// if (do_discard) { ... }
|
|
m_module.opSelectionMerge(atestKeepLabel, spv::SelectionControlMaskNone);
|
|
m_module.opBranchConditional(atestDiscard, atestDiscardLabel, atestKeepLabel);
|
|
|
|
m_module.opLabel(atestDiscardLabel);
|
|
m_module.opKill();
|
|
|
|
// end if (do_discard)
|
|
m_module.opLabel(atestKeepLabel);
|
|
m_module.opBranch(atestSkipLabel);
|
|
|
|
// end if (alpha_test)
|
|
m_module.opLabel(atestSkipLabel);
|
|
}
|
|
}
|
|
|
|
void DxsoCompiler::emitOutputDepthClamp() {
|
|
// HACK: Some drivers do not clamp FragDepth to [minDepth..maxDepth]
|
|
// before writing to the depth attachment, but we do not have acccess
|
|
// to those. Clamp to [0..1] instead.
|
|
|
|
if (m_ps.oDepth.id != 0) {
|
|
auto result = emitValueLoad(m_ps.oDepth);
|
|
|
|
result = emitSaturate(result);
|
|
|
|
m_module.opStore(
|
|
m_ps.oDepth.id,
|
|
result.id);
|
|
}
|
|
}
|
|
|
|
|
|
void DxsoCompiler::emitVsFinalize() {
|
|
this->emitMainFunctionBegin();
|
|
|
|
this->emitInputSetup();
|
|
m_module.opFunctionCall(
|
|
m_module.defVoidType(),
|
|
m_vs.functionId, 0, nullptr);
|
|
this->emitLinkerOutputSetup();
|
|
|
|
this->emitVsClipping();
|
|
|
|
this->emitFunctionEnd();
|
|
}
|
|
|
|
void DxsoCompiler::emitPsFinalize() {
|
|
this->emitMainFunctionBegin();
|
|
|
|
this->emitInputSetup();
|
|
m_module.opFunctionCall(
|
|
m_module.defVoidType(),
|
|
m_ps.functionId, 0, nullptr);
|
|
|
|
if (m_ps.killState != 0) {
|
|
uint32_t labelIf = m_module.allocateId();
|
|
uint32_t labelEnd = m_module.allocateId();
|
|
|
|
uint32_t killTest = m_module.opLoad(m_module.defBoolType(), m_ps.killState);
|
|
|
|
m_module.opSelectionMerge(labelEnd, spv::SelectionControlMaskNone);
|
|
m_module.opBranchConditional(killTest, labelIf, labelEnd);
|
|
|
|
m_module.opLabel(labelIf);
|
|
m_module.opKill();
|
|
|
|
m_module.opLabel(labelEnd);
|
|
}
|
|
|
|
// r0 in PS1 is the colour output register. Move r0 -> cO0 here.
|
|
if (m_programInfo.majorVersion() == 1
|
|
&& m_programInfo.type() == DxsoProgramTypes::PixelShader) {
|
|
DxsoRegister r0;
|
|
r0.id = { DxsoRegisterType::Temp, 0 };
|
|
|
|
DxsoRegister c0;
|
|
c0.id = { DxsoRegisterType::ColorOut, 0 };
|
|
|
|
DxsoRegisterValue val = emitRegisterLoadRaw(r0, nullptr);
|
|
DxsoRegisterPointer out = emitGetOperandPtr(c0);
|
|
m_module.opStore(out.id, val.id);
|
|
}
|
|
|
|
// No need to setup output here as it's non-indexable
|
|
// everything has already gone to the right place!
|
|
|
|
this->emitPsProcessing();
|
|
this->emitOutputDepthClamp();
|
|
this->emitFunctionEnd();
|
|
}
|
|
|
|
|
|
|
|
// Returns the module type ID for a scalar type: 32-bit unsigned or
// signed integer, 32-bit float, or bool. Throws DxvkError for any
// other value.
uint32_t DxsoCompiler::getScalarTypeId(DxsoScalarType type) {
  if (type == DxsoScalarType::Uint32)
    return m_module.defIntType(32, 0);

  if (type == DxsoScalarType::Sint32)
    return m_module.defIntType(32, 1);

  if (type == DxsoScalarType::Float32)
    return m_module.defFloatType(32);

  if (type == DxsoScalarType::Bool)
    return m_module.defBoolType();

  throw DxvkError("DxsoCompiler: Invalid scalar type");
}
|
|
|
|
|
|
uint32_t DxsoCompiler::getVectorTypeId(const DxsoVectorType& type) {
|
|
uint32_t typeId = this->getScalarTypeId(type.ctype);
|
|
|
|
if (type.ccount > 1)
|
|
typeId = m_module.defVectorType(typeId, type.ccount);
|
|
|
|
return typeId;
|
|
}
|
|
|
|
|
|
uint32_t DxsoCompiler::getArrayTypeId(const DxsoArrayType& type) {
|
|
DxsoVectorType vtype;
|
|
vtype.ctype = type.ctype;
|
|
vtype.ccount = type.ccount;
|
|
|
|
uint32_t typeId = this->getVectorTypeId(vtype);
|
|
|
|
if (type.alength > 1) {
|
|
typeId = m_module.defArrayType(typeId,
|
|
m_module.constu32(type.alength));
|
|
}
|
|
|
|
return typeId;
|
|
}
|
|
|
|
|
|
uint32_t DxsoCompiler::getPointerTypeId(const DxsoRegisterInfo& type) {
|
|
return m_module.defPointerType(
|
|
this->getArrayTypeId(type.type),
|
|
type.sclass);
|
|
}
|
|
|
|
} |