From 438e9a487dec30744777b5f3d8d8491e1589174c Mon Sep 17 00:00:00 2001 From: Unknown <0.tamas.marton@gmail.com> Date: Fri, 17 Apr 2020 20:59:04 +0100 Subject: [PATCH] changed shader module creation so that it goes through the std way needs special handling, but it works --- QPUassembler/qpu_assembler.h | 3 +- QPUassembler/vc4_qpu_defines.h | 225 +------------------- QPUassembler/vc4_qpu_enums.h | 258 +++++++++++++++++++++++ driver/common.h | 2 +- driver/copy.c | 123 ++++++++++- driver/instance.c | 5 +- driver/shader.c | 103 ++++++++- driver/vkExt.h | 5 +- driver/vkExtFunctions.c | 135 ------------ driver/vkExtFunctions.h | 18 -- test/ETC/CMakeLists.txt | 2 +- test/ETC/ETC.cpp | 55 ++++- test/HDR/CMakeLists.txt | 2 +- test/HDR/HDR.cpp | 67 ++++-- test/MSAA/CMakeLists.txt | 2 +- test/MSAA/MSAA.cpp | 55 ++++- test/blending/CMakeLists.txt | 2 +- test/blending/blending.cpp | 55 ++++- test/clear/CMakeLists.txt | 2 +- test/cubemapping/CMakeLists.txt | 2 +- test/cubemapping/cubemapping.cpp | 55 ++++- test/depthTest/CMakeLists.txt | 2 +- test/depthTest/depthTest.cpp | 55 ++++- test/depthTex/CMakeLists.txt | 2 +- test/depthTex/depthTex.cpp | 55 ++++- test/indexedTriangle/CMakeLists.txt | 2 +- test/indexedTriangle/indexedTriangle.cpp | 55 ++++- test/mipmapping/CMakeLists.txt | 2 +- test/mipmapping/mipmapping.cpp | 55 ++++- test/query/CMakeLists.txt | 2 +- test/query/query.cpp | 55 ++++- test/stencilTest/CMakeLists.txt | 2 +- test/stencilTest/stencilTest.cpp | 66 ++++-- test/texturing/CMakeLists.txt | 2 +- test/texturing/texturing.cpp | 55 ++++- test/triangle/CMakeLists.txt | 2 +- test/triangle/triangle.cpp | 76 +++++-- test/varyings/CMakeLists.txt | 2 +- test/varyings/varyings.cpp | 55 ++++- 39 files changed, 1177 insertions(+), 544 deletions(-) create mode 100644 QPUassembler/vc4_qpu_enums.h delete mode 100644 driver/vkExtFunctions.c delete mode 100644 driver/vkExtFunctions.h diff --git a/QPUassembler/qpu_assembler.h b/QPUassembler/qpu_assembler.h index fb9ebf6..cc40374 
100644 --- a/QPUassembler/qpu_assembler.h +++ b/QPUassembler/qpu_assembler.h @@ -1,12 +1,13 @@ #pragma once #include -#include "vc4_qpu_defines.h" #ifdef __cplusplus extern "C" { #endif +#include "vc4_qpu_enums.h" + uint64_t encode_alu(qpu_sig_bits sig_bits, qpu_unpack unpack_mode, //If the pm bit is set, the unpack field programs the r4 unpack unit, diff --git a/QPUassembler/vc4_qpu_defines.h b/QPUassembler/vc4_qpu_defines.h index 0b6a901..1c027a5 100644 --- a/QPUassembler/vc4_qpu_defines.h +++ b/QPUassembler/vc4_qpu_defines.h @@ -27,32 +27,7 @@ #include #include -typedef enum{ - QPU_ALU, - QPU_SEM, - QPU_BRANCH, - QPU_LOAD_IMM -} qpu_alu_type; - -typedef enum{ - QPU_LOAD32, - QPU_LOAD16 -} qpu_load_type; - -//Condition Codes -//The QPU keeps a set of N, Z and C flag bits per 16 SIMD element. These flags are updated based on the result -//of the ADD ALU if the ‘sf’ bit is set. If the sf bit is set and the ADD ALU executes a NOP or its condition code was -//NEVER, flags are set based upon the result of the MUL ALU result. 
-typedef enum { - QPU_COND_NEVER, - QPU_COND_ALWAYS, - QPU_COND_ZS, //set - QPU_COND_ZC, //clear - QPU_COND_NS, - QPU_COND_NC, - QPU_COND_CS, - QPU_COND_CC, -} qpu_cond; +#include "vc4_qpu_enums.h" static const char *qpu_cond_str[] = { [QPU_COND_NEVER] = "never", @@ -65,22 +40,6 @@ static const char *qpu_cond_str[] = { [QPU_COND_CC] = "cc", }; -//ALU Input muxes -//selects one register for input -//The add_a, add_b, mul_a, and mul_b fields specify the input data for the A and B ports of the ADD and MUL -//pipelines, respectively -typedef enum { - /* hardware mux values */ - QPU_MUX_R0, - QPU_MUX_R1, - QPU_MUX_R2, - QPU_MUX_R3, - QPU_MUX_R4, //special purpose, read only - QPU_MUX_R5, //special purpose - QPU_MUX_A, - QPU_MUX_B, -} qpu_mux; - static const char *qpu_mux_str[] = { [QPU_MUX_R0] = "r0", [QPU_MUX_R1] = "r1", @@ -93,29 +52,6 @@ static const char *qpu_mux_str[] = { }; -//Signaling Bits -//The 4-bit signaling field signal is connected to the 3d pipeline and is set to indicate one of a number of -//conditions to the 3d hardware. Values from this field are also used to encode a ‘BKPT’ instruction, and to -//encode Branches and Load Immediate instructions. 
-typedef enum { - QPU_SIG_SW_BREAKPOINT, - QPU_SIG_NONE, - QPU_SIG_THREAD_SWITCH, - QPU_SIG_PROG_END, - QPU_SIG_WAIT_FOR_SCOREBOARD, //stall until this QPU can safely access tile buffer - QPU_SIG_SCOREBOARD_UNLOCK, - QPU_SIG_LAST_THREAD_SWITCH, - QPU_SIG_COVERAGE_LOAD, //from tile buffer to r4 - QPU_SIG_COLOR_LOAD, //from tile buffer to r4 - QPU_SIG_COLOR_LOAD_END, //color load and program end - QPU_SIG_LOAD_TMU0, //read data from TMU0 to r4 - QPU_SIG_LOAD_TMU1, //read data from TMU1 to r4 - QPU_SIG_ALPHA_MASK_LOAD, //from tile buffer to r4 - QPU_SIG_SMALL_IMM, //ALU instruction with raddr_b specifying small immediate or vector rotate - QPU_SIG_LOAD_IMM, //load immediate instruction - QPU_SIG_BRANCH -} qpu_sig_bits; - static const char *qpu_sig_bits_str[] = { [QPU_SIG_SW_BREAKPOINT] = "sig_brk", [QPU_SIG_NONE] = "sig_none", @@ -184,19 +120,6 @@ static uint8_t qpu_encode_small_immediate(uint32_t i) return ~0; } -//QPU unpack values -//(can be used to unpack from r4 too) -typedef enum { - QPU_UNPACK_NOP, - QPU_UNPACK_16A, //from A reg: convert 16bit float to 32bit float, or 16bit int to 32bit int, depending on the instruction - QPU_UNPACK_16B, - QPU_UNPACK_8D_REP, //replicate most significant byte (alpha) across word: {a, a, a, a} - QPU_UNPACK_8A, //convert 8bit color in range [0...1] to 32bit float or 32bit int, depending on the instruction - QPU_UNPACK_8B, - QPU_UNPACK_8C, - QPU_UNPACK_8D, -} qpu_unpack; - static const char *qpu_unpack_str[] = { [QPU_UNPACK_NOP] = "nop", [QPU_UNPACK_16A] = "16a", @@ -208,28 +131,6 @@ static const char *qpu_unpack_str[] = { [QPU_UNPACK_8D] = "8d", }; -//QPU pack regfile A -typedef enum { - QPU_PACK_A_NOP, - QPU_PACK_A_16A, //convert to 16 bit float if float input, or to int16 (just takes least significant 16bits) - QPU_PACK_A_16B, - QPU_PACK_A_8888, //convert to 8bit uint (just takes least significant 8bits) and replicate across all bytes of 32bit word - QPU_PACK_A_8A, // Convert to 8-bit unsigned int. 
(just takes least significant 8bits) - QPU_PACK_A_8B, - QPU_PACK_A_8C, - QPU_PACK_A_8D, - - // Saturating variants of the previous instructions. - QPU_PACK_A_32_SAT, //saturate signed 32bit number (takes into account overflow/carry flags) - QPU_PACK_A_16A_SAT, //convert to 16bit float if float input, or int16, depending on input (with saturation) - QPU_PACK_A_16B_SAT, - QPU_PACK_A_8888_SAT, //convert to uint8 with saturation and replicate across all bytes of 32bit word - QPU_PACK_A_8A_SAT, //conver to uint8 with saturation - QPU_PACK_A_8B_SAT, - QPU_PACK_A_8C_SAT, - QPU_PACK_A_8D_SAT, -} qpu_pack_a; - static const char *qpu_pack_a_str[] = { [QPU_PACK_A_NOP] = "nop", [QPU_PACK_A_16A] = "16a", @@ -250,16 +151,6 @@ static const char *qpu_pack_a_str[] = { [QPU_PACK_A_8D_SAT] = "8d.sat", }; -//QPU pack MUL ALU values -typedef enum { - QPU_PACK_MUL_NOP, - QPU_PACK_MUL_8888 = 3, // converts mul float result to 8bit color in range [0...1] and replicate across all bytes of 32bit word - QPU_PACK_MUL_8A, // converts mul float result to 8bit color in range [0...1] - QPU_PACK_MUL_8B, - QPU_PACK_MUL_8C, - QPU_PACK_MUL_8D, -} qpu_pack_mul; - static const char *qpu_pack_mul_str[] = { [QPU_PACK_MUL_NOP] = "nop", [QPU_PACK_MUL_8888] = "8888", @@ -269,23 +160,6 @@ static const char *qpu_pack_mul_str[] = { [QPU_PACK_MUL_8D] = "8d", }; -typedef enum { - QPU_COND_BRANCH_ALL_ZS, //all z flags set - QPU_COND_BRANCH_ALL_ZC, //all z flags clear - QPU_COND_BRANCH_ANY_ZS, - QPU_COND_BRANCH_ANY_ZC, - QPU_COND_BRANCH_ALL_NS, - QPU_COND_BRANCH_ALL_NC, - QPU_COND_BRANCH_ANY_NS, - QPU_COND_BRANCH_ANY_NC, - QPU_COND_BRANCH_ALL_CS, - QPU_COND_BRANCH_ALL_CC, - QPU_COND_BRANCH_ANY_CS, - QPU_COND_BRANCH_ANY_CC, - - QPU_COND_BRANCH_ALWAYS = 15 //always execute -} qpu_branch_cond; - static const char *qpu_branch_cond_str[] = { [QPU_COND_BRANCH_ALL_ZS] = "all_zs", [QPU_COND_BRANCH_ALL_ZC] = "all_zc", @@ -302,34 +176,6 @@ static const char *qpu_branch_cond_str[] = { [QPU_COND_BRANCH_ALWAYS] = "always", }; 
-//QPU ADD instruction set -typedef enum { - QPU_A_NOP, - QPU_A_FADD, //float add - QPU_A_FSUB, - QPU_A_FMIN, - QPU_A_FMAX, - QPU_A_FMINABS, //float min(abs(x)) - QPU_A_FMAXABS, - QPU_A_FTOI, //convert float to int - QPU_A_ITOF, //convert int to float - QPU_A_ADD = 12, //int add - QPU_A_SUB, - QPU_A_SHR, //int shift right - QPU_A_ASR, //int arithmetic shift right - QPU_A_ROR, //int rotate right - QPU_A_SHL, //int shift left - QPU_A_MIN, - QPU_A_MAX, - QPU_A_AND, - QPU_A_OR, - QPU_A_XOR, - QPU_A_NOT, - QPU_A_CLZ, //int count leading zeroes - QPU_A_V8ADDS = 30, //add with saturation per 8bit element - QPU_A_V8SUBS = 31, -} qpu_op_add; - static const char *qpu_op_add_str[] = { [QPU_A_NOP] = "nop", [QPU_A_FADD] = "fadd", @@ -357,18 +203,6 @@ static const char *qpu_op_add_str[] = { [QPU_A_V8SUBS] = "v8subs", }; -//QPU MUL instruction set -typedef enum { - QPU_M_NOP, - QPU_M_FMUL, //float mul - QPU_M_MUL24, //24bit int mul? - QPU_M_V8MULD, //mul two vectors of 8bit ints in range [0...1] - QPU_M_V8MIN, - QPU_M_V8MAX, - QPU_M_V8ADDS, //add two vectors of 8bit ints in range [0...1] with saturation - QPU_M_V8SUBS, -} qpu_op_mul; - static const char *qpu_op_mul_str[] = { [QPU_M_NOP] = "nop", [QPU_M_FMUL] = "fmul", @@ -383,25 +217,6 @@ static const char *qpu_op_mul_str[] = { //read and write ops may mean different things... 
//hence two maps -//QPU register address read map -typedef enum { - QPU_R_FRAG_PAYLOAD_ZW = 15, /* W for A file, Z for B file */ - /* 0-31 are the plain regfile a or b fields */ - QPU_R_UNIF = 32, //uniform read - QPU_R_VARY = 35, //varying read - QPU_R_ELEM_QPU = 38, //element number - QPU_R_NOP, - QPU_R_XY_PIXEL_COORD = 41, // X for regfile a, Y for regfile b - QPU_R_MS_FLAGS = 42, //A reg - QPU_R_REV_FLAG = 42, //B reg - QPU_R_VPM = 48, - QPU_R_VPM_LD_BUSY = 49, //load busy for reg A - QPU_R_VPM_ST_BUSY = 49, //store busy for reg B - QPU_R_VPM_LD_WAIT = 50, //load wait for reg A - QPU_R_VPM_ST_WAIT = 50, //store wait for reg B - QPU_R_MUTEX_ACQUIRE, -} qpu_raddr; - static const char *qpu_raddr_str[][52] = { { //A //ra0-31 @@ -433,44 +248,6 @@ static const char *qpu_raddr_str[][52] = { } }; -//QPU register address write map -typedef enum { - /* 0-31 are the plain regfile a or b fields */ - QPU_W_ACC0 = 32, //accumulation 0, aka r0 - QPU_W_ACC1, - QPU_W_ACC2, - QPU_W_ACC3, - QPU_W_TMU_NOSWAP, - QPU_W_ACC5, //replicate pixel0 per quad for reg A, replicate SIMD element0 for reg B - QPU_W_HOST_INT, //host interrupt - QPU_W_NOP, - QPU_W_UNIFORMS_ADDRESS, - QPU_W_QUAD_XY, // X for regfile a, Y for regfile b - QPU_W_MS_FLAGS = 42, //A reg - QPU_W_REV_FLAG = 42, //B reg - QPU_W_TLB_STENCIL_SETUP = 43, - QPU_W_TLB_Z, - QPU_W_TLB_COLOR_MS, - QPU_W_TLB_COLOR_ALL, - QPU_W_TLB_ALPHA_MASK, - QPU_W_VPM, - QPU_W_VPMVCD_SETUP, /* LD for regfile a, ST for regfile b */ - QPU_W_VPM_ADDR, /* LD for regfile a, ST for regfile b */ - QPU_W_MUTEX_RELEASE, - QPU_W_SFU_RECIP, //special function unit 1/x - QPU_W_SFU_RECIPSQRT, //1/sqrt(x) - QPU_W_SFU_EXP, - QPU_W_SFU_LOG, - QPU_W_TMU0_S, - QPU_W_TMU0_T, - QPU_W_TMU0_R, - QPU_W_TMU0_B, - QPU_W_TMU1_S, - QPU_W_TMU1_T, - QPU_W_TMU1_R, - QPU_W_TMU1_B, -} qpu_waddr; - static const char *qpu_waddr_str[][64] = { { //A //ra0-31 diff --git a/QPUassembler/vc4_qpu_enums.h b/QPUassembler/vc4_qpu_enums.h new file mode 100644 index 0000000..2bfcaab --- 
/dev/null +++ b/QPUassembler/vc4_qpu_enums.h @@ -0,0 +1,258 @@ +/* + * Copyright © 2014 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef VC4_QPU_ENUMS_H +#define VC4_QPU_ENUMS_H + +#include +#include + +typedef enum{ + QPU_ALU, + QPU_SEM, + QPU_BRANCH, + QPU_LOAD_IMM +} qpu_alu_type; + +typedef enum{ + QPU_LOAD32, + QPU_LOAD16 +} qpu_load_type; + +//Condition Codes +//The QPU keeps a set of N, Z and C flag bits per 16 SIMD element. These flags are updated based on the result +//of the ADD ALU if the ‘sf’ bit is set. If the sf bit is set and the ADD ALU executes a NOP or its condition code was +//NEVER, flags are set based upon the result of the MUL ALU result. 
+typedef enum { + QPU_COND_NEVER, + QPU_COND_ALWAYS, + QPU_COND_ZS, //set + QPU_COND_ZC, //clear + QPU_COND_NS, + QPU_COND_NC, + QPU_COND_CS, + QPU_COND_CC, +} qpu_cond; + +//ALU Input muxes +//selects one register for input +//The add_a, add_b, mul_a, and mul_b fields specify the input data for the A and B ports of the ADD and MUL +//pipelines, respectively +typedef enum { + /* hardware mux values */ + QPU_MUX_R0, + QPU_MUX_R1, + QPU_MUX_R2, + QPU_MUX_R3, + QPU_MUX_R4, //special purpose, read only + QPU_MUX_R5, //special purpose + QPU_MUX_A, + QPU_MUX_B, +} qpu_mux; + +//Signaling Bits +//The 4-bit signaling field signal is connected to the 3d pipeline and is set to indicate one of a number of +//conditions to the 3d hardware. Values from this field are also used to encode a ‘BKPT’ instruction, and to +//encode Branches and Load Immediate instructions. +typedef enum { + QPU_SIG_SW_BREAKPOINT, + QPU_SIG_NONE, + QPU_SIG_THREAD_SWITCH, + QPU_SIG_PROG_END, + QPU_SIG_WAIT_FOR_SCOREBOARD, //stall until this QPU can safely access tile buffer + QPU_SIG_SCOREBOARD_UNLOCK, + QPU_SIG_LAST_THREAD_SWITCH, + QPU_SIG_COVERAGE_LOAD, //from tile buffer to r4 + QPU_SIG_COLOR_LOAD, //from tile buffer to r4 + QPU_SIG_COLOR_LOAD_END, //color load and program end + QPU_SIG_LOAD_TMU0, //read data from TMU0 to r4 + QPU_SIG_LOAD_TMU1, //read data from TMU1 to r4 + QPU_SIG_ALPHA_MASK_LOAD, //from tile buffer to r4 + QPU_SIG_SMALL_IMM, //ALU instruction with raddr_b specifying small immediate or vector rotate + QPU_SIG_LOAD_IMM, //load immediate instruction + QPU_SIG_BRANCH +} qpu_sig_bits; + +//QPU unpack values +//(can be used to unpack from r4 too) +typedef enum { + QPU_UNPACK_NOP, + QPU_UNPACK_16A, //from A reg: convert 16bit float to 32bit float, or 16bit int to 32bit int, depending on the instruction + QPU_UNPACK_16B, + QPU_UNPACK_8D_REP, //replicate most significant byte (alpha) across word: {a, a, a, a} + QPU_UNPACK_8A, //convert 8bit color in range [0...1] to 32bit float or 32bit 
int, depending on the instruction + QPU_UNPACK_8B, + QPU_UNPACK_8C, + QPU_UNPACK_8D, +} qpu_unpack; + +//QPU pack regfile A +typedef enum { + QPU_PACK_A_NOP, + QPU_PACK_A_16A, //convert to 16 bit float if float input, or to int16 (just takes least significant 16bits) + QPU_PACK_A_16B, + QPU_PACK_A_8888, //convert to 8bit uint (just takes least significant 8bits) and replicate across all bytes of 32bit word + QPU_PACK_A_8A, // Convert to 8-bit unsigned int. (just takes least significant 8bits) + QPU_PACK_A_8B, + QPU_PACK_A_8C, + QPU_PACK_A_8D, + + // Saturating variants of the previous instructions. + QPU_PACK_A_32_SAT, //saturate signed 32bit number (takes into account overflow/carry flags) + QPU_PACK_A_16A_SAT, //convert to 16bit float if float input, or int16, depending on input (with saturation) + QPU_PACK_A_16B_SAT, + QPU_PACK_A_8888_SAT, //convert to uint8 with saturation and replicate across all bytes of 32bit word + QPU_PACK_A_8A_SAT, //convert to uint8 with saturation + QPU_PACK_A_8B_SAT, + QPU_PACK_A_8C_SAT, + QPU_PACK_A_8D_SAT, +} qpu_pack_a; + +//QPU pack MUL ALU values +typedef enum { + QPU_PACK_MUL_NOP, + QPU_PACK_MUL_8888 = 3, // converts mul float result to 8bit color in range [0...1] and replicate across all bytes of 32bit word + QPU_PACK_MUL_8A, // converts mul float result to 8bit color in range [0...1] + QPU_PACK_MUL_8B, + QPU_PACK_MUL_8C, + QPU_PACK_MUL_8D, +} qpu_pack_mul; + +typedef enum { + QPU_COND_BRANCH_ALL_ZS, //all z flags set + QPU_COND_BRANCH_ALL_ZC, //all z flags clear + QPU_COND_BRANCH_ANY_ZS, + QPU_COND_BRANCH_ANY_ZC, + QPU_COND_BRANCH_ALL_NS, + QPU_COND_BRANCH_ALL_NC, + QPU_COND_BRANCH_ANY_NS, + QPU_COND_BRANCH_ANY_NC, + QPU_COND_BRANCH_ALL_CS, + QPU_COND_BRANCH_ALL_CC, + QPU_COND_BRANCH_ANY_CS, + QPU_COND_BRANCH_ANY_CC, + + QPU_COND_BRANCH_ALWAYS = 15 //always execute +} qpu_branch_cond; + +//QPU ADD instruction set +typedef enum { + QPU_A_NOP, + QPU_A_FADD, //float add + QPU_A_FSUB, + QPU_A_FMIN, + QPU_A_FMAX, + QPU_A_FMINABS, 
//float min(abs(x)) + QPU_A_FMAXABS, + QPU_A_FTOI, //convert float to int + QPU_A_ITOF, //convert int to float + QPU_A_ADD = 12, //int add + QPU_A_SUB, + QPU_A_SHR, //int shift right + QPU_A_ASR, //int arithmetic shift right + QPU_A_ROR, //int rotate right + QPU_A_SHL, //int shift left + QPU_A_MIN, + QPU_A_MAX, + QPU_A_AND, + QPU_A_OR, + QPU_A_XOR, + QPU_A_NOT, + QPU_A_CLZ, //int count leading zeroes + QPU_A_V8ADDS = 30, //add with saturation per 8bit element + QPU_A_V8SUBS = 31, +} qpu_op_add; + +//QPU MUL instruction set +typedef enum { + QPU_M_NOP, + QPU_M_FMUL, //float mul + QPU_M_MUL24, //24bit int mul? + QPU_M_V8MULD, //mul two vectors of 8bit ints in range [0...1] + QPU_M_V8MIN, + QPU_M_V8MAX, + QPU_M_V8ADDS, //add two vectors of 8bit ints in range [0...1] with saturation + QPU_M_V8SUBS, +} qpu_op_mul; + +//read and write ops may mean different things... +//hence two maps + +//QPU register address read map +typedef enum { + QPU_R_FRAG_PAYLOAD_ZW = 15, /* W for A file, Z for B file */ + /* 0-31 are the plain regfile a or b fields */ + QPU_R_UNIF = 32, //uniform read + QPU_R_VARY = 35, //varying read + QPU_R_ELEM_QPU = 38, //element number + QPU_R_NOP, + QPU_R_XY_PIXEL_COORD = 41, // X for regfile a, Y for regfile b + QPU_R_MS_FLAGS = 42, //A reg + QPU_R_REV_FLAG = 42, //B reg + QPU_R_VPM = 48, + QPU_R_VPM_LD_BUSY = 49, //load busy for reg A + QPU_R_VPM_ST_BUSY = 49, //store busy for reg B + QPU_R_VPM_LD_WAIT = 50, //load wait for reg A + QPU_R_VPM_ST_WAIT = 50, //store wait for reg B + QPU_R_MUTEX_ACQUIRE, +} qpu_raddr; + +//QPU register address write map +typedef enum { + /* 0-31 are the plain regfile a or b fields */ + QPU_W_ACC0 = 32, //accumulation 0, aka r0 + QPU_W_ACC1, + QPU_W_ACC2, + QPU_W_ACC3, + QPU_W_TMU_NOSWAP, + QPU_W_ACC5, //replicate pixel0 per quad for reg A, replicate SIMD element0 for reg B + QPU_W_HOST_INT, //host interrupt + QPU_W_NOP, + QPU_W_UNIFORMS_ADDRESS, + QPU_W_QUAD_XY, // X for regfile a, Y for regfile b + QPU_W_MS_FLAGS = 42, //A 
reg + QPU_W_REV_FLAG = 42, //B reg + QPU_W_TLB_STENCIL_SETUP = 43, + QPU_W_TLB_Z, + QPU_W_TLB_COLOR_MS, + QPU_W_TLB_COLOR_ALL, + QPU_W_TLB_ALPHA_MASK, + QPU_W_VPM, + QPU_W_VPMVCD_SETUP, /* LD for regfile a, ST for regfile b */ + QPU_W_VPM_ADDR, /* LD for regfile a, ST for regfile b */ + QPU_W_MUTEX_RELEASE, + QPU_W_SFU_RECIP, //special function unit 1/x + QPU_W_SFU_RECIPSQRT, //1/sqrt(x) + QPU_W_SFU_EXP, + QPU_W_SFU_LOG, + QPU_W_TMU0_S, + QPU_W_TMU0_T, + QPU_W_TMU0_R, + QPU_W_TMU0_B, + QPU_W_TMU1_S, + QPU_W_TMU1_T, + QPU_W_TMU1_R, + QPU_W_TMU1_B, +} qpu_waddr; + +#endif /* VC4_QPU_ENUMS_H */ diff --git a/driver/common.h b/driver/common.h index 7586a41..ca7e151 100644 --- a/driver/common.h +++ b/driver/common.h @@ -255,7 +255,7 @@ typedef struct VkShaderModule_T { uint32_t bos[RPI_ASSEMBLY_TYPE_MAX]; uint32_t sizes[RPI_ASSEMBLY_TYPE_MAX]; - uint64_t* instructions[RPI_ASSEMBLY_TYPE_MAX]; + //uint64_t* instructions[RPI_ASSEMBLY_TYPE_MAX]; VkRpiAssemblyMappingEXT* mappings; uint32_t numMappings; uint32_t hasThreadSwitch; diff --git a/driver/copy.c b/driver/copy.c index cea6861..f266f6b 100644 --- a/driver/copy.c +++ b/driver/copy.c @@ -1,7 +1,8 @@ #include "common.h" #include "declarations.h" -#include "vkExtFunctions.h" + +#include "QPUassembler/qpu_assembler.h" //TODO change allocations to pool allocator @@ -600,18 +601,68 @@ void createBufferToTextureShaderModule(VkDevice device, VkShaderModule* blitShad } }; + uint32_t spirv[6]; + + uint64_t* asm_ptrs[4]; + uint32_t asm_sizes[4]; + VkRpiShaderModuleAssemblyCreateInfoEXT shaderModuleCreateInfo = {}; - shaderModuleCreateInfo.asmStrings = blit_asm_strings; + shaderModuleCreateInfo.instructions = asm_ptrs; + shaderModuleCreateInfo.numInstructions = asm_sizes; shaderModuleCreateInfo.mappings = blit_mappings; shaderModuleCreateInfo.numMappings = sizeof(blit_mappings) / sizeof(VkRpiAssemblyMappingEXT); - shaderModuleCreateInfo.pShaderModule = blitShaderModule; - ((_device*)device)->dev->customData = 
(uintptr_t)&shaderModuleCreateInfo; + //TODO use allocator - //PFN_vkCreateShaderModuleFromRpiAssemblyEXT vkCreateShaderModuleFromRpiAssemblyEXT = (PFN_vkCreateShaderModuleFromRpiAssemblyEXT)vkGetInstanceProcAddr(instance, "vkCreateShaderModuleFromRpiAssemblyEXT"); + { //assemble cs code + asm_sizes[0] = get_num_instructions(cs_asm_code); + uint32_t size = sizeof(uint64_t)*asm_sizes[0]; + //TODO this alloc feels kinda useless, we just copy the data anyway to kernel space + //why not map kernel space mem to user space instead? + asm_ptrs[0] = (uint64_t*)malloc(size); + assemble_qpu_asm(cs_asm_code, asm_ptrs[0]); + } - VkResult res = rpi_vkCreateShaderModuleFromRpiAssemblyEXT(((_device*)device)->dev); + { //assemble vs code + asm_sizes[1] = get_num_instructions(vs_asm_code); + uint32_t size = sizeof(uint64_t)*asm_sizes[1]; + //TODO this alloc feels kinda useless, we just copy the data anyway to kernel space + //why not map kernel space mem to user space instead? + asm_ptrs[1] = (uint64_t*)malloc(size); + assemble_qpu_asm(vs_asm_code, asm_ptrs[1]); + } + + { //assemble fs code + asm_sizes[2] = get_num_instructions(blit_fs_asm_code); + uint32_t size = sizeof(uint64_t)*asm_sizes[2]; + //TODO this alloc feels kinda useless, we just copy the data anyway to kernel space + //why not map kernel space mem to user space instead? 
+ asm_ptrs[2] = (uint64_t*)malloc(size); + assemble_qpu_asm(blit_fs_asm_code, asm_ptrs[2]); + } + + asm_sizes[3] = 0; + asm_ptrs[3] = 0; + + spirv[0] = 0x07230203; + spirv[1] = 0x00010000; + spirv[2] = 0x14E45250; + spirv[3] = 1; + spirv[4] = (uint32_t)&shaderModuleCreateInfo; + //words start here + spirv[5] = 1 << 16; + + VkShaderModuleCreateInfo smci = {}; + smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + smci.codeSize = sizeof(uint32_t)*6; + smci.pCode = spirv; + rpi_vkCreateShaderModule(device, &smci, 0, blitShaderModule); assert(blitShaderModule); + + for(uint32_t c = 0; c < 4; ++c) + { + free(asm_ptrs[c]); + } } void createTextureToTextureShaderModule(VkDevice device, VkShaderModule* blitShaderModule) @@ -819,16 +870,68 @@ void createTextureToTextureShaderModule(VkDevice device, VkShaderModule* blitSha }; + uint32_t spirv[6]; + + uint64_t* asm_ptrs[4]; + uint32_t asm_sizes[4]; + VkRpiShaderModuleAssemblyCreateInfoEXT shaderModuleCreateInfo = {}; - shaderModuleCreateInfo.asmStrings = blit_asm_strings; + shaderModuleCreateInfo.instructions = asm_ptrs; + shaderModuleCreateInfo.numInstructions = asm_sizes; shaderModuleCreateInfo.mappings = blit_mappings; shaderModuleCreateInfo.numMappings = sizeof(blit_mappings) / sizeof(VkRpiAssemblyMappingEXT); - shaderModuleCreateInfo.pShaderModule = blitShaderModule; - ((_device*)device)->dev->customData = (uintptr_t)&shaderModuleCreateInfo; + //TODO use allocator - VkResult res = rpi_vkCreateShaderModuleFromRpiAssemblyEXT(((_device*)device)->dev); + { //assemble cs code + asm_sizes[0] = get_num_instructions(cs_asm_code); + uint32_t size = sizeof(uint64_t)*asm_sizes[0]; + //TODO this alloc feels kinda useless, we just copy the data anyway to kernel space + //why not map kernel space mem to user space instead? 
+ asm_ptrs[0] = (uint64_t*)malloc(size); + assemble_qpu_asm(cs_asm_code, asm_ptrs[0]); + } + + { //assemble vs code + asm_sizes[1] = get_num_instructions(vs_asm_code); + uint32_t size = sizeof(uint64_t)*asm_sizes[1]; + //TODO this alloc feels kinda useless, we just copy the data anyway to kernel space + //why not map kernel space mem to user space instead? + asm_ptrs[1] = (uint64_t*)malloc(size); + assemble_qpu_asm(vs_asm_code, asm_ptrs[1]); + } + + { //assemble fs code + asm_sizes[2] = get_num_instructions(sample_fs_asm_code); + uint32_t size = sizeof(uint64_t)*asm_sizes[2]; + //TODO this alloc feels kinda useless, we just copy the data anyway to kernel space + //why not map kernel space mem to user space instead? + asm_ptrs[2] = (uint64_t*)malloc(size); + assemble_qpu_asm(sample_fs_asm_code, asm_ptrs[2]); + } + + asm_sizes[3] = 0; + asm_ptrs[3] = 0; + + spirv[0] = 0x07230203; + spirv[1] = 0x00010000; + spirv[2] = 0x14E45250; + spirv[3] = 1; + spirv[4] = (uint32_t)&shaderModuleCreateInfo; + //words start here + spirv[5] = 1 << 16; + + VkShaderModuleCreateInfo smci = {}; + smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + smci.codeSize = sizeof(uint32_t)*6; + smci.pCode = spirv; + rpi_vkCreateShaderModule(device, &smci, 0, blitShaderModule); assert(blitShaderModule); + + for(uint32_t c = 0; c < 4; ++c) + { + free(asm_ptrs[c]); + } } void setupEmulationResources(VkDevice device) diff --git a/driver/instance.c b/driver/instance.c index 633baa0..e6fe45e 100644 --- a/driver/instance.c +++ b/driver/instance.c @@ -3,7 +3,6 @@ #include #include "declarations.h" -#include "vkExtFunctions.h" #define RETFUNC(f) if(!strcmp(pName, #f)) return &rpi_##f @@ -37,8 +36,8 @@ VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetPhysicalDeviceProcAddr(VkInsta { void* ptr = 0; - if(!strcmp(pName, "vkCreateShaderModuleFromRpiAssemblyEXT")) - ptr = &rpi_vkCreateShaderModuleFromRpiAssemblyEXT; +// if(!strcmp(pName, "vkCreateShaderModuleFromRpiAssemblyEXT")) +// ptr = 
&rpi_vkCreateShaderModuleFromRpiAssemblyEXT;
 
 	return ptr;
 }
diff --git a/driver/shader.c b/driver/shader.c
index f57a4e2..442a991 100644
--- a/driver/shader.c
+++ b/driver/shader.c
@@ -6,13 +6,139 @@
 
 #include "vkExt.h"
 
+//TODO collect shader performance data
+//eg number of texture samples etc.
+//TODO check if shader has flow control and make sure instance also has flow control
+//TODO make sure instance has threaded fs if shader contains thread switch
+
 /*
  * https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkCreateShaderModule
  */
 VkResult rpi_vkCreateShaderModule(VkDevice device, const VkShaderModuleCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkShaderModule* pShaderModule)
 {
-	UNSUPPORTED(vkCreateShaderModule);
-	return UNSUPPORTED_RETURN;
+	//Creates a shader module from pre-assembled QPU instructions.
+	//pCode is not real SPIR-V here: word 2 has to hold the driver magic and
+	//word 4 the address of a VkRpiShaderModuleAssemblyCreateInfoEXT, which
+	//has to stay valid while this call runs.
+	//Carrying an address in one 32bit word only works with 32bit pointers.
+	//Returns VK_SUCCESS, VK_ERROR_INVALID_SHADER_NV if the blob is not ours,
+	//or VK_ERROR_OUT_OF_HOST_MEMORY if a host allocation fails.
+	const uint32_t magicWord = 2;
+	const uint32_t createInfoWord = 4;
+
+	assert(pCreateInfo);
+	assert(pShaderModule);
+
+	//don't read the magic or the pointer word out of a blob that's too short to hold them
+	if(!pCreateInfo->pCode || pCreateInfo->codeSize < (createInfoWord + 1) * sizeof(uint32_t))
+	{
+		return VK_ERROR_INVALID_SHADER_NV;
+	}
+
+	//shader magic doesn't add up
+	if(pCreateInfo->pCode[magicWord] != 0x14E45250)
+	{
+		return VK_ERROR_INVALID_SHADER_NV;
+	}
+
+	VkRpiShaderModuleAssemblyCreateInfoEXT* ci = (VkRpiShaderModuleAssemblyCreateInfoEXT*)(uintptr_t)pCreateInfo->pCode[createInfoWord];
+
+	assert(ci);
+	assert(ci->instructions);
+	assert(ci->numInstructions);
+
+	_shaderModule* shader = ALLOCATE(sizeof(_shaderModule), 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+
+	if(!shader)
+	{
+		return VK_ERROR_OUT_OF_HOST_MEMORY;
+	}
+
+	shader->hasThreadSwitch = 0;
+	shader->numVaryings = 0;
+	shader->numMappings = ci->numMappings;
+	shader->mappings = 0;
+
+	//copy the mappings before any buffer object gets created, so that running
+	//out of memory here only has the shader struct to give back
+	if(ci->numMappings > 0)
+	{
+		shader->mappings = ALLOCATE(sizeof(VkRpiAssemblyMappingEXT)*ci->numMappings, 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+
+		if(!shader->mappings)
+		{
+			FREE(shader);
+			return VK_ERROR_OUT_OF_HOST_MEMORY;
+		}
+
+		memcpy(shader->mappings, ci->mappings, sizeof(VkRpiAssemblyMappingEXT)*ci->numMappings);
+	}
+
+	for(int c = 0; c < RPI_ASSEMBLY_TYPE_MAX; ++c)
+	{
+		if(!ci->instructions[c])
+		{
+			shader->bos[c] = 0;
+			shader->sizes[c] = 0;
+			continue;
+		}
+
+		//NOTE(review): the counter restarts for every stage that has code, so what is left
+		//at the end is the count of the last such stage -- confirm that's intended
+		shader->numVaryings = 0;
+
+		for(uint32_t d = 0; d < ci->numInstructions[c]; ++d)
+		{
+			uint64_t instruction = ci->instructions[c][d];
+			//shift the unsigned word down instead of a mask up: 0xfll << 60 doesn't fit a signed long long
+			unsigned sig_bits = (unsigned)((instruction >> 60) & 0xf);
+			unsigned is_sem = ((instruction >> 57) & 0x7f) == 0x74;
+
+			//signal 2 is the thread switch
+			if(sig_bits == 2)
+			{
+				shader->hasThreadSwitch = 1;
+			}
+
+			//only ALU instructions are looked at: skip semaphore, load immediate (14) and branch (15)
+			if(is_sem || sig_bits == 14 || sig_bits == 15)
+			{
+				continue;
+			}
+
+			unsigned raddr_a = (unsigned)((instruction >> 18) & 0x3f);
+			unsigned raddr_b = (unsigned)((instruction >> 12) & 0x3f);
+
+			//35 is the varying read address
+			if(raddr_a == 35)
+			{
+				shader->numVaryings++;
+			}
+
+			//don't count small immediates
+			if(sig_bits != 13 && raddr_b == 35)
+			{
+				shader->numVaryings++;
+			}
+		}
+
+		shader->sizes[c] = ci->numInstructions[c]*sizeof(uint64_t);
+
+		for(uint32_t e = 0; e < ci->numInstructions[c]; ++e)
+		{
+			//uint64_t isn't unsigned long long everywhere, %llx needs the cast
+			printf("%#llx ", (unsigned long long)ci->instructions[c][e]);
+			disassemble_qpu_asm(ci->instructions[c][e]);
+		}
+		printf("\n");
+
+		//NOTE(review): the result isn't checked, it's not visible here how vc4_bo_alloc_shader reports failure
+		shader->bos[c] = vc4_bo_alloc_shader(controlFd, ci->instructions[c], &shader->sizes[c]);
+	}
+
+	*pShaderModule = shader;
+
+	return VK_SUCCESS;
 }
 
 void rpi_vkDestroyShaderModule(VkDevice device, VkShaderModule shaderModule, const VkAllocationCallbacks* pAllocator)
diff --git a/driver/vkExt.h b/driver/vkExt.h
index 8a387e1..d9c6533 100644
--- a/driver/vkExt.h
+++ b/driver/vkExt.h
@@ -84,11 +84,10 @@ typedef struct VkRpiAssemblyMappingEXT {
 typedef struct VkRpiShaderModuleAssemblyCreateInfoEXT {
 	VkStructureType sType;
 	const void* pNext;
-	char** asmStrings;
+	uint64_t** instructions;
+	uint32_t* numInstructions;
 	VkRpiAssemblyMappingEXT* mappings;
 	uint32_t numMappings;
-	const VkAllocationCallbacks* pAllocator;
-	VkShaderModule* pShaderModule;
 } VkRpiShaderModuleAssemblyCreateInfoEXT;
 
 #ifdef __cplusplus
diff --git a/driver/vkExtFunctions.c b/driver/vkExtFunctions.c
deleted file mode 100644
index bbcddfa..0000000
--- a/driver/vkExtFunctions.c
+++ /dev/null
@@ -1,135 +0,0 @@
-#pragma once
-
-#include "common.h"
-#include "QPUassembler/qpu_assembler.h"
-#include "modeset.h"
-#include "vkExtFunctions.h"
- -#ifdef __cplusplus -extern "C" { -#endif - -//TODO collect shader performance data -//eg number of texture samples etc. -//TODO check if shader has flow control and make sure instance also has flow control -//TODO make sure instance has threaded fs if shader contains thread switch - -VkResult rpi_vkCreateShaderModuleFromRpiAssemblyEXT(VkPhysicalDevice physicalDevice) -{ - assert(physicalDevice); - - _physicalDevice* ptr = physicalDevice; - VkRpiShaderModuleAssemblyCreateInfoEXT* ci = ptr->customData; - const const VkAllocationCallbacks* pAllocator = ci->pAllocator; - - assert(ci); - assert(ci->pShaderModule); - assert(ci->asmStrings); - - _shaderModule* shader = ALLOCATE(sizeof(_shaderModule), 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - - if(!shader) - { - return VK_ERROR_OUT_OF_HOST_MEMORY; - } - - shader->hasThreadSwitch = 0; - - for(int c = 0; c < RPI_ASSEMBLY_TYPE_MAX; ++c) - { - if(ci->asmStrings[c]) - { - uint32_t numInstructions = get_num_instructions(ci->asmStrings[c]); - uint32_t size = sizeof(uint64_t)*numInstructions; - //TODO this alloc feels kinda useless, we just copy the data anyway to kernel space - //why not map kernel space mem to user space instead? 
- shader->instructions[c] = ALLOCATE(size, 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if(!shader->instructions[c]) - { - return VK_ERROR_OUT_OF_HOST_MEMORY; - } - - //need to create a temporary copy as the assembly algorithm is destructive - uint32_t stringLength = strlen(ci->asmStrings[c]); - char* tmpShaderStr = ALLOCATE(stringLength+1, 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - memcpy(tmpShaderStr, ci->asmStrings[c], stringLength+1); - - assemble_qpu_asm(tmpShaderStr, shader->instructions[c]); - - FREE(tmpShaderStr); - - for(uint64_t d = 0; d < numInstructions; ++d) - { - uint64_t s = (shader->instructions[c][d] & (0xfll << 60)) >> 60; - if(s == 2ll) - { - shader->hasThreadSwitch = 1; - break; - } - } - - shader->numVaryings = 0; - for(uint64_t d = 0; d < numInstructions; ++d) - { - unsigned is_sem = ((shader->instructions[c][d] & (0x7fll << 57)) >> 57) == 0x74; - unsigned sig_bits = ((shader->instructions[c][d] & (0xfll << 60)) >> 60); - - //if it's an ALU instruction - if(!is_sem && sig_bits != 14 && sig_bits != 15) - { - unsigned raddr_a = ((shader->instructions[c][d] & (0x3fll << 18)) >> 18); - unsigned raddr_b = ((shader->instructions[c][d] & (0x3fll << 12)) >> 12); - - if(raddr_a == 35) - { - shader->numVaryings++; - } - - //don't count small immediates - if(sig_bits != 13 && raddr_b == 35) - { - shader->numVaryings++; - } - } - } - - shader->sizes[c] = size; - - - for(uint64_t e = 0; e < shader->sizes[c] / 8; ++e) - { - printf("%#llx ", shader->instructions[c][e]); - disassemble_qpu_asm(shader->instructions[c][e]); - } - printf("\n"); - shader->bos[c] = vc4_bo_alloc_shader(controlFd, shader->instructions[c], &shader->sizes[c]); - } - else - { - shader->bos[c] = 0; - shader->sizes[c] = 0; - } - } - - shader->numMappings = ci->numMappings; - - if(ci->numMappings > 0) - { - shader->mappings = ALLOCATE(sizeof(VkRpiAssemblyMappingEXT)*ci->numMappings, 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - - if(!shader->mappings) - { - return VK_ERROR_OUT_OF_HOST_MEMORY; - } - - 
memcpy(shader->mappings, ci->mappings, sizeof(VkRpiAssemblyMappingEXT)*ci->numMappings); - } - - *ci->pShaderModule = shader; - - return VK_SUCCESS; -} - -#ifdef __cplusplus -} -#endif diff --git a/driver/vkExtFunctions.h b/driver/vkExtFunctions.h deleted file mode 100644 index d2a0a62..0000000 --- a/driver/vkExtFunctions.h +++ /dev/null @@ -1,18 +0,0 @@ -#pragma once - -#include "vkExt.h" - -#ifdef __cplusplus -extern "C" { -#endif - -//extension that allows developers to submit QPU assembly directly and thus hand optimise code -extern VkResult rpi_vkCreateShaderModuleFromRpiAssemblyEXT( - VkPhysicalDevice physicalDevice); - -//TODO performance counters / perfmon - - -#ifdef __cplusplus -} -#endif diff --git a/test/ETC/CMakeLists.txt b/test/ETC/CMakeLists.txt index 7e9502c..b3ff028 100644 --- a/test/ETC/CMakeLists.txt +++ b/test/ETC/CMakeLists.txt @@ -6,4 +6,4 @@ file(GLOB testSrc add_executable(ETC ${testSrc} ) target_compile_options(ETC PRIVATE -Wall -std=c++11 -std=c11) -target_link_libraries(ETC vulkan) +target_link_libraries(ETC vulkan $) diff --git a/test/ETC/ETC.cpp b/test/ETC/ETC.cpp index 88c2664..18c7eee 100644 --- a/test/ETC/ETC.cpp +++ b/test/ETC/ETC.cpp @@ -7,6 +7,7 @@ #include #include "driver/vkExt.h" +#include "QPUassembler/qpu_assembler.h" //#define GLFW_INCLUDE_VULKAN //#define VK_USE_PLATFORM_WIN32_KHR @@ -1178,23 +1179,59 @@ void CreateShaders() } }; + uint32_t spirv[6]; + + uint64_t* asm_ptrs[4]; + uint32_t asm_sizes[4]; + VkRpiShaderModuleAssemblyCreateInfoEXT shaderModuleCreateInfo = {}; - shaderModuleCreateInfo.asmStrings = sample_asm_strings; + shaderModuleCreateInfo.instructions = asm_ptrs; + shaderModuleCreateInfo.numInstructions = asm_sizes; shaderModuleCreateInfo.mappings = sample_mappings; shaderModuleCreateInfo.numMappings = sizeof(sample_mappings) / sizeof(VkRpiAssemblyMappingEXT); - shaderModuleCreateInfo.pShaderModule = &sampleShaderModule; - LoaderTrampoline* trampoline = (LoaderTrampoline*)physicalDevice; - VkRpiPhysicalDevice* 
realPhysicalDevice = trampoline->loaderTerminator->physicalDevice; + { //assemble cs code + asm_sizes[0] = get_num_instructions(cs_asm_code); + uint32_t size = sizeof(uint64_t)*asm_sizes[0]; + asm_ptrs[0] = (uint64_t*)malloc(size); + assemble_qpu_asm(cs_asm_code, asm_ptrs[0]); + } - realPhysicalDevice->customData = (uintptr_t)&shaderModuleCreateInfo; + { //assemble vs code + asm_sizes[1] = get_num_instructions(vs_asm_code); + uint32_t size = sizeof(uint64_t)*asm_sizes[1]; + asm_ptrs[1] = (uint64_t*)malloc(size); + assemble_qpu_asm(vs_asm_code, asm_ptrs[1]); + } - PFN_vkCreateShaderModuleFromRpiAssemblyEXT vkCreateShaderModuleFromRpiAssemblyEXT = (PFN_vkCreateShaderModuleFromRpiAssemblyEXT)vkGetInstanceProcAddr(instance, "vkCreateShaderModuleFromRpiAssemblyEXT"); + { //assemble fs code + asm_sizes[2] = get_num_instructions(sample_fs_asm_code); + uint32_t size = sizeof(uint64_t)*asm_sizes[2]; + asm_ptrs[2] = (uint64_t*)malloc(size); + assemble_qpu_asm(sample_fs_asm_code, asm_ptrs[2]); + } - VkResult res = vkCreateShaderModuleFromRpiAssemblyEXT(physicalDevice); - assert(sampleShaderModule); + asm_sizes[3] = 0; + asm_ptrs[3] = 0; - //exit(-1); + spirv[0] = 0x07230203; + spirv[1] = 0x00010000; + spirv[2] = 0x14E45250; + spirv[3] = 1; + spirv[4] = (uint32_t)&shaderModuleCreateInfo; + //words start here + spirv[5] = 1 << 16; + + VkShaderModuleCreateInfo smci = {}; + smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + smci.codeSize = sizeof(uint32_t)*6; + smci.pCode = spirv; + vkCreateShaderModule(device, &smci, 0, &sampleShaderModule); + + for(uint32_t c = 0; c < 4; ++c) + { + free(asm_ptrs[c]); + } } diff --git a/test/HDR/CMakeLists.txt b/test/HDR/CMakeLists.txt index df0454e..173ef19 100644 --- a/test/HDR/CMakeLists.txt +++ b/test/HDR/CMakeLists.txt @@ -6,4 +6,4 @@ file(GLOB testSrc add_executable(HDR ${testSrc} ) target_compile_options(HDR PRIVATE -Wall -std=c++11 -std=c11) -target_link_libraries(HDR vulkan) +target_link_libraries(HDR vulkan $) diff --git 
a/test/HDR/HDR.cpp b/test/HDR/HDR.cpp index 1276611..8bc524a 100644 --- a/test/HDR/HDR.cpp +++ b/test/HDR/HDR.cpp @@ -7,6 +7,7 @@ #include #include "driver/vkExt.h" +#include "QPUassembler/qpu_assembler.h" //#define GLFW_INCLUDE_VULKAN //#define VK_USE_PLATFORM_WIN32_KHR @@ -1188,28 +1189,68 @@ void CreateShaders() } }; + uint32_t spirv[6]; + + uint64_t* asm_ptrs[4]; + uint32_t asm_sizes[4]; + VkRpiShaderModuleAssemblyCreateInfoEXT shaderModuleCreateInfo = {}; - shaderModuleCreateInfo.asmStrings = sample_asm_strings; + shaderModuleCreateInfo.instructions = asm_ptrs; + shaderModuleCreateInfo.numInstructions = asm_sizes; shaderModuleCreateInfo.mappings = sample_mappings; shaderModuleCreateInfo.numMappings = sizeof(sample_mappings) / sizeof(VkRpiAssemblyMappingEXT); - shaderModuleCreateInfo.pShaderModule = &sampleShaderModule; - LoaderTrampoline* trampoline = (LoaderTrampoline*)physicalDevice; - VkRpiPhysicalDevice* realPhysicalDevice = trampoline->loaderTerminator->physicalDevice; + { //assemble cs code + asm_sizes[0] = get_num_instructions(cs_asm_code); + uint32_t size = sizeof(uint64_t)*asm_sizes[0]; + asm_ptrs[0] = (uint64_t*)malloc(size); + assemble_qpu_asm(cs_asm_code, asm_ptrs[0]); + } - realPhysicalDevice->customData = (uintptr_t)&shaderModuleCreateInfo; + { //assemble vs code + asm_sizes[1] = get_num_instructions(vs_asm_code); + uint32_t size = sizeof(uint64_t)*asm_sizes[1]; + asm_ptrs[1] = (uint64_t*)malloc(size); + assemble_qpu_asm(vs_asm_code, asm_ptrs[1]); + } - PFN_vkCreateShaderModuleFromRpiAssemblyEXT vkCreateShaderModuleFromRpiAssemblyEXT = (PFN_vkCreateShaderModuleFromRpiAssemblyEXT)vkGetInstanceProcAddr(instance, "vkCreateShaderModuleFromRpiAssemblyEXT"); + { //assemble fs code + asm_sizes[2] = get_num_instructions(sample_fs_asm_code); + uint32_t size = sizeof(uint64_t)*asm_sizes[2]; + asm_ptrs[2] = (uint64_t*)malloc(size); + assemble_qpu_asm(sample_fs_asm_code, asm_ptrs[2]); + } - VkResult res = 
vkCreateShaderModuleFromRpiAssemblyEXT(physicalDevice); - assert(sampleShaderModule); + asm_sizes[3] = 0; + asm_ptrs[3] = 0; - shaderModuleCreateInfo.asmStrings = hdr_asm_strings; - shaderModuleCreateInfo.pShaderModule = &HDRshaderModule; - res = vkCreateShaderModuleFromRpiAssemblyEXT(physicalDevice); - assert(HDRshaderModule); + spirv[0] = 0x07230203; + spirv[1] = 0x00010000; + spirv[2] = 0x14E45250; + spirv[3] = 1; + spirv[4] = (uint32_t)&shaderModuleCreateInfo; + //words start here + spirv[5] = 1 << 16; - //exit(-1); + VkShaderModuleCreateInfo smci = {}; + smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + smci.codeSize = sizeof(uint32_t)*6; + smci.pCode = spirv; + vkCreateShaderModule(device, &smci, 0, &sampleShaderModule); + + { //assemble fs code + asm_sizes[2] = get_num_instructions(hdr_fs_asm_code); + uint32_t size = sizeof(uint64_t)*asm_sizes[2]; + asm_ptrs[2] = (uint64_t*)malloc(size); + assemble_qpu_asm(hdr_fs_asm_code, asm_ptrs[2]); + } + + vkCreateShaderModule(device, &smci, 0, &HDRshaderModule); + + for(uint32_t c = 0; c < 4; ++c) + { + free(asm_ptrs[c]); + } } diff --git a/test/MSAA/CMakeLists.txt b/test/MSAA/CMakeLists.txt index 684ed4f..de17666 100644 --- a/test/MSAA/CMakeLists.txt +++ b/test/MSAA/CMakeLists.txt @@ -6,4 +6,4 @@ file(GLOB testSrc add_executable(MSAA ${testSrc} ) target_compile_options(MSAA PRIVATE -Wall -std=c++11 -std=c11) -target_link_libraries(MSAA vulkan) +target_link_libraries(MSAA vulkan $) diff --git a/test/MSAA/MSAA.cpp b/test/MSAA/MSAA.cpp index b187b55..e46f8c0 100644 --- a/test/MSAA/MSAA.cpp +++ b/test/MSAA/MSAA.cpp @@ -7,6 +7,7 @@ #include #include "driver/vkExt.h" +#include "QPUassembler/qpu_assembler.h" //#define GLFW_INCLUDE_VULKAN //#define VK_USE_PLATFORM_WIN32_KHR @@ -1068,21 +1069,59 @@ void CreateShaders() } }; + uint32_t spirv[6]; + + uint64_t* asm_ptrs[4]; + uint32_t asm_sizes[4]; + VkRpiShaderModuleAssemblyCreateInfoEXT shaderModuleCreateInfo = {}; - shaderModuleCreateInfo.asmStrings = asm_strings; + 
shaderModuleCreateInfo.instructions = asm_ptrs; + shaderModuleCreateInfo.numInstructions = asm_sizes; shaderModuleCreateInfo.mappings = mappings; shaderModuleCreateInfo.numMappings = sizeof(mappings) / sizeof(VkRpiAssemblyMappingEXT); - shaderModuleCreateInfo.pShaderModule = &shaderModule; - LoaderTrampoline* trampoline = (LoaderTrampoline*)physicalDevice; - VkRpiPhysicalDevice* realPhysicalDevice = trampoline->loaderTerminator->physicalDevice; + { //assemble cs code + asm_sizes[0] = get_num_instructions(cs_asm_code); + uint32_t size = sizeof(uint64_t)*asm_sizes[0]; + asm_ptrs[0] = (uint64_t*)malloc(size); + assemble_qpu_asm(cs_asm_code, asm_ptrs[0]); + } - realPhysicalDevice->customData = (uintptr_t)&shaderModuleCreateInfo; + { //assemble vs code + asm_sizes[1] = get_num_instructions(vs_asm_code); + uint32_t size = sizeof(uint64_t)*asm_sizes[1]; + asm_ptrs[1] = (uint64_t*)malloc(size); + assemble_qpu_asm(vs_asm_code, asm_ptrs[1]); + } - PFN_vkCreateShaderModuleFromRpiAssemblyEXT vkCreateShaderModuleFromRpiAssemblyEXT = (PFN_vkCreateShaderModuleFromRpiAssemblyEXT)vkGetInstanceProcAddr(instance, "vkCreateShaderModuleFromRpiAssemblyEXT"); + { //assemble fs code + asm_sizes[2] = get_num_instructions(fs_asm_code); + uint32_t size = sizeof(uint64_t)*asm_sizes[2]; + asm_ptrs[2] = (uint64_t*)malloc(size); + assemble_qpu_asm(fs_asm_code, asm_ptrs[2]); + } - VkResult res = vkCreateShaderModuleFromRpiAssemblyEXT(physicalDevice); - assert(shaderModule); + asm_sizes[3] = 0; + asm_ptrs[3] = 0; + + spirv[0] = 0x07230203; + spirv[1] = 0x00010000; + spirv[2] = 0x14E45250; + spirv[3] = 1; + spirv[4] = (uint32_t)&shaderModuleCreateInfo; + //words start here + spirv[5] = 1 << 16; + + VkShaderModuleCreateInfo smci = {}; + smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + smci.codeSize = sizeof(uint32_t)*6; + smci.pCode = spirv; + vkCreateShaderModule(device, &smci, 0, &shaderModule); + + for(uint32_t c = 0; c < 4; ++c) + { + free(asm_ptrs[c]); + } } diff --git 
a/test/blending/CMakeLists.txt b/test/blending/CMakeLists.txt index 6de9b53..a2b767f 100644 --- a/test/blending/CMakeLists.txt +++ b/test/blending/CMakeLists.txt @@ -6,4 +6,4 @@ file(GLOB testSrc add_executable(blending ${testSrc} ) target_compile_options(blending PRIVATE -Wall -std=c++11 -std=c11) -target_link_libraries(blending vulkan) +target_link_libraries(blending vulkan $) diff --git a/test/blending/blending.cpp b/test/blending/blending.cpp index 570acc5..aacbe61 100644 --- a/test/blending/blending.cpp +++ b/test/blending/blending.cpp @@ -7,6 +7,7 @@ #include #include "driver/vkExt.h" +#include "QPUassembler/qpu_assembler.h" //#define GLFW_INCLUDE_VULKAN //#define VK_USE_PLATFORM_WIN32_KHR @@ -1077,21 +1078,59 @@ void CreateShaders() } }; + uint32_t spirv[6]; + + uint64_t* asm_ptrs[4]; + uint32_t asm_sizes[4]; + VkRpiShaderModuleAssemblyCreateInfoEXT shaderModuleCreateInfo = {}; - shaderModuleCreateInfo.asmStrings = asm_strings; + shaderModuleCreateInfo.instructions = asm_ptrs; + shaderModuleCreateInfo.numInstructions = asm_sizes; shaderModuleCreateInfo.mappings = mappings; shaderModuleCreateInfo.numMappings = sizeof(mappings) / sizeof(VkRpiAssemblyMappingEXT); - shaderModuleCreateInfo.pShaderModule = &shaderModule; - LoaderTrampoline* trampoline = (LoaderTrampoline*)physicalDevice; - VkRpiPhysicalDevice* realPhysicalDevice = trampoline->loaderTerminator->physicalDevice; + { //assemble cs code + asm_sizes[0] = get_num_instructions(cs_asm_code); + uint32_t size = sizeof(uint64_t)*asm_sizes[0]; + asm_ptrs[0] = (uint64_t*)malloc(size); + assemble_qpu_asm(cs_asm_code, asm_ptrs[0]); + } - realPhysicalDevice->customData = (uintptr_t)&shaderModuleCreateInfo; + { //assemble vs code + asm_sizes[1] = get_num_instructions(vs_asm_code); + uint32_t size = sizeof(uint64_t)*asm_sizes[1]; + asm_ptrs[1] = (uint64_t*)malloc(size); + assemble_qpu_asm(vs_asm_code, asm_ptrs[1]); + } - PFN_vkCreateShaderModuleFromRpiAssemblyEXT vkCreateShaderModuleFromRpiAssemblyEXT = 
(PFN_vkCreateShaderModuleFromRpiAssemblyEXT)vkGetInstanceProcAddr(instance, "vkCreateShaderModuleFromRpiAssemblyEXT"); + { //assemble fs code + asm_sizes[2] = get_num_instructions(fs_asm_code); + uint32_t size = sizeof(uint64_t)*asm_sizes[2]; + asm_ptrs[2] = (uint64_t*)malloc(size); + assemble_qpu_asm(fs_asm_code, asm_ptrs[2]); + } - VkResult res = vkCreateShaderModuleFromRpiAssemblyEXT(physicalDevice); - assert(shaderModule); + asm_sizes[3] = 0; + asm_ptrs[3] = 0; + + spirv[0] = 0x07230203; + spirv[1] = 0x00010000; + spirv[2] = 0x14E45250; + spirv[3] = 1; + spirv[4] = (uint32_t)&shaderModuleCreateInfo; + //words start here + spirv[5] = 1 << 16; + + VkShaderModuleCreateInfo smci = {}; + smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + smci.codeSize = sizeof(uint32_t)*6; + smci.pCode = spirv; + vkCreateShaderModule(device, &smci, 0, &shaderModule); + + for(uint32_t c = 0; c < 4; ++c) + { + free(asm_ptrs[c]); + } } diff --git a/test/clear/CMakeLists.txt b/test/clear/CMakeLists.txt index e090b65..6465982 100644 --- a/test/clear/CMakeLists.txt +++ b/test/clear/CMakeLists.txt @@ -6,4 +6,4 @@ file(GLOB testSrc add_executable(clear ${testSrc}) target_compile_options(clear PRIVATE -Wall -std=c++11) -target_link_libraries(clear vulkan) +target_link_libraries(clear vulkan $) diff --git a/test/cubemapping/CMakeLists.txt b/test/cubemapping/CMakeLists.txt index 48221c3..ffbadaa 100644 --- a/test/cubemapping/CMakeLists.txt +++ b/test/cubemapping/CMakeLists.txt @@ -6,4 +6,4 @@ file(GLOB testSrc add_executable(cubemapping ${testSrc} ) target_compile_options(cubemapping PRIVATE -Wall -std=c++11 -std=c11) -target_link_libraries(cubemapping vulkan) +target_link_libraries(cubemapping vulkan $) diff --git a/test/cubemapping/cubemapping.cpp b/test/cubemapping/cubemapping.cpp index 43e6117..0e874fd 100644 --- a/test/cubemapping/cubemapping.cpp +++ b/test/cubemapping/cubemapping.cpp @@ -7,6 +7,7 @@ #include #include "driver/vkExt.h" +#include "QPUassembler/qpu_assembler.h" 
//#define GLFW_INCLUDE_VULKAN //#define VK_USE_PLATFORM_WIN32_KHR @@ -1119,23 +1120,59 @@ void CreateShaders() } }; + uint32_t spirv[6]; + + uint64_t* asm_ptrs[4]; + uint32_t asm_sizes[4]; + VkRpiShaderModuleAssemblyCreateInfoEXT shaderModuleCreateInfo = {}; - shaderModuleCreateInfo.asmStrings = sample_asm_strings; + shaderModuleCreateInfo.instructions = asm_ptrs; + shaderModuleCreateInfo.numInstructions = asm_sizes; shaderModuleCreateInfo.mappings = sample_mappings; shaderModuleCreateInfo.numMappings = sizeof(sample_mappings) / sizeof(VkRpiAssemblyMappingEXT); - shaderModuleCreateInfo.pShaderModule = &sampleShaderModule; - LoaderTrampoline* trampoline = (LoaderTrampoline*)physicalDevice; - VkRpiPhysicalDevice* realPhysicalDevice = trampoline->loaderTerminator->physicalDevice; + { //assemble cs code + asm_sizes[0] = get_num_instructions(cs_asm_code); + uint32_t size = sizeof(uint64_t)*asm_sizes[0]; + asm_ptrs[0] = (uint64_t*)malloc(size); + assemble_qpu_asm(cs_asm_code, asm_ptrs[0]); + } - realPhysicalDevice->customData = (uintptr_t)&shaderModuleCreateInfo; + { //assemble vs code + asm_sizes[1] = get_num_instructions(vs_asm_code); + uint32_t size = sizeof(uint64_t)*asm_sizes[1]; + asm_ptrs[1] = (uint64_t*)malloc(size); + assemble_qpu_asm(vs_asm_code, asm_ptrs[1]); + } - PFN_vkCreateShaderModuleFromRpiAssemblyEXT vkCreateShaderModuleFromRpiAssemblyEXT = (PFN_vkCreateShaderModuleFromRpiAssemblyEXT)vkGetInstanceProcAddr(instance, "vkCreateShaderModuleFromRpiAssemblyEXT"); + { //assemble fs code + asm_sizes[2] = get_num_instructions(sample_fs_asm_code); + uint32_t size = sizeof(uint64_t)*asm_sizes[2]; + asm_ptrs[2] = (uint64_t*)malloc(size); + assemble_qpu_asm(sample_fs_asm_code, asm_ptrs[2]); + } - VkResult res = vkCreateShaderModuleFromRpiAssemblyEXT(physicalDevice); - assert(sampleShaderModule); + asm_sizes[3] = 0; + asm_ptrs[3] = 0; - //exit(-1); + spirv[0] = 0x07230203; + spirv[1] = 0x00010000; + spirv[2] = 0x14E45250; + spirv[3] = 1; + spirv[4] = 
(uint32_t)&shaderModuleCreateInfo; + //words start here + spirv[5] = 1 << 16; + + VkShaderModuleCreateInfo smci = {}; + smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + smci.codeSize = sizeof(uint32_t)*6; + smci.pCode = spirv; + vkCreateShaderModule(device, &smci, 0, &sampleShaderModule); + + for(uint32_t c = 0; c < 4; ++c) + { + free(asm_ptrs[c]); + } } diff --git a/test/depthTest/CMakeLists.txt b/test/depthTest/CMakeLists.txt index 8cd3100..12d5313 100644 --- a/test/depthTest/CMakeLists.txt +++ b/test/depthTest/CMakeLists.txt @@ -6,4 +6,4 @@ file(GLOB testSrc add_executable(depthTest ${testSrc} ) target_compile_options(depthTest PRIVATE -Wall -std=c++11 -std=c11) -target_link_libraries(depthTest vulkan) +target_link_libraries(depthTest vulkan $) diff --git a/test/depthTest/depthTest.cpp b/test/depthTest/depthTest.cpp index afe42ef..c4a1f58 100644 --- a/test/depthTest/depthTest.cpp +++ b/test/depthTest/depthTest.cpp @@ -7,6 +7,7 @@ #include #include "driver/vkExt.h" +#include "QPUassembler/qpu_assembler.h" //#define GLFW_INCLUDE_VULKAN //#define VK_USE_PLATFORM_WIN32_KHR @@ -1197,21 +1198,59 @@ void CreateShaders() } }; + uint32_t spirv[6]; + + uint64_t* asm_ptrs[4]; + uint32_t asm_sizes[4]; + VkRpiShaderModuleAssemblyCreateInfoEXT shaderModuleCreateInfo = {}; - shaderModuleCreateInfo.asmStrings = asm_strings; + shaderModuleCreateInfo.instructions = asm_ptrs; + shaderModuleCreateInfo.numInstructions = asm_sizes; shaderModuleCreateInfo.mappings = mappings; shaderModuleCreateInfo.numMappings = sizeof(mappings) / sizeof(VkRpiAssemblyMappingEXT); - shaderModuleCreateInfo.pShaderModule = &shaderModule; - LoaderTrampoline* trampoline = (LoaderTrampoline*)physicalDevice; - VkRpiPhysicalDevice* realPhysicalDevice = trampoline->loaderTerminator->physicalDevice; + { //assemble cs code + asm_sizes[0] = get_num_instructions(cs_asm_code); + uint32_t size = sizeof(uint64_t)*asm_sizes[0]; + asm_ptrs[0] = (uint64_t*)malloc(size); + assemble_qpu_asm(cs_asm_code, 
asm_ptrs[0]); + } - realPhysicalDevice->customData = (uintptr_t)&shaderModuleCreateInfo; + { //assemble vs code + asm_sizes[1] = get_num_instructions(vs_asm_code); + uint32_t size = sizeof(uint64_t)*asm_sizes[1]; + asm_ptrs[1] = (uint64_t*)malloc(size); + assemble_qpu_asm(vs_asm_code, asm_ptrs[1]); + } - PFN_vkCreateShaderModuleFromRpiAssemblyEXT vkCreateShaderModuleFromRpiAssemblyEXT = (PFN_vkCreateShaderModuleFromRpiAssemblyEXT)vkGetInstanceProcAddr(instance, "vkCreateShaderModuleFromRpiAssemblyEXT"); + { //assemble fs code + asm_sizes[2] = get_num_instructions(fs_asm_code); + uint32_t size = sizeof(uint64_t)*asm_sizes[2]; + asm_ptrs[2] = (uint64_t*)malloc(size); + assemble_qpu_asm(fs_asm_code, asm_ptrs[2]); + } - VkResult res = vkCreateShaderModuleFromRpiAssemblyEXT(physicalDevice); - assert(shaderModule); + asm_sizes[3] = 0; + asm_ptrs[3] = 0; + + spirv[0] = 0x07230203; + spirv[1] = 0x00010000; + spirv[2] = 0x14E45250; + spirv[3] = 1; + spirv[4] = (uint32_t)&shaderModuleCreateInfo; + //words start here + spirv[5] = 1 << 16; + + VkShaderModuleCreateInfo smci = {}; + smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + smci.codeSize = sizeof(uint32_t)*6; + smci.pCode = spirv; + vkCreateShaderModule(device, &smci, 0, &shaderModule); + + for(uint32_t c = 0; c < 4; ++c) + { + free(asm_ptrs[c]); + } } diff --git a/test/depthTex/CMakeLists.txt b/test/depthTex/CMakeLists.txt index 0d0b654..1d90ac8 100644 --- a/test/depthTex/CMakeLists.txt +++ b/test/depthTex/CMakeLists.txt @@ -6,4 +6,4 @@ file(GLOB testSrc add_executable(depthTex ${testSrc} ) target_compile_options(depthTex PRIVATE -Wall -std=c++11 -std=c11) -target_link_libraries(depthTex vulkan) +target_link_libraries(depthTex vulkan $) diff --git a/test/depthTex/depthTex.cpp b/test/depthTex/depthTex.cpp index 164f609..9b8454d 100644 --- a/test/depthTex/depthTex.cpp +++ b/test/depthTex/depthTex.cpp @@ -7,6 +7,7 @@ #include #include "driver/vkExt.h" +#include "QPUassembler/qpu_assembler.h" //#define 
GLFW_INCLUDE_VULKAN //#define VK_USE_PLATFORM_WIN32_KHR @@ -1114,23 +1115,59 @@ void CreateShaders() } }; + uint32_t spirv[6]; + + uint64_t* asm_ptrs[4]; + uint32_t asm_sizes[4]; + VkRpiShaderModuleAssemblyCreateInfoEXT shaderModuleCreateInfo = {}; - shaderModuleCreateInfo.asmStrings = sample_asm_strings; + shaderModuleCreateInfo.instructions = asm_ptrs; + shaderModuleCreateInfo.numInstructions = asm_sizes; shaderModuleCreateInfo.mappings = sample_mappings; shaderModuleCreateInfo.numMappings = sizeof(sample_mappings) / sizeof(VkRpiAssemblyMappingEXT); - shaderModuleCreateInfo.pShaderModule = &sampleShaderModule; - LoaderTrampoline* trampoline = (LoaderTrampoline*)physicalDevice; - VkRpiPhysicalDevice* realPhysicalDevice = trampoline->loaderTerminator->physicalDevice; + { //assemble cs code + asm_sizes[0] = get_num_instructions(cs_asm_code); + uint32_t size = sizeof(uint64_t)*asm_sizes[0]; + asm_ptrs[0] = (uint64_t*)malloc(size); + assemble_qpu_asm(cs_asm_code, asm_ptrs[0]); + } - realPhysicalDevice->customData = (uintptr_t)&shaderModuleCreateInfo; + { //assemble vs code + asm_sizes[1] = get_num_instructions(vs_asm_code); + uint32_t size = sizeof(uint64_t)*asm_sizes[1]; + asm_ptrs[1] = (uint64_t*)malloc(size); + assemble_qpu_asm(vs_asm_code, asm_ptrs[1]); + } - PFN_vkCreateShaderModuleFromRpiAssemblyEXT vkCreateShaderModuleFromRpiAssemblyEXT = (PFN_vkCreateShaderModuleFromRpiAssemblyEXT)vkGetInstanceProcAddr(instance, "vkCreateShaderModuleFromRpiAssemblyEXT"); + { //assemble fs code + asm_sizes[2] = get_num_instructions(sample_fs_asm_code); + uint32_t size = sizeof(uint64_t)*asm_sizes[2]; + asm_ptrs[2] = (uint64_t*)malloc(size); + assemble_qpu_asm(sample_fs_asm_code, asm_ptrs[2]); + } - VkResult res = vkCreateShaderModuleFromRpiAssemblyEXT(physicalDevice); - assert(sampleShaderModule); + asm_sizes[3] = 0; + asm_ptrs[3] = 0; - //exit(-1); + spirv[0] = 0x07230203; + spirv[1] = 0x00010000; + spirv[2] = 0x14E45250; + spirv[3] = 1; + spirv[4] = 
(uint32_t)&shaderModuleCreateInfo; + //words start here + spirv[5] = 1 << 16; + + VkShaderModuleCreateInfo smci = {}; + smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + smci.codeSize = sizeof(uint32_t)*6; + smci.pCode = spirv; + vkCreateShaderModule(device, &smci, 0, &sampleShaderModule); + + for(uint32_t c = 0; c < 4; ++c) + { + free(asm_ptrs[c]); + } } diff --git a/test/indexedTriangle/CMakeLists.txt b/test/indexedTriangle/CMakeLists.txt index 2c3cdc7..4c724e7 100644 --- a/test/indexedTriangle/CMakeLists.txt +++ b/test/indexedTriangle/CMakeLists.txt @@ -6,4 +6,4 @@ file(GLOB testSrc add_executable(indexedTriangle ${testSrc} ) target_compile_options(indexedTriangle PRIVATE -Wall -std=c++11 -std=c11) -target_link_libraries(indexedTriangle vulkan) +target_link_libraries(indexedTriangle vulkan $) diff --git a/test/indexedTriangle/indexedTriangle.cpp b/test/indexedTriangle/indexedTriangle.cpp index f0fc2ab..95f0d60 100644 --- a/test/indexedTriangle/indexedTriangle.cpp +++ b/test/indexedTriangle/indexedTriangle.cpp @@ -7,6 +7,7 @@ #include #include "driver/vkExt.h" +#include "QPUassembler/qpu_assembler.h" //#define GLFW_INCLUDE_VULKAN //#define VK_USE_PLATFORM_WIN32_KHR @@ -1024,21 +1025,59 @@ void CreateShaders() } }; + uint32_t spirv[6]; + + uint64_t* asm_ptrs[4]; + uint32_t asm_sizes[4]; + VkRpiShaderModuleAssemblyCreateInfoEXT shaderModuleCreateInfo = {}; - shaderModuleCreateInfo.asmStrings = asm_strings; + shaderModuleCreateInfo.instructions = asm_ptrs; + shaderModuleCreateInfo.numInstructions = asm_sizes; shaderModuleCreateInfo.mappings = mappings; shaderModuleCreateInfo.numMappings = sizeof(mappings) / sizeof(VkRpiAssemblyMappingEXT); - shaderModuleCreateInfo.pShaderModule = &shaderModule; - LoaderTrampoline* trampoline = (LoaderTrampoline*)physicalDevice; - VkRpiPhysicalDevice* realPhysicalDevice = trampoline->loaderTerminator->physicalDevice; + { //assemble cs code + asm_sizes[0] = get_num_instructions(cs_asm_code); + uint32_t size = 
sizeof(uint64_t)*asm_sizes[0]; + asm_ptrs[0] = (uint64_t*)malloc(size); + assemble_qpu_asm(cs_asm_code, asm_ptrs[0]); + } - realPhysicalDevice->customData = (uintptr_t)&shaderModuleCreateInfo; + { //assemble vs code + asm_sizes[1] = get_num_instructions(vs_asm_code); + uint32_t size = sizeof(uint64_t)*asm_sizes[1]; + asm_ptrs[1] = (uint64_t*)malloc(size); + assemble_qpu_asm(vs_asm_code, asm_ptrs[1]); + } - PFN_vkCreateShaderModuleFromRpiAssemblyEXT vkCreateShaderModuleFromRpiAssemblyEXT = (PFN_vkCreateShaderModuleFromRpiAssemblyEXT)vkGetInstanceProcAddr(instance, "vkCreateShaderModuleFromRpiAssemblyEXT"); + { //assemble fs code + asm_sizes[2] = get_num_instructions(fs_asm_code); + uint32_t size = sizeof(uint64_t)*asm_sizes[2]; + asm_ptrs[2] = (uint64_t*)malloc(size); + assemble_qpu_asm(fs_asm_code, asm_ptrs[2]); + } - VkResult res = vkCreateShaderModuleFromRpiAssemblyEXT(physicalDevice); - assert(shaderModule); + asm_sizes[3] = 0; + asm_ptrs[3] = 0; + + spirv[0] = 0x07230203; + spirv[1] = 0x00010000; + spirv[2] = 0x14E45250; + spirv[3] = 1; + spirv[4] = (uint32_t)&shaderModuleCreateInfo; + //words start here + spirv[5] = 1 << 16; + + VkShaderModuleCreateInfo smci = {}; + smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + smci.codeSize = sizeof(uint32_t)*6; + smci.pCode = spirv; + vkCreateShaderModule(device, &smci, 0, &shaderModule); + + for(uint32_t c = 0; c < 4; ++c) + { + free(asm_ptrs[c]); + } } diff --git a/test/mipmapping/CMakeLists.txt b/test/mipmapping/CMakeLists.txt index 77f98f8..e81f36e 100644 --- a/test/mipmapping/CMakeLists.txt +++ b/test/mipmapping/CMakeLists.txt @@ -6,4 +6,4 @@ file(GLOB testSrc add_executable(mipmapping ${testSrc} ) target_compile_options(mipmapping PRIVATE -Wall -std=c++11 -std=c11) -target_link_libraries(mipmapping vulkan) +target_link_libraries(mipmapping vulkan $) diff --git a/test/mipmapping/mipmapping.cpp b/test/mipmapping/mipmapping.cpp index 62ca7a3..7918b06 100644 --- a/test/mipmapping/mipmapping.cpp +++ 
b/test/mipmapping/mipmapping.cpp @@ -8,6 +8,7 @@ #include #include "driver/vkExt.h" +#include "QPUassembler/qpu_assembler.h" //#define GLFW_INCLUDE_VULKAN //#define VK_USE_PLATFORM_WIN32_KHR @@ -1139,23 +1140,59 @@ void CreateShaders() } }; + uint32_t spirv[6]; + + uint64_t* asm_ptrs[4]; + uint32_t asm_sizes[4]; + VkRpiShaderModuleAssemblyCreateInfoEXT shaderModuleCreateInfo = {}; - shaderModuleCreateInfo.asmStrings = sample_asm_strings; + shaderModuleCreateInfo.instructions = asm_ptrs; + shaderModuleCreateInfo.numInstructions = asm_sizes; shaderModuleCreateInfo.mappings = sample_mappings; shaderModuleCreateInfo.numMappings = sizeof(sample_mappings) / sizeof(VkRpiAssemblyMappingEXT); - shaderModuleCreateInfo.pShaderModule = &sampleShaderModule; - LoaderTrampoline* trampoline = (LoaderTrampoline*)physicalDevice; - VkRpiPhysicalDevice* realPhysicalDevice = trampoline->loaderTerminator->physicalDevice; + { //assemble cs code + asm_sizes[0] = get_num_instructions(cs_asm_code); + uint32_t size = sizeof(uint64_t)*asm_sizes[0]; + asm_ptrs[0] = (uint64_t*)malloc(size); + assemble_qpu_asm(cs_asm_code, asm_ptrs[0]); + } - realPhysicalDevice->customData = (uintptr_t)&shaderModuleCreateInfo; + { //assemble vs code + asm_sizes[1] = get_num_instructions(vs_asm_code); + uint32_t size = sizeof(uint64_t)*asm_sizes[1]; + asm_ptrs[1] = (uint64_t*)malloc(size); + assemble_qpu_asm(vs_asm_code, asm_ptrs[1]); + } - PFN_vkCreateShaderModuleFromRpiAssemblyEXT vkCreateShaderModuleFromRpiAssemblyEXT = (PFN_vkCreateShaderModuleFromRpiAssemblyEXT)vkGetInstanceProcAddr(instance, "vkCreateShaderModuleFromRpiAssemblyEXT"); + { //assemble fs code + asm_sizes[2] = get_num_instructions(sample_fs_asm_code); + uint32_t size = sizeof(uint64_t)*asm_sizes[2]; + asm_ptrs[2] = (uint64_t*)malloc(size); + assemble_qpu_asm(sample_fs_asm_code, asm_ptrs[2]); + } - VkResult res = vkCreateShaderModuleFromRpiAssemblyEXT(physicalDevice); - assert(sampleShaderModule); + asm_sizes[3] = 0; + asm_ptrs[3] = 0; - 
//exit(-1); + spirv[0] = 0x07230203; + spirv[1] = 0x00010000; + spirv[2] = 0x14E45250; + spirv[3] = 1; + spirv[4] = (uint32_t)&shaderModuleCreateInfo; + //words start here + spirv[5] = 1 << 16; + + VkShaderModuleCreateInfo smci = {}; + smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + smci.codeSize = sizeof(uint32_t)*6; + smci.pCode = spirv; + vkCreateShaderModule(device, &smci, 0, &sampleShaderModule); + + for(uint32_t c = 0; c < 4; ++c) + { + free(asm_ptrs[c]); + } } diff --git a/test/query/CMakeLists.txt b/test/query/CMakeLists.txt index 624b825..29d5a7d 100644 --- a/test/query/CMakeLists.txt +++ b/test/query/CMakeLists.txt @@ -6,4 +6,4 @@ file(GLOB testSrc add_executable(query ${testSrc} ) target_compile_options(query PRIVATE -Wall -std=c++11 -std=c11) -target_link_libraries(query vulkan) +target_link_libraries(query vulkan $) diff --git a/test/query/query.cpp b/test/query/query.cpp index a41283f..eedc244 100644 --- a/test/query/query.cpp +++ b/test/query/query.cpp @@ -7,6 +7,7 @@ #include #include "driver/vkExt.h" +#include "QPUassembler/qpu_assembler.h" //#define GLFW_INCLUDE_VULKAN //#define VK_USE_PLATFORM_WIN32_KHR @@ -1091,21 +1092,59 @@ void CreateShaders() } }; + uint32_t spirv[6]; + + uint64_t* asm_ptrs[4]; + uint32_t asm_sizes[4]; + VkRpiShaderModuleAssemblyCreateInfoEXT shaderModuleCreateInfo = {}; - shaderModuleCreateInfo.asmStrings = asm_strings; + shaderModuleCreateInfo.instructions = asm_ptrs; + shaderModuleCreateInfo.numInstructions = asm_sizes; shaderModuleCreateInfo.mappings = mappings; shaderModuleCreateInfo.numMappings = sizeof(mappings) / sizeof(VkRpiAssemblyMappingEXT); - shaderModuleCreateInfo.pShaderModule = &shaderModule; - LoaderTrampoline* trampoline = (LoaderTrampoline*)physicalDevice; - VkRpiPhysicalDevice* realPhysicalDevice = trampoline->loaderTerminator->physicalDevice; + { //assemble cs code + asm_sizes[0] = get_num_instructions(cs_asm_code); + uint32_t size = sizeof(uint64_t)*asm_sizes[0]; + asm_ptrs[0] = 
(uint64_t*)malloc(size); + assemble_qpu_asm(cs_asm_code, asm_ptrs[0]); + } - realPhysicalDevice->customData = (uintptr_t)&shaderModuleCreateInfo; + { //assemble vs code + asm_sizes[1] = get_num_instructions(vs_asm_code); + uint32_t size = sizeof(uint64_t)*asm_sizes[1]; + asm_ptrs[1] = (uint64_t*)malloc(size); + assemble_qpu_asm(vs_asm_code, asm_ptrs[1]); + } - PFN_vkCreateShaderModuleFromRpiAssemblyEXT vkCreateShaderModuleFromRpiAssemblyEXT = (PFN_vkCreateShaderModuleFromRpiAssemblyEXT)vkGetInstanceProcAddr(instance, "vkCreateShaderModuleFromRpiAssemblyEXT"); + { //assemble fs code + asm_sizes[2] = get_num_instructions(fs_asm_code); + uint32_t size = sizeof(uint64_t)*asm_sizes[2]; + asm_ptrs[2] = (uint64_t*)malloc(size); + assemble_qpu_asm(fs_asm_code, asm_ptrs[2]); + } - VkResult res = vkCreateShaderModuleFromRpiAssemblyEXT(physicalDevice); - assert(shaderModule); + asm_sizes[3] = 0; + asm_ptrs[3] = 0; + + spirv[0] = 0x07230203; + spirv[1] = 0x00010000; + spirv[2] = 0x14E45250; + spirv[3] = 1; + spirv[4] = (uint32_t)&shaderModuleCreateInfo; + //words start here + spirv[5] = 1 << 16; + + VkShaderModuleCreateInfo smci = {}; + smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + smci.codeSize = sizeof(uint32_t)*6; + smci.pCode = spirv; + vkCreateShaderModule(device, &smci, 0, &shaderModule); + + for(uint32_t c = 0; c < 4; ++c) + { + free(asm_ptrs[c]); + } } diff --git a/test/stencilTest/CMakeLists.txt b/test/stencilTest/CMakeLists.txt index 03d4315..a1614e5 100644 --- a/test/stencilTest/CMakeLists.txt +++ b/test/stencilTest/CMakeLists.txt @@ -6,4 +6,4 @@ file(GLOB testSrc add_executable(stencilTest ${testSrc} ) target_compile_options(stencilTest PRIVATE -Wall -std=c++11 -std=c11) -target_link_libraries(stencilTest vulkan) +target_link_libraries(stencilTest vulkan $) diff --git a/test/stencilTest/stencilTest.cpp b/test/stencilTest/stencilTest.cpp index 75482e7..367fd5c 100644 --- a/test/stencilTest/stencilTest.cpp +++ b/test/stencilTest/stencilTest.cpp @@ -7,6 
+7,7 @@ #include #include "driver/vkExt.h" +#include "QPUassembler/qpu_assembler.h" //#define GLFW_INCLUDE_VULKAN //#define VK_USE_PLATFORM_WIN32_KHR @@ -1212,27 +1213,68 @@ void CreateShaders() } }; + uint32_t spirv[6]; + + uint64_t* asm_ptrs[4]; + uint32_t asm_sizes[4]; + VkRpiShaderModuleAssemblyCreateInfoEXT shaderModuleCreateInfo = {}; - shaderModuleCreateInfo.asmStrings = asm_strings; + shaderModuleCreateInfo.instructions = asm_ptrs; + shaderModuleCreateInfo.numInstructions = asm_sizes; shaderModuleCreateInfo.mappings = mappings; shaderModuleCreateInfo.numMappings = sizeof(mappings) / sizeof(VkRpiAssemblyMappingEXT); - shaderModuleCreateInfo.pShaderModule = &shaderModule1; - LoaderTrampoline* trampoline = (LoaderTrampoline*)physicalDevice; - VkRpiPhysicalDevice* realPhysicalDevice = trampoline->loaderTerminator->physicalDevice; + { //assemble cs code + asm_sizes[0] = get_num_instructions(cs_asm_code); + uint32_t size = sizeof(uint64_t)*asm_sizes[0]; + asm_ptrs[0] = (uint64_t*)malloc(size); + assemble_qpu_asm(cs_asm_code, asm_ptrs[0]); + } - realPhysicalDevice->customData = (uintptr_t)&shaderModuleCreateInfo; + { //assemble vs code + asm_sizes[1] = get_num_instructions(vs_asm_code); + uint32_t size = sizeof(uint64_t)*asm_sizes[1]; + asm_ptrs[1] = (uint64_t*)malloc(size); + assemble_qpu_asm(vs_asm_code, asm_ptrs[1]); + } - PFN_vkCreateShaderModuleFromRpiAssemblyEXT vkCreateShaderModuleFromRpiAssemblyEXT = (PFN_vkCreateShaderModuleFromRpiAssemblyEXT)vkGetInstanceProcAddr(instance, "vkCreateShaderModuleFromRpiAssemblyEXT"); + { //assemble fs code + asm_sizes[2] = get_num_instructions(fs_asm_code); + uint32_t size = sizeof(uint64_t)*asm_sizes[2]; + asm_ptrs[2] = (uint64_t*)malloc(size); + assemble_qpu_asm(fs_asm_code, asm_ptrs[2]); + } - VkResult res = vkCreateShaderModuleFromRpiAssemblyEXT(physicalDevice); - assert(shaderModule1); + asm_sizes[3] = 0; + asm_ptrs[3] = 0; - shaderModuleCreateInfo.pShaderModule = &shaderModule2; - asm_strings[2] = 
(char*)fs_asm_code2; + spirv[0] = 0x07230203; + spirv[1] = 0x00010000; + spirv[2] = 0x14E45250; + spirv[3] = 1; + spirv[4] = (uint32_t)&shaderModuleCreateInfo; + //words start here + spirv[5] = 1 << 16; - res = vkCreateShaderModuleFromRpiAssemblyEXT(physicalDevice); - assert(shaderModule2); + VkShaderModuleCreateInfo smci = {}; + smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + smci.codeSize = sizeof(uint32_t)*6; + smci.pCode = spirv; + vkCreateShaderModule(device, &smci, 0, &shaderModule1); + + { //assemble fs code + asm_sizes[2] = get_num_instructions(fs_asm_code2); + uint32_t size = sizeof(uint64_t)*asm_sizes[2]; + asm_ptrs[2] = (uint64_t*)malloc(size); + assemble_qpu_asm(fs_asm_code2, asm_ptrs[2]); + } + + vkCreateShaderModule(device, &smci, 0, &shaderModule2); + + for(uint32_t c = 0; c < 4; ++c) + { + free(asm_ptrs[c]); + } } diff --git a/test/texturing/CMakeLists.txt b/test/texturing/CMakeLists.txt index c5f4284..4147e9c 100644 --- a/test/texturing/CMakeLists.txt +++ b/test/texturing/CMakeLists.txt @@ -6,4 +6,4 @@ file(GLOB testSrc add_executable(texturing ${testSrc} ) target_compile_options(texturing PRIVATE -Wall -std=c++11 -std=c11) -target_link_libraries(texturing vulkan) +target_link_libraries(texturing vulkan $) diff --git a/test/texturing/texturing.cpp b/test/texturing/texturing.cpp index 530e0da..058fd50 100644 --- a/test/texturing/texturing.cpp +++ b/test/texturing/texturing.cpp @@ -7,6 +7,7 @@ #include #include "driver/vkExt.h" +#include "QPUassembler/qpu_assembler.h" //#define GLFW_INCLUDE_VULKAN //#define VK_USE_PLATFORM_WIN32_KHR @@ -1096,23 +1097,59 @@ void CreateShaders() } }; + uint32_t spirv[6]; + + uint64_t* asm_ptrs[4]; + uint32_t asm_sizes[4]; + VkRpiShaderModuleAssemblyCreateInfoEXT shaderModuleCreateInfo = {}; - shaderModuleCreateInfo.asmStrings = sample_asm_strings; + shaderModuleCreateInfo.instructions = asm_ptrs; + shaderModuleCreateInfo.numInstructions = asm_sizes; shaderModuleCreateInfo.mappings = sample_mappings; 
shaderModuleCreateInfo.numMappings = sizeof(sample_mappings) / sizeof(VkRpiAssemblyMappingEXT); - shaderModuleCreateInfo.pShaderModule = &sampleShaderModule; - LoaderTrampoline* trampoline = (LoaderTrampoline*)physicalDevice; - VkRpiPhysicalDevice* realPhysicalDevice = trampoline->loaderTerminator->physicalDevice; + { //assemble cs code + asm_sizes[0] = get_num_instructions(cs_asm_code); + uint32_t size = sizeof(uint64_t)*asm_sizes[0]; + asm_ptrs[0] = (uint64_t*)malloc(size); + assemble_qpu_asm(cs_asm_code, asm_ptrs[0]); + } - realPhysicalDevice->customData = (uintptr_t)&shaderModuleCreateInfo; + { //assemble vs code + asm_sizes[1] = get_num_instructions(vs_asm_code); + uint32_t size = sizeof(uint64_t)*asm_sizes[1]; + asm_ptrs[1] = (uint64_t*)malloc(size); + assemble_qpu_asm(vs_asm_code, asm_ptrs[1]); + } - PFN_vkCreateShaderModuleFromRpiAssemblyEXT vkCreateShaderModuleFromRpiAssemblyEXT = (PFN_vkCreateShaderModuleFromRpiAssemblyEXT)vkGetInstanceProcAddr(instance, "vkCreateShaderModuleFromRpiAssemblyEXT"); + { //assemble fs code + asm_sizes[2] = get_num_instructions(sample_fs_asm_code); + uint32_t size = sizeof(uint64_t)*asm_sizes[2]; + asm_ptrs[2] = (uint64_t*)malloc(size); + assemble_qpu_asm(sample_fs_asm_code, asm_ptrs[2]); + } - VkResult res = vkCreateShaderModuleFromRpiAssemblyEXT(physicalDevice); - assert(sampleShaderModule); + asm_sizes[3] = 0; + asm_ptrs[3] = 0; - //exit(-1); + spirv[0] = 0x07230203; + spirv[1] = 0x00010000; + spirv[2] = 0x14E45250; + spirv[3] = 1; + spirv[4] = (uint32_t)&shaderModuleCreateInfo; + //words start here + spirv[5] = 1 << 16; + + VkShaderModuleCreateInfo smci = {}; + smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + smci.codeSize = sizeof(uint32_t)*6; + smci.pCode = spirv; + vkCreateShaderModule(device, &smci, 0, &sampleShaderModule); + + for(uint32_t c = 0; c < 4; ++c) + { + free(asm_ptrs[c]); + } } diff --git a/test/triangle/CMakeLists.txt b/test/triangle/CMakeLists.txt index 2d150d5..d3c1769 100644 --- 
a/test/triangle/CMakeLists.txt +++ b/test/triangle/CMakeLists.txt @@ -6,4 +6,4 @@ file(GLOB testSrc add_executable(triangle ${testSrc} ) target_compile_options(triangle PRIVATE -Wall -std=c++11 -std=c11) -target_link_libraries(triangle vulkan) +target_link_libraries(triangle vulkan $) diff --git a/test/triangle/triangle.cpp b/test/triangle/triangle.cpp index 09d5063..fb83cab 100644 --- a/test/triangle/triangle.cpp +++ b/test/triangle/triangle.cpp @@ -8,6 +8,8 @@ #include "driver/vkExt.h" +#include "QPUassembler/qpu_assembler.h" + //#define GLFW_INCLUDE_VULKAN //#define VK_USE_PLATFORM_WIN32_KHR //#include @@ -132,10 +134,11 @@ void run() { void setupVulkan() { createInstance(); findPhysicalDevice(); - createWindowSurface(); - checkSwapChainSupport(); findQueueFamilies(); createLogicalDevice(); + CreateShaders(); + createWindowSurface(); + checkSwapChainSupport(); createSemaphores(); createSwapChain(); createCommandQueues(); @@ -143,7 +146,6 @@ void setupVulkan() { CreateFramebuffer(); CreateVertexBuffer(); //CreateUniformBuffer(); - CreateShaders(); CreatePipeline(); recordCommandBuffers(); } @@ -228,10 +230,18 @@ void createInstance() { "VK_LAYER_KHRONOS_validation" }; + VkValidationFeatureDisableEXT disabledFeatures[] = { + VK_VALIDATION_FEATURE_DISABLE_SHADERS_EXT, + VK_VALIDATION_FEATURE_DISABLE_THREAD_SAFETY_EXT + }; + VkValidationFeaturesEXT validationFeatures = {}; + validationFeatures.sType = VK_STRUCTURE_TYPE_VALIDATION_FEATURES_EXT; + validationFeatures.disabledValidationFeatureCount = 2; + validationFeatures.pDisabledValidationFeatures = disabledFeatures; VkInstanceCreateInfo createInfo = {}; createInfo.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; - createInfo.pNext = 0; + createInfo.pNext = &validationFeatures; createInfo.pApplicationInfo = &appInfo; createInfo.enabledExtensionCount = sizeof(enabledExtensions) / sizeof(const char*); createInfo.ppEnabledExtensionNames = enabledExtensions; @@ -376,7 +386,7 @@ void findQueueFamilies() { std::cout << 
"physical device has " << queueFamilyCount << " queue families" << std::endl; - bool foundGraphicsQueueFamily = false; + /*bool foundGraphicsQueueFamily = false; bool foundPresentQueueFamily = false; for (uint32_t i = 0; i < queueFamilyCount; i++) { @@ -414,7 +424,7 @@ void findQueueFamilies() { else { std::cerr << "could not find a valid queue family with graphics support" << std::endl; assert(0); - } + }*/ } void createLogicalDevice() { @@ -1067,21 +1077,59 @@ void CreateShaders() } }; + uint32_t spirv[6]; + + uint64_t* asm_ptrs[4]; + uint32_t asm_sizes[4]; + VkRpiShaderModuleAssemblyCreateInfoEXT shaderModuleCreateInfo = {}; - shaderModuleCreateInfo.asmStrings = asm_strings; + shaderModuleCreateInfo.instructions = asm_ptrs; + shaderModuleCreateInfo.numInstructions = asm_sizes; shaderModuleCreateInfo.mappings = mappings; shaderModuleCreateInfo.numMappings = sizeof(mappings) / sizeof(VkRpiAssemblyMappingEXT); - shaderModuleCreateInfo.pShaderModule = &shaderModule; - LoaderTrampoline* trampoline = (LoaderTrampoline*)physicalDevice; - VkRpiPhysicalDevice* realPhysicalDevice = trampoline->loaderTerminator->physicalDevice; + { //assemble cs code + asm_sizes[0] = get_num_instructions(cs_asm_code); + uint32_t size = sizeof(uint64_t)*asm_sizes[0]; + asm_ptrs[0] = (uint64_t*)malloc(size); + assemble_qpu_asm(cs_asm_code, asm_ptrs[0]); + } - realPhysicalDevice->customData = (uintptr_t)&shaderModuleCreateInfo; + { //assemble vs code + asm_sizes[1] = get_num_instructions(vs_asm_code); + uint32_t size = sizeof(uint64_t)*asm_sizes[1]; + asm_ptrs[1] = (uint64_t*)malloc(size); + assemble_qpu_asm(vs_asm_code, asm_ptrs[1]); + } - PFN_vkCreateShaderModuleFromRpiAssemblyEXT vkCreateShaderModuleFromRpiAssemblyEXT = (PFN_vkCreateShaderModuleFromRpiAssemblyEXT)vkGetInstanceProcAddr(instance, "vkCreateShaderModuleFromRpiAssemblyEXT"); + { //assemble fs code + asm_sizes[2] = get_num_instructions(fs_asm_code); + uint32_t size = sizeof(uint64_t)*asm_sizes[2]; + asm_ptrs[2] = 
(uint64_t*)malloc(size); + assemble_qpu_asm(fs_asm_code, asm_ptrs[2]); + } - VkResult res = vkCreateShaderModuleFromRpiAssemblyEXT(physicalDevice); - assert(shaderModule); + asm_sizes[3] = 0; + asm_ptrs[3] = 0; + + spirv[0] = 0x07230203; + spirv[1] = 0x00010000; + spirv[2] = 0x14E45250; + spirv[3] = 1; + spirv[4] = (uint32_t)&shaderModuleCreateInfo; + //words start here + spirv[5] = 1 << 16; + + VkShaderModuleCreateInfo smci = {}; + smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + smci.codeSize = sizeof(uint32_t)*6; + smci.pCode = spirv; + vkCreateShaderModule(device, &smci, 0, &shaderModule); + + for(uint32_t c = 0; c < 4; ++c) + { + free(asm_ptrs[c]); + } } diff --git a/test/varyings/CMakeLists.txt b/test/varyings/CMakeLists.txt index 0f8e5dc..f332bc5 100644 --- a/test/varyings/CMakeLists.txt +++ b/test/varyings/CMakeLists.txt @@ -6,4 +6,4 @@ file(GLOB testSrc add_executable(varyings ${testSrc} ) target_compile_options(varyings PRIVATE -Wall -std=c++11 -std=c11) -target_link_libraries(varyings vulkan) +target_link_libraries(varyings vulkan $) diff --git a/test/varyings/varyings.cpp b/test/varyings/varyings.cpp index 8642a01..819ec86 100644 --- a/test/varyings/varyings.cpp +++ b/test/varyings/varyings.cpp @@ -7,6 +7,7 @@ #include #include "driver/vkExt.h" +#include "QPUassembler/qpu_assembler.h" //#define GLFW_INCLUDE_VULKAN //#define VK_USE_PLATFORM_WIN32_KHR @@ -1082,21 +1083,59 @@ void CreateShaders() } }; + uint32_t spirv[6]; + + uint64_t* asm_ptrs[4]; + uint32_t asm_sizes[4]; + VkRpiShaderModuleAssemblyCreateInfoEXT shaderModuleCreateInfo = {}; - shaderModuleCreateInfo.asmStrings = asm_strings; + shaderModuleCreateInfo.instructions = asm_ptrs; + shaderModuleCreateInfo.numInstructions = asm_sizes; shaderModuleCreateInfo.mappings = mappings; shaderModuleCreateInfo.numMappings = sizeof(mappings) / sizeof(VkRpiAssemblyMappingEXT); - shaderModuleCreateInfo.pShaderModule = &shaderModule; - LoaderTrampoline* trampoline = (LoaderTrampoline*)physicalDevice; 
- VkRpiPhysicalDevice* realPhysicalDevice = trampoline->loaderTerminator->physicalDevice; + { //assemble cs code + asm_sizes[0] = get_num_instructions(cs_asm_code); + uint32_t size = sizeof(uint64_t)*asm_sizes[0]; + asm_ptrs[0] = (uint64_t*)malloc(size); + assemble_qpu_asm(cs_asm_code, asm_ptrs[0]); + } - realPhysicalDevice->customData = (uintptr_t)&shaderModuleCreateInfo; + { //assemble vs code + asm_sizes[1] = get_num_instructions(vs_asm_code); + uint32_t size = sizeof(uint64_t)*asm_sizes[1]; + asm_ptrs[1] = (uint64_t*)malloc(size); + assemble_qpu_asm(vs_asm_code, asm_ptrs[1]); + } - PFN_vkCreateShaderModuleFromRpiAssemblyEXT vkCreateShaderModuleFromRpiAssemblyEXT = (PFN_vkCreateShaderModuleFromRpiAssemblyEXT)vkGetInstanceProcAddr(instance, "vkCreateShaderModuleFromRpiAssemblyEXT"); + { //assemble fs code + asm_sizes[2] = get_num_instructions(fs_asm_code); + uint32_t size = sizeof(uint64_t)*asm_sizes[2]; + asm_ptrs[2] = (uint64_t*)malloc(size); + assemble_qpu_asm(fs_asm_code, asm_ptrs[2]); + } - VkResult res = vkCreateShaderModuleFromRpiAssemblyEXT(physicalDevice); - assert(shaderModule); + asm_sizes[3] = 0; + asm_ptrs[3] = 0; + + spirv[0] = 0x07230203; + spirv[1] = 0x00010000; + spirv[2] = 0x14E45250; + spirv[3] = 1; + spirv[4] = (uint32_t)&shaderModuleCreateInfo; + //words start here + spirv[5] = 1 << 16; + + VkShaderModuleCreateInfo smci = {}; + smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + smci.codeSize = sizeof(uint32_t)*6; + smci.pCode = spirv; + vkCreateShaderModule(device, &smci, 0, &shaderModule); + + for(uint32_t c = 0; c < 4; ++c) + { + free(asm_ptrs[c]); + } }