diff --git a/driver/common.h b/driver/common.h
index f59b580..53eef23 100644
--- a/driver/common.h
+++ b/driver/common.h
@@ -220,6 +220,7 @@ typedef struct VkShaderModule_T
 	VkRpiAssemblyMappingEXT* mappings;
 	uint32_t numMappings;
 	uint32_t hasThreadSwitch;
+	uint32_t numVaryings; //number of reads of register address 35 (varying read) counted in the assembly when the module is created
 } _shaderModule;
 
 typedef struct VkDescriptorSetLayout_T
diff --git a/driver/draw.c b/driver/draw.c
index e1a770b..b0d1441 100644
--- a/driver/draw.c
+++ b/driver/draw.c
@@ -197,7 +197,7 @@ void vkCmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t ins
 						 0, //TODO point size included in shaded vertex data?
 						 1, //enable clipping
 						 0, //TODO fragment number of used uniforms?
-						 0, //TODO fragment number of varyings?
+						 cb->graphicsPipeline->modules[ulog2(VK_SHADER_STAGE_FRAGMENT_BIT)]->numVaryings, //number of varyings of the fragment shader, as counted on the fragment-stage module
 						 0, //fragment uniform address?
 						 fragCode, //fragment code address
 						 0, //TODO vertex number of used uniforms?
diff --git a/driver/shader.c b/driver/shader.c
index 2175fd4..c8f57e9 100644
--- a/driver/shader.c
+++ b/driver/shader.c
@@ -67,6 +67,31 @@ VkResult vkCreateShaderModuleFromRpiAssemblyEXT(VkDevice device, VkRpiShaderModu
 				}
 			}
 
+			//count every read of register address 35 (counted as a varying read) in the assembled program
+			shader->numVaryings = 0;
+			for(uint64_t d = 0; d < numInstructions; ++d)
+			{
+				//shift down first, then mask: (0x7fll << 57) and (0xfll << 60) overflow a signed long long, which is undefined behaviour
+				unsigned is_sem = ((instructions[d] >> 57) & 0x7f) == 0x74;
+				unsigned sig_bits = (instructions[d] >> 60) & 0xf;
+
+				//if it's an ALU instruction (not a semaphore, and the signal bits are neither 14 nor 15)
+				if(!is_sem && sig_bits != 14 && sig_bits != 15)
+				{
+					unsigned raddr_a = (instructions[d] >> 18) & 0x3f;
+					unsigned raddr_b = (instructions[d] >> 12) & 0x3f;
+					if(raddr_a == 35)
+					{
+						shader->numVaryings++;
+					}
+					//don't count small immediates: with signal bits 13 the raddr_b field is not treated as a register address
+					if(sig_bits != 13 && raddr_b == 35)
+					{
+						shader->numVaryings++;
+					}
+				}
+			}
+
 			printf("\n");
 
 			FREE(instructions);
diff --git a/test/varyings/varyings.cpp b/test/varyings/varyings.cpp
index dcc8c21..adb7879 100644
--- a/test/varyings/varyings.cpp
+++ b/test/varyings/varyings.cpp
@@ -845,7 +845,6 @@ void CreateShaders()
 0x500009e7009e7000 sig_unlock_score nop nop, r0, r0 ; nop nop, r0, r0
 
 VS prog 2/1 QPU:
-0xd002102702821f80 sig_small_imm fsub rb0, 2.0, uni ; nop nop, r0, r0
 0x00401a00:
 0000 0000 0‭100 0000 0001 1010 0000 0000‬
 ///addr: 0
@@ -854,6 +853,7 @@ VS prog 2/1 QPU:
 ///horizontal
 ///stride=1
 ///vectors to read = 4
+0xd002102702821f80 sig_small_imm fsub rb0, 2.0, uni ; nop nop, r0, r0
 0xe0024c6700401a00 load_imm vr_setup, nop, 0x00401a00 (0.000000)
 0x100049e220c20037 nop nop, r0, r0 ; fmul r2, vpm_read, uni
 0x100049e3209c0017 nop nop, r0, r0 ; fmul r3, r2, rb0
@@ -868,11 +868,30 @@ VS prog 2/1 QPU:
 0x10020c27159e7240 mov vpm, r1 ; nop nop, r0, r0
 0x300009e7009e7000 sig_end nop nop, r0, r0 ; nop nop, r0, r0
 0x100009e7009e7000 nop nop, r0, r0 ; nop nop, r0, r0
+0x100009e7009e7000 nop nop, r0, r0 ; nop nop, r0, r0
+
+CS prog 2/2 QPU:
+0xe0024c6700201a00 load_imm vr_setup, nop, 0x00201a00 (0.000000)
+0x100208a715c27d80 mov r2, vpm_read ; nop nop, r0, r0
+0xe0025c6700001a00 load_imm vw_setup, nop, 0x00001a00 (0.000000)
+0x100248f095c27d92 mov r3, vpm_read ; mov vpm, r2
+0x10024c21358276de mov vpm, r3 ; fmul r1, r3, uni
+0xd00208e702821f80 sig_small_imm fsub r3, 2.0, uni ; nop nop, r0, r0
+0x100049e220827016 nop nop, r0, r0 ; fmul r2, r2, uni
+0x100049e0209e7013 nop nop, r0, r0 ; fmul r0, r2, r3
+0x10124021279e700b ftoi ra0.16a, r0, r0 ; fmul r1, r1, r3
+0x10220027079e7240 ftoi ra0.16b, r1, r1 ; nop nop, r0, r0
+0xd0020c27159c0fc0 sig_small_imm mov vpm, 0 ; nop nop, r0, r0
+0xd0020c27159e0fc0 sig_small_imm mov vpm, 1.0 ; nop nop, r0, r0
+0x10020c2715027d80 mov vpm, ra0 ; nop nop, r0, r0
+0x10020c2715827d80 mov vpm, uni ; nop nop, r0, r0
+0x10020c27159e76c0 mov vpm, r3 ; nop nop, r0, r0
+0x300009e7009e7000 sig_end nop nop, r0, r0 ; nop nop, r0, r0
+0x100009e7009e7000 nop nop, r0, r0 ; nop nop, r0, r0
 0x100009e7009e7000 nop nop, r0, r0 ; nop nop, r0, r0
 	/**/
 
 
-	//TODO doesn't work for some reason...
 	char vs_asm_code[] =
 			///0x40000000 = 2.0
 			///uni = 1.0
@@ -994,7 +1013,7 @@ VS prog 2/1 QPU:
 			"sig_none ; nop = nop(r0, r0) ; r2 = fmul.always(r2, r3);\n"
 			"sig_none ; nop = nop.pm(r0, r0) ; r0.8b = fmul.always(r1, r2) ;"
 			"sig_small_imm ; nop = nop.pm(r0, r0, nop, 0) ; r0.8a = v8min.always(b, b) ;"
-			"sig_small_imm ; nop = nop.pm(r0, r0, nop, 1) ; r0.8d = v8min.always(b, b) ;"
+			"sig_small_imm ; nop = nop.pm(r0, r0, nop, 0x3f800000) ; r0.8d = v8min.always(b, b) ;"
 			"sig_none ; tlb_color_all = or.always(r0, r0) ; nop = nop(r0, r0) ;"
 			"sig_end ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
 			"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
@@ -1031,7 +1050,18 @@ VS prog 2/1 QPU:
 			"sig_none ; r3 = fadd.pm.always(r0, r5) ; r0.8c = v8min.always(r2, r2) ;"
 			"sig_none ; nop = nop.pm(r0, r0) ; r0.8b = v8min.always(r3, r3) ;"
 			"sig_small_imm ; nop = nop.pm(r0, r0, nop, 0) ; r0.8a = v8min.always(b, b) ;"
-			"sig_small_imm ; nop = nop.pm(r0, r0, nop, 1) ; r0.8d = v8min.always(b, b) ;"
+			"sig_small_imm ; nop = nop.pm(r0, r0, nop, 0x3f800000) ; r0.8d = v8min.always(b, b) ;"
+
+			///"sig_small_imm ; nop = nop.pm(r0, r0, nop, 0x3f800000) ; r0.8d = v8min.always(b, b) ;"
+			///"sig_small_imm ; nop = nop.pm(r0, r0, nop, 0x3f800000) ; r0.8c = v8min.always(b, b) ;"
+			///"sig_small_imm ; nop = nop.pm(r0, r0, nop, 0x3f800000) ; r0.8b = v8min.always(b, b) ;"
+			///"sig_small_imm ; nop = nop.pm(r0, r0, nop, 0x3f800000) ; r0.8a = v8min.always(b, b) ;"
+
+			///"sig_none ; nop = nop.pm(r0, r0, pay_zw, nop) ; r0.8d = v8min.always(a, a) ;"
+			///"sig_none ; nop = nop.pm(r0, r0, pay_zw, nop) ; r0.8c = v8min.always(a, a) ;"
+			///"sig_none ; nop = nop.pm(r0, r0, pay_zw, nop) ; r0.8b = v8min.always(a, a) ;"
+			///"sig_none ; nop = nop.pm(r0, r0, pay_zw, nop) ; r0.8a = v8min.always(a, a) ;"
+
 			"sig_none ; tlb_color_all = or.always(r0, r0) ; nop = nop(r0, r0) ;"
 			"sig_end ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
 			"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"